HLSL: add geometry stage support for clip/cull distance

Changes:

(1) Allow clip/cull builtins as both input and output in the same shader stage. Previously, not enough data was tracked to handle this.

(2) Handle the extra array dimension in GS inputs. The synthesized external variable can now be created with the extra array dimension if needed, and the form conversion code is able to handle it as well.

For example, both of these GS inputs would result in the same synthesized external type:

    triangle in float4 clip[3] : SV_ClipDistance
    triangle in float2 clip[3][2] : SV_ClipDistance

In the second case, the inner array dimension packs with the 2-vector of floats into an array[4], of which there is an array[3] due to the triangle geometry.
2017-08-28 14:02:19 -06:00
parent ea0c1643ab
commit 5e5b12e931
13 changed files with 3448 additions and 204 deletions
--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@@ -66,8 +66,10 @@ HlslParseContext::HlslParseContext(TSymbolTable& symbolTable, TIntermediate& int
    entryPointFunction(nullptr),
    entryPointFunctionBody(nullptr),
    gsStreamOutput(nullptr),
-    clipDistanceVariable(nullptr),
-    cullDistanceVariable(nullptr)
+    clipDistanceInput(nullptr),
+    cullDistanceInput(nullptr),
+    clipDistanceOutput(nullptr),
+    cullDistanceOutput(nullptr)
 {
    globalUniformDefaults.clear();
    globalUniformDefaults.layoutMatrix = ElmRowMajor;
@@ -80,8 +82,10 @@ HlslParseContext::HlslParseContext(TSymbolTable& symbolTable, TIntermediate& int
    globalInputDefaults.clear();
    globalOutputDefaults.clear();

-    clipSemanticNSize.fill(0);
-    cullSemanticNSize.fill(0);
+    clipSemanticNSizeIn.fill(0);
+    cullSemanticNSizeIn.fill(0);
+    clipSemanticNSizeOut.fill(0);
+    cullSemanticNSizeOut.fill(0);

    // "Shaders in the transform
    // feedback capturing mode have an initial global default of
@@ -1152,14 +1156,18 @@ void HlslParseContext::splitBuiltIn(const TString& baseName, const TType& member
    if (arraySizes != nullptr && !memberType.isArray())
        ioVar->getWritableType().newArraySizes(*arraySizes);

-    fixBuiltInIoType(ioVar->getWritableType());
-
    splitBuiltIns[tInterstageIoData(memberType.getQualifier().builtIn, outerQualifier.storage)] = ioVar;
    if (!isClipOrCullDistance(ioVar->getType()))
        trackLinkage(*ioVar);

    // Merge qualifier from the user structure
    mergeQualifiers(ioVar->getWritableType().getQualifier(), outerQualifier);
+
+    // Fix the builtin type if needed (e.g, some types require fixed array sizes, no matter how the
+    // shader declared them).  This is done after mergeQualifiers(), in case fixBuiltInIoType looks
+    // at the qualifier to determine e.g, in or out qualifications.
+    fixBuiltInIoType(ioVar->getWritableType());
+
    // But, not location, we're losing that
    ioVar->getWritableType().getQualifier().layoutLocation = TQualifier::layoutLocationEnd;
 }
@@ -1483,10 +1491,18 @@ void HlslParseContext::fixBuiltInIoType(TType& type)
        }
    default:
        if (isClipOrCullDistance(type)) {
+            const int loc = type.getQualifier().layoutLocation;
+
            if (type.getQualifier().builtIn == EbvClipDistance) {
-                clipSemanticNSize[type.getQualifier().layoutLocation] = type.getVectorSize();
+                if (type.getQualifier().storage == EvqVaryingIn)
+                    clipSemanticNSizeIn[loc] = type.getVectorSize();
+                else
+                    clipSemanticNSizeOut[loc] = type.getVectorSize();
            } else {
-                cullSemanticNSize[type.getQualifier().layoutLocation] = type.getVectorSize();
+                if (type.getQualifier().storage == EvqVaryingIn)
+                    cullSemanticNSizeIn[loc] = type.getVectorSize();
+                else
+                    cullSemanticNSizeOut[loc] = type.getVectorSize();
            }
        }

@@ -2274,6 +2290,7 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
    switch (language) {
    case EShLangFragment:
    case EShLangVertex:
+    case EShLangGeometry:
        break;
    default:
        error(loc, "unimplemented: clip/cull not currently implemented for this stage", "", "");
@@ -2292,17 +2309,17 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc

    const TBuiltInVariable builtInType = clipCullNode->getQualifier().builtIn;

-    decltype(clipSemanticNSize)* semanticNSize = nullptr;
+    decltype(clipSemanticNSizeIn)* semanticNSize = nullptr;

    // Refer to either the clip or the cull distance, depending on semantic.
    switch (builtInType) {
    case EbvClipDistance:
-        clipCullVar = &clipDistanceVariable;
-        semanticNSize = &clipSemanticNSize;
+        clipCullVar = isOutput ? &clipDistanceOutput : &clipDistanceInput;
+        semanticNSize = isOutput ? &clipSemanticNSizeOut : &clipSemanticNSizeIn;
        break;
    case EbvCullDistance:
-        clipCullVar = &cullDistanceVariable;
-        semanticNSize = &cullSemanticNSize;
+        clipCullVar = isOutput ? &cullDistanceOutput : &cullDistanceInput;
+        semanticNSize = isOutput ? &cullSemanticNSizeOut : &cullSemanticNSizeIn;
        break;

    // called invalidly: we expected a clip or a cull distance.
@@ -2328,28 +2345,48 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
        vecItems += (*semanticNSize)[x];
        arrayLoc += (*semanticNSize)[x];
    }
-        
-    // array sizes, or 1 if it's not an array:
-    const int internalNodeArraySize = (internalNode->getType().isArray() ? internalNode->getType().getOuterArraySize() : 1);
+ 
+
+    // It can have up to 2 array dimensions (in the case of geometry shader inputs)
+    const TArraySizes* const internalArraySizes = internalNode->getType().getArraySizes();
+    const int internalArrayDims = internalNode->getType().isArray() ? internalArraySizes->getNumDims() : 0;
    // vector sizes:
-    const int internalNodeVectorSize = internalNode->getType().getVectorSize();
+    const int internalVectorSize = internalNode->getType().getVectorSize();
+    // array sizes, or 1 if it's not an array:
+    const int internalInnerArraySize = (internalArrayDims > 0 ? internalArraySizes->getDimSize(internalArrayDims-1) : 1);
+    const int internalOuterArraySize = (internalArrayDims > 1 ? internalArraySizes->getDimSize(0) : 1);
+
+    // The created type may be an array of arrays, e.g, for geometry shader inputs.
+    const bool isImplicitlyArrayed = (language == EShLangGeometry && !isOutput);

    // If we haven't created the output already, create it now.
    if (*clipCullVar == nullptr) {
        // ClipDistance and CullDistance are handled specially in the entry point input/output copy
        // algorithm, because they may need to be unpacked from components of vectors (or a scalar)
        // into a float array, or vice versa.  Here, we make the array the right size and type,
-        // which depends on the incoming data, which has several potential dimensions: Semantic ID
-        // vector size array size Of those, semantic ID and array size cannot appear
-        // simultaneously.
-        const int requiredArraySize = arrayLoc * internalNodeArraySize;
+        // which depends on the incoming data, which has several potential dimensions:
+        //    * Semantic ID
+        //    * vector size 
+        //    * array size
+        // Of those, semantic ID and array size cannot appear simultaneously.
+        //
+        // Also to note: for implicitly arrayed forms (e.g, geometry shader inputs), we need to create two
+        // array dimensions.  The shader's declaration may have one or two array dimensions.  One is always
+        // the geometry's dimension.
+
+        const bool useInnerSize = internalArrayDims > 1 || !isImplicitlyArrayed;
+
+        const int requiredInnerArraySize = arrayLoc * (useInnerSize ? internalInnerArraySize : 1);
+        const int requiredOuterArraySize = (internalArrayDims > 0) ? internalArraySizes->getDimSize(0) : 1;

        TType clipCullType(EbtFloat, clipCullNode->getType().getQualifier().storage, 1);
        clipCullType.getQualifier() = clipCullNode->getType().getQualifier();

        // Create required array dimension
        TArraySizes arraySizes;
-        arraySizes.addInnerSize(requiredArraySize);
+        if (isImplicitlyArrayed)
+            arraySizes.addInnerSize(requiredOuterArraySize);
+        arraySizes.addInnerSize(requiredInnerArraySize);
        clipCullType.newArraySizes(arraySizes);

        // Obtain symbol name: we'll use that for the symbol we introduce.
@@ -2369,10 +2406,13 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
    // Create symbol for the clip or cull variable.
    TIntermSymbol* clipCullSym = intermediate.addSymbol(**clipCullVar);

-    // array sizes, or 1 if it's not an array:
-    const int clipCullSymArraySize = (clipCullSym->getType().isArray() ? clipCullSym->getType().getOuterArraySize() : 1);
    // vector sizes:
-    const int clipCullSymVectorSize = clipCullSym->getType().getVectorSize();
+    const int clipCullVectorSize = clipCullSym->getType().getVectorSize();
+
+    // array sizes, or 1 if it's not an array:
+    const TArraySizes* const clipCullArraySizes = clipCullSym->getType().getArraySizes();
+    const int clipCullOuterArraySize = isImplicitlyArrayed ? clipCullArraySizes->getDimSize(0) : 1;
+    const int clipCullInnerArraySize = clipCullArraySizes->getDimSize(isImplicitlyArrayed ? 1 : 0);

    // clipCullSym has got to be an array of scalar floats, per SPIR-V semantics.
    // fixBuiltInIoType() should have handled that upstream.
@@ -2390,8 +2430,9 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
    // If the types are homomorphic, use a simple assign.  No need to mess about with 
    // individual components.
    if (clipCullSym->getType().isArray() == internalNode->getType().isArray() &&
-        clipCullSymArraySize == internalNodeArraySize &&
-        clipCullSymVectorSize == internalNodeVectorSize) {
+        clipCullInnerArraySize == internalInnerArraySize &&
+        clipCullOuterArraySize == internalOuterArraySize &&
+        clipCullVectorSize == internalVectorSize) {

        if (isOutput)
            clipCullAssign = intermediate.addAssign(op, clipCullSym, internalNode, loc);
@@ -2407,47 +2448,61 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
    // We are going to copy each component of the internal (per array element if indicated) to sequential
    // array elements of the clipCullSym.  This tracks the lhs element we're writing to as we go along.
    // We may be starting in the middle - e.g, for a non-zero semantic ID calculated above.
-    int clipCullArrayPos = semanticOffset[semanticId];
+    int clipCullInnerArrayPos = semanticOffset[semanticId];
+    int clipCullOuterArrayPos = 0;
+
+    // Lambda to add an index to a node, set the type of the result, and return the new node.
+    const auto addIndex = [this, &loc](TIntermTyped* node, int pos) -> TIntermTyped* {
+        const TType derefType(node->getType(), 0);
+        node = intermediate.addIndex(EOpIndexDirect, node, intermediate.addConstantUnion(pos, loc), loc);
+        node->setType(derefType);
+        return node;
+    };

    // Loop through every component of every element of the internal, and copy to or from the matching external.
-    for (int internalArrayPos = 0; internalArrayPos < internalNodeArraySize; ++internalArrayPos) {
-        for (int internalComponent = 0; internalComponent < internalNodeVectorSize; ++internalComponent) {
-            TIntermTyped* clipCullMember = clipCullSym;
+    for (int internalOuterArrayPos = 0; internalOuterArrayPos < internalOuterArraySize; ++internalOuterArrayPos) {
+        for (int internalInnerArrayPos = 0; internalInnerArrayPos < internalInnerArraySize; ++internalInnerArrayPos) {
+            for (int internalComponent = 0; internalComponent < internalVectorSize; ++internalComponent) {
+                // clip/cull array member to read from / write to:
+                TIntermTyped* clipCullMember = clipCullSym;

-            // array member to read from / write to:
-            {
-                const TType derefType(clipCullMember->getType(), 0);
-                clipCullMember = intermediate.addIndex(EOpIndexDirect, clipCullMember,
-                                                       intermediate.addConstantUnion(clipCullArrayPos++, loc), loc);
-                clipCullMember->setType(derefType);
+                // If implicitly arrayed, there is an outer array dimension involved
+                if (isImplicitlyArrayed)
+                    clipCullMember = addIndex(clipCullMember, clipCullOuterArrayPos);
+
+                // Index into proper array position for clip cull member
+                clipCullMember = addIndex(clipCullMember, clipCullInnerArrayPos++);
+
+                // if needed, start over with next outer array slice.
+                if (isImplicitlyArrayed && clipCullInnerArrayPos >= clipCullInnerArraySize) {
+                    clipCullInnerArrayPos = semanticOffset[semanticId];
+                    ++clipCullOuterArrayPos;
+                }
+
+                // internal member to read from / write to:
+                TIntermTyped* internalMember = internalNode;
+
+                // If internal node has outer array dimension, index appropriately.
+                if (internalArrayDims > 1)
+                    internalMember = addIndex(internalMember, internalOuterArrayPos);
+
+                // If internal node has inner array dimension, index appropriately.
+                if (internalArrayDims > 0)
+                    internalMember = addIndex(internalMember, internalInnerArrayPos);
+
+                // If internal node is a vector, extract the component of interest.
+                if (internalNode->getType().isVector())
+                    internalMember = addIndex(internalMember, internalComponent);
+
+                // Create an assignment: output from internal to clip cull, or input from clip cull to internal.
+                if (isOutput)
+                    clipCullAssign = intermediate.addAssign(op, clipCullMember, internalMember, loc);
+                else
+                    clipCullAssign = intermediate.addAssign(op, internalMember, clipCullMember, loc);
+
+                // Track assignment in the sequence.
+                assignList = intermediate.growAggregate(assignList, clipCullAssign);
            }
-
-            TIntermTyped* internalMember = internalNode;
-
-            // If internal node is an array, extract the element of interest
-            if (internalNode->getType().isArray()) {
-                const TType derefType(internalMember->getType(), 0);
-                internalMember = intermediate.addIndex(EOpIndexDirect, internalMember,
-                                                  intermediate.addConstantUnion(internalArrayPos, loc), loc);
-                internalMember->setType(derefType);
-            }
-
-            // If internal node is a vector, extract the component of interest.
-            if (internalNode->getType().isVector()) {
-                const TType derefType(internalMember->getType(), 0);
-                internalMember = intermediate.addIndex(EOpIndexDirect, internalMember,
-                                                  intermediate.addConstantUnion(internalComponent, loc), loc);
-                internalMember->setType(derefType);
-            }
-
-            // Create an assignment: output from internal to clip cull, or input from clip cull to internal.
-            if (isOutput)
-                clipCullAssign = intermediate.addAssign(op, clipCullMember, internalMember, loc);
-            else
-                clipCullAssign = intermediate.addAssign(op, internalMember, clipCullMember, loc);
-
-            // Track assignment in the sequence.
-            assignList = intermediate.growAggregate(assignList, clipCullAssign);
        }
    }