Merge pull request #1037 from LoopDawg/clip-cull-geom

HLSL: add geometry stage support for clip/cull distance
2017-08-31 12:36:06 -06:00
parent a459fc8142 5e5b12e931
commit 75e057f980
13 changed files with 3448 additions and 204 deletions
--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@@ -66,8 +66,10 @@ HlslParseContext::HlslParseContext(TSymbolTable& symbolTable, TIntermediate& int
    entryPointFunction(nullptr),
    entryPointFunctionBody(nullptr),
    gsStreamOutput(nullptr),
-    clipDistanceVariable(nullptr),
-    cullDistanceVariable(nullptr)
+    clipDistanceInput(nullptr),
+    cullDistanceInput(nullptr),
+    clipDistanceOutput(nullptr),
+    cullDistanceOutput(nullptr)
 {
    globalUniformDefaults.clear();
    globalUniformDefaults.layoutMatrix = ElmRowMajor;
@@ -80,8 +82,10 @@ HlslParseContext::HlslParseContext(TSymbolTable& symbolTable, TIntermediate& int
    globalInputDefaults.clear();
    globalOutputDefaults.clear();

-    clipSemanticNSize.fill(0);
-    cullSemanticNSize.fill(0);
+    clipSemanticNSizeIn.fill(0);
+    cullSemanticNSizeIn.fill(0);
+    clipSemanticNSizeOut.fill(0);
+    cullSemanticNSizeOut.fill(0);

    // "Shaders in the transform
    // feedback capturing mode have an initial global default of
@@ -1152,14 +1156,18 @@ void HlslParseContext::splitBuiltIn(const TString& baseName, const TType& member
    if (arraySizes != nullptr && !memberType.isArray())
        ioVar->getWritableType().newArraySizes(*arraySizes);

-    fixBuiltInIoType(ioVar->getWritableType());
-
    splitBuiltIns[tInterstageIoData(memberType.getQualifier().builtIn, outerQualifier.storage)] = ioVar;
    if (!isClipOrCullDistance(ioVar->getType()))
        trackLinkage(*ioVar);

    // Merge qualifier from the user structure
    mergeQualifiers(ioVar->getWritableType().getQualifier(), outerQualifier);
+
+    // Fix the builtin type if needed (e.g., some types require fixed array sizes, no matter how the
+    // shader declared them).  This is done after mergeQualifiers(), in case fixBuiltInIoType looks
+    // at the qualifier to determine, e.g., in or out qualifications.
+    fixBuiltInIoType(ioVar->getWritableType());
+
    // But, not location, we're losing that
    ioVar->getWritableType().getQualifier().layoutLocation = TQualifier::layoutLocationEnd;
 }
@@ -1483,10 +1491,18 @@ void HlslParseContext::fixBuiltInIoType(TType& type)
        }
    default:
        if (isClipOrCullDistance(type)) {
+            const int loc = type.getQualifier().layoutLocation;
+
            if (type.getQualifier().builtIn == EbvClipDistance) {
-                clipSemanticNSize[type.getQualifier().layoutLocation] = type.getVectorSize();
+                if (type.getQualifier().storage == EvqVaryingIn)
+                    clipSemanticNSizeIn[loc] = type.getVectorSize();
+                else
+                    clipSemanticNSizeOut[loc] = type.getVectorSize();
            } else {
-                cullSemanticNSize[type.getQualifier().layoutLocation] = type.getVectorSize();
+                if (type.getQualifier().storage == EvqVaryingIn)
+                    cullSemanticNSizeIn[loc] = type.getVectorSize();
+                else
+                    cullSemanticNSizeOut[loc] = type.getVectorSize();
            }
        }

@@ -2274,6 +2290,7 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
    switch (language) {
    case EShLangFragment:
    case EShLangVertex:
+    case EShLangGeometry:
        break;
    default:
        error(loc, "unimplemented: clip/cull not currently implemented for this stage", "", "");
@@ -2292,17 +2309,17 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc

    const TBuiltInVariable builtInType = clipCullNode->getQualifier().builtIn;

-    decltype(clipSemanticNSize)* semanticNSize = nullptr;
+    decltype(clipSemanticNSizeIn)* semanticNSize = nullptr;

    // Refer to either the clip or the cull distance, depending on semantic.
    switch (builtInType) {
    case EbvClipDistance:
-        clipCullVar = &clipDistanceVariable;
-        semanticNSize = &clipSemanticNSize;
+        clipCullVar = isOutput ? &clipDistanceOutput : &clipDistanceInput;
+        semanticNSize = isOutput ? &clipSemanticNSizeOut : &clipSemanticNSizeIn;
        break;
    case EbvCullDistance:
-        clipCullVar = &cullDistanceVariable;
-        semanticNSize = &cullSemanticNSize;
+        clipCullVar = isOutput ? &cullDistanceOutput : &cullDistanceInput;
+        semanticNSize = isOutput ? &cullSemanticNSizeOut : &cullSemanticNSizeIn;
        break;

    // called invalidly: we expected a clip or a cull distance.
@@ -2328,28 +2345,48 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
        vecItems += (*semanticNSize)[x];
        arrayLoc += (*semanticNSize)[x];
    }
-        
-    // array sizes, or 1 if it's not an array:
-    const int internalNodeArraySize = (internalNode->getType().isArray() ? internalNode->getType().getOuterArraySize() : 1);
+ 
+
+    // It can have up to 2 array dimensions (in the case of geometry shader inputs)
+    const TArraySizes* const internalArraySizes = internalNode->getType().getArraySizes();
+    const int internalArrayDims = internalNode->getType().isArray() ? internalArraySizes->getNumDims() : 0;
    // vector sizes:
-    const int internalNodeVectorSize = internalNode->getType().getVectorSize();
+    const int internalVectorSize = internalNode->getType().getVectorSize();
+    // array sizes, or 1 if it's not an array:
+    const int internalInnerArraySize = (internalArrayDims > 0 ? internalArraySizes->getDimSize(internalArrayDims-1) : 1);
+    const int internalOuterArraySize = (internalArrayDims > 1 ? internalArraySizes->getDimSize(0) : 1);
+
+    // The created type may be an array of arrays, e.g., for geometry shader inputs.
+    const bool isImplicitlyArrayed = (language == EShLangGeometry && !isOutput);

    // If we haven't created the output already, create it now.
    if (*clipCullVar == nullptr) {
        // ClipDistance and CullDistance are handled specially in the entry point input/output copy
        // algorithm, because they may need to be unpacked from components of vectors (or a scalar)
        // into a float array, or vice versa.  Here, we make the array the right size and type,
-        // which depends on the incoming data, which has several potential dimensions: Semantic ID
-        // vector size array size Of those, semantic ID and array size cannot appear
-        // simultaneously.
-        const int requiredArraySize = arrayLoc * internalNodeArraySize;
+        // which depends on the incoming data, which has several potential dimensions:
+        //    * Semantic ID
+        //    * vector size 
+        //    * array size
+        // Of those, semantic ID and array size cannot appear simultaneously.
+        //
+        // Also note: for implicitly arrayed forms (e.g., geometry shader inputs), we need to create two
+        // array dimensions.  The shader's declaration may have one or two array dimensions.  One is always
+        // the geometry's dimension.
+
+        const bool useInnerSize = internalArrayDims > 1 || !isImplicitlyArrayed;
+
+        const int requiredInnerArraySize = arrayLoc * (useInnerSize ? internalInnerArraySize : 1);
+        const int requiredOuterArraySize = (internalArrayDims > 0) ? internalArraySizes->getDimSize(0) : 1;

        TType clipCullType(EbtFloat, clipCullNode->getType().getQualifier().storage, 1);
        clipCullType.getQualifier() = clipCullNode->getType().getQualifier();

        // Create required array dimension
        TArraySizes arraySizes;
-        arraySizes.addInnerSize(requiredArraySize);
+        if (isImplicitlyArrayed)
+            arraySizes.addInnerSize(requiredOuterArraySize);
+        arraySizes.addInnerSize(requiredInnerArraySize);
        clipCullType.newArraySizes(arraySizes);

        // Obtain symbol name: we'll use that for the symbol we introduce.
@@ -2369,10 +2406,13 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
    // Create symbol for the clip or cull variable.
    TIntermSymbol* clipCullSym = intermediate.addSymbol(**clipCullVar);

-    // array sizes, or 1 if it's not an array:
-    const int clipCullSymArraySize = (clipCullSym->getType().isArray() ? clipCullSym->getType().getOuterArraySize() : 1);
    // vector sizes:
-    const int clipCullSymVectorSize = clipCullSym->getType().getVectorSize();
+    const int clipCullVectorSize = clipCullSym->getType().getVectorSize();
+
+    // array sizes, or 1 if it's not an array:
+    const TArraySizes* const clipCullArraySizes = clipCullSym->getType().getArraySizes();
+    const int clipCullOuterArraySize = isImplicitlyArrayed ? clipCullArraySizes->getDimSize(0) : 1;
+    const int clipCullInnerArraySize = clipCullArraySizes->getDimSize(isImplicitlyArrayed ? 1 : 0);

    // clipCullSym has got to be an array of scalar floats, per SPIR-V semantics.
    // fixBuiltInIoType() should have handled that upstream.
@@ -2390,8 +2430,9 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
    // If the types are homomorphic, use a simple assign.  No need to mess about with 
    // individual components.
    if (clipCullSym->getType().isArray() == internalNode->getType().isArray() &&
-        clipCullSymArraySize == internalNodeArraySize &&
-        clipCullSymVectorSize == internalNodeVectorSize) {
+        clipCullInnerArraySize == internalInnerArraySize &&
+        clipCullOuterArraySize == internalOuterArraySize &&
+        clipCullVectorSize == internalVectorSize) {

        if (isOutput)
            clipCullAssign = intermediate.addAssign(op, clipCullSym, internalNode, loc);
@@ -2407,47 +2448,61 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
    // We are going to copy each component of the internal (per array element if indicated) to sequential
    // array elements of the clipCullSym.  This tracks the lhs element we're writing to as we go along.
    // We may be starting in the middle - e.g, for a non-zero semantic ID calculated above.
-    int clipCullArrayPos = semanticOffset[semanticId];
+    int clipCullInnerArrayPos = semanticOffset[semanticId];
+    int clipCullOuterArrayPos = 0;
+
+    // Lambda to add an index to a node, set the type of the result, and return the new node.
+    const auto addIndex = [this, &loc](TIntermTyped* node, int pos) -> TIntermTyped* {
+        const TType derefType(node->getType(), 0);
+        node = intermediate.addIndex(EOpIndexDirect, node, intermediate.addConstantUnion(pos, loc), loc);
+        node->setType(derefType);
+        return node;
+    };

    // Loop through every component of every element of the internal, and copy to or from the matching external.
-    for (int internalArrayPos = 0; internalArrayPos < internalNodeArraySize; ++internalArrayPos) {
-        for (int internalComponent = 0; internalComponent < internalNodeVectorSize; ++internalComponent) {
-            TIntermTyped* clipCullMember = clipCullSym;
+    for (int internalOuterArrayPos = 0; internalOuterArrayPos < internalOuterArraySize; ++internalOuterArrayPos) {
+        for (int internalInnerArrayPos = 0; internalInnerArrayPos < internalInnerArraySize; ++internalInnerArrayPos) {
+            for (int internalComponent = 0; internalComponent < internalVectorSize; ++internalComponent) {
+                // clip/cull array member to read from / write to:
+                TIntermTyped* clipCullMember = clipCullSym;

-            // array member to read from / write to:
-            {
-                const TType derefType(clipCullMember->getType(), 0);
-                clipCullMember = intermediate.addIndex(EOpIndexDirect, clipCullMember,
-                                                       intermediate.addConstantUnion(clipCullArrayPos++, loc), loc);
-                clipCullMember->setType(derefType);
+                // If implicitly arrayed, there is an outer array dimension involved
+                if (isImplicitlyArrayed)
+                    clipCullMember = addIndex(clipCullMember, clipCullOuterArrayPos);
+
+                // Index into proper array position for clip cull member
+                clipCullMember = addIndex(clipCullMember, clipCullInnerArrayPos++);
+
+                // if needed, start over with next outer array slice.
+                if (isImplicitlyArrayed && clipCullInnerArrayPos >= clipCullInnerArraySize) {
+                    clipCullInnerArrayPos = semanticOffset[semanticId];
+                    ++clipCullOuterArrayPos;
+                }
+
+                // internal member to read from / write to:
+                TIntermTyped* internalMember = internalNode;
+
+                // If internal node has outer array dimension, index appropriately.
+                if (internalArrayDims > 1)
+                    internalMember = addIndex(internalMember, internalOuterArrayPos);
+
+                // If internal node has inner array dimension, index appropriately.
+                if (internalArrayDims > 0)
+                    internalMember = addIndex(internalMember, internalInnerArrayPos);
+
+                // If internal node is a vector, extract the component of interest.
+                if (internalNode->getType().isVector())
+                    internalMember = addIndex(internalMember, internalComponent);
+
+                // Create an assignment: output from internal to clip cull, or input from clip cull to internal.
+                if (isOutput)
+                    clipCullAssign = intermediate.addAssign(op, clipCullMember, internalMember, loc);
+                else
+                    clipCullAssign = intermediate.addAssign(op, internalMember, clipCullMember, loc);
+
+                // Track assignment in the sequence.
+                assignList = intermediate.growAggregate(assignList, clipCullAssign);
            }
-
-            TIntermTyped* internalMember = internalNode;
-
-            // If internal node is an array, extract the element of interest
-            if (internalNode->getType().isArray()) {
-                const TType derefType(internalMember->getType(), 0);
-                internalMember = intermediate.addIndex(EOpIndexDirect, internalMember,
-                                                  intermediate.addConstantUnion(internalArrayPos, loc), loc);
-                internalMember->setType(derefType);
-            }
-
-            // If internal node is a vector, extract the component of interest.
-            if (internalNode->getType().isVector()) {
-                const TType derefType(internalMember->getType(), 0);
-                internalMember = intermediate.addIndex(EOpIndexDirect, internalMember,
-                                                  intermediate.addConstantUnion(internalComponent, loc), loc);
-                internalMember->setType(derefType);
-            }
-
-            // Create an assignment: output from internal to clip cull, or input from clip cull to internal.
-            if (isOutput)
-                clipCullAssign = intermediate.addAssign(op, clipCullMember, internalMember, loc);
-            else
-                clipCullAssign = intermediate.addAssign(op, internalMember, clipCullMember, loc);
-
-            // Track assignment in the sequence.
-            assignList = intermediate.growAggregate(assignList, clipCullAssign);
        }
    }