HLSL: add geometry stage support for clip/cull distance

Changes:

(1) Allow clip/cull builtins as both input and output in the same shader stage.  Previously,
not enough data was tracked to handle this.

(2) Handle the extra array dimension in GS inputs.  The synthesized external variable can
now be created with the extra array dimension if needed, and the form conversion code is
able to handle it as well.

For example, both of these GS inputs would result in the same synthesized external type:

    triangle in float4 clip[3] : SV_ClipDistance

    triangle in float2 clip[3][2] : SV_ClipDistance

In the second case, the inner array dimension (2) packs with the 2-vector of floats into a float array[4],
and the triangle geometry supplies the outer dimension, giving an array[3] of those array[4]s.
This commit is contained in:
LoopDawg
2017-08-28 14:02:19 -06:00
parent ea0c1643ab
commit 5e5b12e931
13 changed files with 3448 additions and 204 deletions

View File

@@ -66,8 +66,10 @@ HlslParseContext::HlslParseContext(TSymbolTable& symbolTable, TIntermediate& int
entryPointFunction(nullptr),
entryPointFunctionBody(nullptr),
gsStreamOutput(nullptr),
clipDistanceVariable(nullptr),
cullDistanceVariable(nullptr)
clipDistanceInput(nullptr),
cullDistanceInput(nullptr),
clipDistanceOutput(nullptr),
cullDistanceOutput(nullptr)
{
globalUniformDefaults.clear();
globalUniformDefaults.layoutMatrix = ElmRowMajor;
@@ -80,8 +82,10 @@ HlslParseContext::HlslParseContext(TSymbolTable& symbolTable, TIntermediate& int
globalInputDefaults.clear();
globalOutputDefaults.clear();
clipSemanticNSize.fill(0);
cullSemanticNSize.fill(0);
clipSemanticNSizeIn.fill(0);
cullSemanticNSizeIn.fill(0);
clipSemanticNSizeOut.fill(0);
cullSemanticNSizeOut.fill(0);
// "Shaders in the transform
// feedback capturing mode have an initial global default of
@@ -1152,14 +1156,18 @@ void HlslParseContext::splitBuiltIn(const TString& baseName, const TType& member
if (arraySizes != nullptr && !memberType.isArray())
ioVar->getWritableType().newArraySizes(*arraySizes);
fixBuiltInIoType(ioVar->getWritableType());
splitBuiltIns[tInterstageIoData(memberType.getQualifier().builtIn, outerQualifier.storage)] = ioVar;
if (!isClipOrCullDistance(ioVar->getType()))
trackLinkage(*ioVar);
// Merge qualifier from the user structure
mergeQualifiers(ioVar->getWritableType().getQualifier(), outerQualifier);
// Fix the builtin type if needed (e.g, some types require fixed array sizes, no matter how the
// shader declared them). This is done after mergeQualifiers(), in case fixBuiltInIoType looks
// at the qualifier to determine e.g, in or out qualifications.
fixBuiltInIoType(ioVar->getWritableType());
// But, not location, we're losing that
ioVar->getWritableType().getQualifier().layoutLocation = TQualifier::layoutLocationEnd;
}
@@ -1483,10 +1491,18 @@ void HlslParseContext::fixBuiltInIoType(TType& type)
}
default:
if (isClipOrCullDistance(type)) {
const int loc = type.getQualifier().layoutLocation;
if (type.getQualifier().builtIn == EbvClipDistance) {
clipSemanticNSize[type.getQualifier().layoutLocation] = type.getVectorSize();
if (type.getQualifier().storage == EvqVaryingIn)
clipSemanticNSizeIn[loc] = type.getVectorSize();
else
clipSemanticNSizeOut[loc] = type.getVectorSize();
} else {
cullSemanticNSize[type.getQualifier().layoutLocation] = type.getVectorSize();
if (type.getQualifier().storage == EvqVaryingIn)
cullSemanticNSizeIn[loc] = type.getVectorSize();
else
cullSemanticNSizeOut[loc] = type.getVectorSize();
}
}
@@ -2274,6 +2290,7 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
switch (language) {
case EShLangFragment:
case EShLangVertex:
case EShLangGeometry:
break;
default:
error(loc, "unimplemented: clip/cull not currently implemented for this stage", "", "");
@@ -2292,17 +2309,17 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
const TBuiltInVariable builtInType = clipCullNode->getQualifier().builtIn;
decltype(clipSemanticNSize)* semanticNSize = nullptr;
decltype(clipSemanticNSizeIn)* semanticNSize = nullptr;
// Refer to either the clip or the cull distance, depending on semantic.
switch (builtInType) {
case EbvClipDistance:
clipCullVar = &clipDistanceVariable;
semanticNSize = &clipSemanticNSize;
clipCullVar = isOutput ? &clipDistanceOutput : &clipDistanceInput;
semanticNSize = isOutput ? &clipSemanticNSizeOut : &clipSemanticNSizeIn;
break;
case EbvCullDistance:
clipCullVar = &cullDistanceVariable;
semanticNSize = &cullSemanticNSize;
clipCullVar = isOutput ? &cullDistanceOutput : &cullDistanceInput;
semanticNSize = isOutput ? &cullSemanticNSizeOut : &cullSemanticNSizeIn;
break;
// called invalidly: we expected a clip or a cull distance.
@@ -2328,28 +2345,48 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
vecItems += (*semanticNSize)[x];
arrayLoc += (*semanticNSize)[x];
}
// array sizes, or 1 if it's not an array:
const int internalNodeArraySize = (internalNode->getType().isArray() ? internalNode->getType().getOuterArraySize() : 1);
// It can have up to 2 array dimensions (in the case of geometry shader inputs)
const TArraySizes* const internalArraySizes = internalNode->getType().getArraySizes();
const int internalArrayDims = internalNode->getType().isArray() ? internalArraySizes->getNumDims() : 0;
// vector sizes:
const int internalNodeVectorSize = internalNode->getType().getVectorSize();
const int internalVectorSize = internalNode->getType().getVectorSize();
// array sizes, or 1 if it's not an array:
const int internalInnerArraySize = (internalArrayDims > 0 ? internalArraySizes->getDimSize(internalArrayDims-1) : 1);
const int internalOuterArraySize = (internalArrayDims > 1 ? internalArraySizes->getDimSize(0) : 1);
// The created type may be an array of arrays, e.g, for geometry shader inputs.
const bool isImplicitlyArrayed = (language == EShLangGeometry && !isOutput);
// If we haven't created the clip/cull variable (input or output, as selected above) already, create it now.
if (*clipCullVar == nullptr) {
// ClipDistance and CullDistance are handled specially in the entry point input/output copy
// algorithm, because they may need to be unpacked from components of vectors (or a scalar)
// into a float array, or vice versa. Here, we make the array the right size and type,
// which depends on the incoming data, which has several potential dimensions: Semantic ID
// vector size array size Of those, semantic ID and array size cannot appear
// simultaneously.
const int requiredArraySize = arrayLoc * internalNodeArraySize;
// which depends on the incoming data, which has several potential dimensions:
// * Semantic ID
// * vector size
// * array size
// Of those, semantic ID and array size cannot appear simultaneously.
//
// Also to note: for implicitly arrayed forms (e.g, geometry shader inputs), we need to create two
// array dimensions. The shader's declaration may have one or two array dimensions. One is always
// the geometry's dimension.
const bool useInnerSize = internalArrayDims > 1 || !isImplicitlyArrayed;
const int requiredInnerArraySize = arrayLoc * (useInnerSize ? internalInnerArraySize : 1);
const int requiredOuterArraySize = (internalArrayDims > 0) ? internalArraySizes->getDimSize(0) : 1;
TType clipCullType(EbtFloat, clipCullNode->getType().getQualifier().storage, 1);
clipCullType.getQualifier() = clipCullNode->getType().getQualifier();
// Create required array dimension
TArraySizes arraySizes;
arraySizes.addInnerSize(requiredArraySize);
if (isImplicitlyArrayed)
arraySizes.addInnerSize(requiredOuterArraySize);
arraySizes.addInnerSize(requiredInnerArraySize);
clipCullType.newArraySizes(arraySizes);
// Obtain symbol name: we'll use that for the symbol we introduce.
@@ -2369,10 +2406,13 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
// Create symbol for the clip or cull variable.
TIntermSymbol* clipCullSym = intermediate.addSymbol(**clipCullVar);
// array sizes, or 1 if it's not an array:
const int clipCullSymArraySize = (clipCullSym->getType().isArray() ? clipCullSym->getType().getOuterArraySize() : 1);
// vector sizes:
const int clipCullSymVectorSize = clipCullSym->getType().getVectorSize();
const int clipCullVectorSize = clipCullSym->getType().getVectorSize();
// array sizes, or 1 if it's not an array:
const TArraySizes* const clipCullArraySizes = clipCullSym->getType().getArraySizes();
const int clipCullOuterArraySize = isImplicitlyArrayed ? clipCullArraySizes->getDimSize(0) : 1;
const int clipCullInnerArraySize = clipCullArraySizes->getDimSize(isImplicitlyArrayed ? 1 : 0);
// clipCullSym has got to be an array of scalar floats, per SPIR-V semantics.
// fixBuiltInIoType() should have handled that upstream.
@@ -2390,8 +2430,9 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
// If the types are homomorphic, use a simple assign. No need to mess about with
// individual components.
if (clipCullSym->getType().isArray() == internalNode->getType().isArray() &&
clipCullSymArraySize == internalNodeArraySize &&
clipCullSymVectorSize == internalNodeVectorSize) {
clipCullInnerArraySize == internalInnerArraySize &&
clipCullOuterArraySize == internalOuterArraySize &&
clipCullVectorSize == internalVectorSize) {
if (isOutput)
clipCullAssign = intermediate.addAssign(op, clipCullSym, internalNode, loc);
@@ -2407,47 +2448,61 @@ TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc
// We are going to copy each component of the internal (per array element if indicated) to sequential
// array elements of the clipCullSym. This tracks the lhs element we're writing to as we go along.
// We may be starting in the middle - e.g, for a non-zero semantic ID calculated above.
int clipCullArrayPos = semanticOffset[semanticId];
int clipCullInnerArrayPos = semanticOffset[semanticId];
int clipCullOuterArrayPos = 0;
// Lambda to add an index to a node, set the type of the result, and return the new node.
//   node: the typed node to index into.
//   pos:  the constant index to apply.
// Captures 'this' (to reach 'intermediate') and 'loc' by reference.
const auto addIndex = [this, &loc](TIntermTyped* node, int pos) -> TIntermTyped* {
// Compute the result type from the un-indexed node first, since 'node' is reassigned below.
// NOTE(review): presumably TType(type, 0) yields the type with one level dereferenced - confirm.
const TType derefType(node->getType(), 0);
// Build a direct (EOpIndexDirect) index whose operand is 'pos' as a constant.
node = intermediate.addIndex(EOpIndexDirect, node, intermediate.addConstantUnion(pos, loc), loc);
node->setType(derefType);
return node;
};
// Loop through every component of every element of the internal, and copy to or from the matching external.
for (int internalArrayPos = 0; internalArrayPos < internalNodeArraySize; ++internalArrayPos) {
for (int internalComponent = 0; internalComponent < internalNodeVectorSize; ++internalComponent) {
TIntermTyped* clipCullMember = clipCullSym;
for (int internalOuterArrayPos = 0; internalOuterArrayPos < internalOuterArraySize; ++internalOuterArrayPos) {
for (int internalInnerArrayPos = 0; internalInnerArrayPos < internalInnerArraySize; ++internalInnerArrayPos) {
for (int internalComponent = 0; internalComponent < internalVectorSize; ++internalComponent) {
// clip/cull array member to read from / write to:
TIntermTyped* clipCullMember = clipCullSym;
// array member to read from / write to:
{
const TType derefType(clipCullMember->getType(), 0);
clipCullMember = intermediate.addIndex(EOpIndexDirect, clipCullMember,
intermediate.addConstantUnion(clipCullArrayPos++, loc), loc);
clipCullMember->setType(derefType);
// If implicitly arrayed, there is an outer array dimension involved
if (isImplicitlyArrayed)
clipCullMember = addIndex(clipCullMember, clipCullOuterArrayPos);
// Index into proper array position for clip cull member
clipCullMember = addIndex(clipCullMember, clipCullInnerArrayPos++);
// if needed, start over with next outer array slice.
if (isImplicitlyArrayed && clipCullInnerArrayPos >= clipCullInnerArraySize) {
clipCullInnerArrayPos = semanticOffset[semanticId];
++clipCullOuterArrayPos;
}
// internal member to read from / write to:
TIntermTyped* internalMember = internalNode;
// If internal node has outer array dimension, index appropriately.
if (internalArrayDims > 1)
internalMember = addIndex(internalMember, internalOuterArrayPos);
// If internal node has inner array dimension, index appropriately.
if (internalArrayDims > 0)
internalMember = addIndex(internalMember, internalInnerArrayPos);
// If internal node is a vector, extract the component of interest.
if (internalNode->getType().isVector())
internalMember = addIndex(internalMember, internalComponent);
// Create an assignment: output from internal to clip cull, or input from clip cull to internal.
if (isOutput)
clipCullAssign = intermediate.addAssign(op, clipCullMember, internalMember, loc);
else
clipCullAssign = intermediate.addAssign(op, internalMember, clipCullMember, loc);
// Track assignment in the sequence.
assignList = intermediate.growAggregate(assignList, clipCullAssign);
}
TIntermTyped* internalMember = internalNode;
// If internal node is an array, extract the element of interest
if (internalNode->getType().isArray()) {
const TType derefType(internalMember->getType(), 0);
internalMember = intermediate.addIndex(EOpIndexDirect, internalMember,
intermediate.addConstantUnion(internalArrayPos, loc), loc);
internalMember->setType(derefType);
}
// If internal node is a vector, extract the component of interest.
if (internalNode->getType().isVector()) {
const TType derefType(internalMember->getType(), 0);
internalMember = intermediate.addIndex(EOpIndexDirect, internalMember,
intermediate.addConstantUnion(internalComponent, loc), loc);
internalMember->setType(derefType);
}
// Create an assignment: output from internal to clip cull, or input from clip cull to internal.
if (isOutput)
clipCullAssign = intermediate.addAssign(op, clipCullMember, internalMember, loc);
else
clipCullAssign = intermediate.addAssign(op, internalMember, clipCullMember, loc);
// Track assignment in the sequence.
assignList = intermediate.growAggregate(assignList, clipCullAssign);
}
}