Merge pull request #619 from steve-lunarg/opcode-specific-promote

HLSL: opcode specific promotion rules for interlocked ops
This commit is contained in:
John Kessenich 2016-12-08 11:17:21 -07:00 committed by GitHub
commit 302e619e4e
7 changed files with 169 additions and 8 deletions

View File

@ -0,0 +1,109 @@
hlsl.promote.atomic.frag
Shader version: 450
gl_FragCoord origin is upper left
0:? Sequence
0:5 Function Definition: main( (temp 4-component vector of float)
0:5 Function Parameters:
0:? Sequence
0:13 move second child to first child (temp int)
0:13 'Orig' (temp int)
0:13 Convert uint to int (temp int)
0:13 imageAtomicAdd (temp uint)
0:13 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
0:13 'Loc' (temp int)
0:13 Convert int to uint (temp uint)
0:13 'Inc' (temp int)
0:15 Sequence
0:15 move second child to first child (temp 4-component vector of float)
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
0:? Constant:
0:? 0.000000
0:? 0.000000
0:? 0.000000
0:? 0.000000
0:15 Branch: Return
0:? Linker Objects
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
0:? 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
Linked fragment stage:
Shader version: 450
gl_FragCoord origin is upper left
0:? Sequence
0:5 Function Definition: main( (temp 4-component vector of float)
0:5 Function Parameters:
0:? Sequence
0:13 move second child to first child (temp int)
0:13 'Orig' (temp int)
0:13 Convert uint to int (temp int)
0:13 imageAtomicAdd (temp uint)
0:13 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
0:13 'Loc' (temp int)
0:13 Convert int to uint (temp uint)
0:13 'Inc' (temp int)
0:15 Sequence
0:15 move second child to first child (temp 4-component vector of float)
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
0:? Constant:
0:? 0.000000
0:? 0.000000
0:? 0.000000
0:? 0.000000
0:15 Branch: Return
0:? Linker Objects
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
0:? 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
// Module Version 10000
// Generated by (magic number): 80001
// Id's are bound by 31
Capability Shader
Capability SampledBuffer
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
EntryPoint Fragment 4 "main" 27
ExecutionMode 4 OriginUpperLeft
Name 4 "main"
Name 8 "Orig"
Name 12 "s_uintbuff"
Name 13 "Loc"
Name 15 "Inc"
Name 27 "@entryPointOutput"
Decorate 12(s_uintbuff) DescriptorSet 0
Decorate 27(@entryPointOutput) Location 0
2: TypeVoid
3: TypeFunction 2
6: TypeInt 32 1
7: TypePointer Function 6(int)
9: TypeInt 32 0
10: TypeImage 9(int) Buffer nonsampled format:R32ui
11: TypePointer UniformConstant 10
12(s_uintbuff): 11(ptr) Variable UniformConstant
18: 9(int) Constant 0
19: TypePointer Image 9(int)
21: 9(int) Constant 1
24: TypeFloat 32
25: TypeVector 24(float) 4
26: TypePointer Output 25(fvec4)
27(@entryPointOutput): 26(ptr) Variable Output
28: 24(float) Constant 0
29: 25(fvec4) ConstantComposite 28 28 28 28
4(main): 2 Function None 3
5: Label
8(Orig): 7(ptr) Variable Function
13(Loc): 7(ptr) Variable Function
15(Inc): 7(ptr) Variable Function
14: 6(int) Load 13(Loc)
16: 6(int) Load 15(Inc)
17: 9(int) Bitcast 16
20: 19(ptr) ImageTexelPointer 12(s_uintbuff) 14 18
22: 9(int) AtomicIAdd 20 21 18 17
23: 6(int) Bitcast 22
Store 8(Orig) 23
Store 27(@entryPointOutput) 29
Return
FunctionEnd

View File

@ -0,0 +1,17 @@
RWBuffer<uint> s_uintbuff; // UINT RWBuffer: the destination of the atomic is unsigned ...
float4 main() : SV_Target
{
int Loc; // ... with INT variables: buffer index, stays a signed int
int Inc; // signed addend; expected to be converted int -> uint for the atomic
int Orig; // signed output; the atomic's uint result is converted uint -> int into it
// This must select the uint flavor of the SPIR-V atomic op (chosen by the
// buffer's element type, not by the int arguments), and promote the other
// arguments as required. The output value from the imageAtomicAdd AST will be converted to an int for 'Orig'.
InterlockedAdd(s_uintbuff[Loc], Inc, Orig);
return float4(0,0,0,0); // constant color output
}

View File

@ -304,7 +304,7 @@ TVariable* TParseContextBase::getEditableVariable(const char* name)
const TFunction* TParseContextBase::selectFunction(
const TVector<const TFunction*> candidateList,
const TFunction& call,
std::function<bool(const TType& from, const TType& to)> convertible,
std::function<bool(const TType& from, const TType& to, TOperator op, int arg)> convertible,
std::function<bool(const TType& from, const TType& to1, const TType& to2)> better,
/* output */ bool& tie)
{
@ -356,13 +356,13 @@ const TFunction* TParseContextBase::selectFunction(
bool viable = true;
for (int param = 0; param < candidate.getParamCount(); ++param) {
if (candidate[param].type->getQualifier().isParamInput()) {
if (! convertible(*call[param].type, *candidate[param].type)) {
if (! convertible(*call[param].type, *candidate[param].type, candidate.getBuiltInOp(), param)) {
viable = false;
break;
}
}
if (candidate[param].type->getQualifier().isParamOutput()) {
if (! convertible(*candidate[param].type, *call[param].type)) {
if (! convertible(*candidate[param].type, *call[param].type, candidate.getBuiltInOp(), param)) {
viable = false;
break;
}

View File

@ -4875,7 +4875,7 @@ const TFunction* TParseContext::findFunction400(const TSourceLoc& loc, const TFu
symbolTable.findFunctionNameList(call.getMangledName(), candidateList, builtIn);
// can 'from' convert to 'to'?
const auto convertible = [this](const TType& from, const TType& to) -> bool {
const auto convertible = [this](const TType& from, const TType& to, TOperator, int) -> bool {
if (from == to)
return true;
if (from.isArray() || to.isArray() || ! from.sameElementShape(to))

View File

@ -167,7 +167,7 @@ protected:
// see implementation for detail
const TFunction* selectFunction(const TVector<const TFunction*>, const TFunction&,
std::function<bool(const TType&, const TType&)>,
std::function<bool(const TType&, const TType&, TOperator, int arg)>,
std::function<bool(const TType&, const TType&, const TType&)>,
/* output */ bool& tie);

View File

@ -163,6 +163,7 @@ INSTANTIATE_TEST_CASE_P(
{"hlsl.partialInit.frag", "PixelShaderFunction"},
{"hlsl.pp.line.frag", "main"},
{"hlsl.precise.frag", "main"},
{"hlsl.promote.atomic.frag", "main"},
{"hlsl.promote.binary.frag", "main"},
{"hlsl.promote.vec1.frag", "main"},
{"hlsl.promotions.frag", "main"},

View File

@ -4394,8 +4394,10 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
return candidateList[0];
}
bool allowOnlyUpConversions = true;
// can 'from' convert to 'to'?
const auto convertible = [this](const TType& from, const TType& to) -> bool {
const auto convertible = [&](const TType& from, const TType& to, TOperator op, int arg) -> bool {
if (from == to)
return true;
@ -4404,7 +4406,31 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
from.isStruct() || to.isStruct())
return false;
switch (op) {
case EOpInterlockedAdd:
case EOpInterlockedAnd:
case EOpInterlockedCompareExchange:
case EOpInterlockedCompareStore:
case EOpInterlockedExchange:
case EOpInterlockedMax:
case EOpInterlockedMin:
case EOpInterlockedOr:
case EOpInterlockedXor:
// We do not promote the texture or image type for these opcodes. Normally that would not
// be an issue because it's a buffer, but we haven't decomposed the opcode yet, and at this
// stage it's merely, e.g., a basic integer type.
//
// Instead, we want to promote other arguments, but stay within the same family. In other
// words, InterlockedAdd(RWBuffer<int>, ...) will always use the int flavor, never the uint flavor,
// but it is allowed to promote its other arguments.
if (arg == 0)
return false;
default:
break;
}
// basic types have to be convertible
if (allowOnlyUpConversions)
if (! intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType(), EOpFunctionCall))
return false;
@ -4472,6 +4498,14 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
// send to the generic selector
const TFunction* bestMatch = selectFunction(candidateList, call, convertible, better, tie);
if (bestMatch == nullptr) {
// If there is nothing selected by allowing only up-conversions (to a larger linearize() value),
// we instead try down-conversions, which are valid in HLSL, but not preferred if there are any
// up-conversions possible.
allowOnlyUpConversions = false;
bestMatch = selectFunction(candidateList, call, convertible, better, tie);
}
if (bestMatch == nullptr) {
error(loc, "no matching overloaded function found", call.getName().c_str(), "");
return nullptr;