Merge pull request #619 from steve-lunarg/opcode-specific-promote
HLSL: opcode specific promotion rules for interlocked ops
This commit is contained in:
commit
302e619e4e
109
Test/baseResults/hlsl.promote.atomic.frag.out
Normal file
109
Test/baseResults/hlsl.promote.atomic.frag.out
Normal file
@ -0,0 +1,109 @@
|
||||
hlsl.promote.atomic.frag
|
||||
Shader version: 450
|
||||
gl_FragCoord origin is upper left
|
||||
0:? Sequence
|
||||
0:5 Function Definition: main( (temp 4-component vector of float)
|
||||
0:5 Function Parameters:
|
||||
0:? Sequence
|
||||
0:13 move second child to first child (temp int)
|
||||
0:13 'Orig' (temp int)
|
||||
0:13 Convert uint to int (temp int)
|
||||
0:13 imageAtomicAdd (temp uint)
|
||||
0:13 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
|
||||
0:13 'Loc' (temp int)
|
||||
0:13 Convert int to uint (temp uint)
|
||||
0:13 'Inc' (temp int)
|
||||
0:15 Sequence
|
||||
0:15 move second child to first child (temp 4-component vector of float)
|
||||
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
|
||||
0:? Constant:
|
||||
0:? 0.000000
|
||||
0:? 0.000000
|
||||
0:? 0.000000
|
||||
0:? 0.000000
|
||||
0:15 Branch: Return
|
||||
0:? Linker Objects
|
||||
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
|
||||
0:? 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
|
||||
|
||||
|
||||
Linked fragment stage:
|
||||
|
||||
|
||||
Shader version: 450
|
||||
gl_FragCoord origin is upper left
|
||||
0:? Sequence
|
||||
0:5 Function Definition: main( (temp 4-component vector of float)
|
||||
0:5 Function Parameters:
|
||||
0:? Sequence
|
||||
0:13 move second child to first child (temp int)
|
||||
0:13 'Orig' (temp int)
|
||||
0:13 Convert uint to int (temp int)
|
||||
0:13 imageAtomicAdd (temp uint)
|
||||
0:13 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
|
||||
0:13 'Loc' (temp int)
|
||||
0:13 Convert int to uint (temp uint)
|
||||
0:13 'Inc' (temp int)
|
||||
0:15 Sequence
|
||||
0:15 move second child to first child (temp 4-component vector of float)
|
||||
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
|
||||
0:? Constant:
|
||||
0:? 0.000000
|
||||
0:? 0.000000
|
||||
0:? 0.000000
|
||||
0:? 0.000000
|
||||
0:15 Branch: Return
|
||||
0:? Linker Objects
|
||||
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
|
||||
0:? 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
|
||||
|
||||
// Module Version 10000
|
||||
// Generated by (magic number): 80001
|
||||
// Id's are bound by 31
|
||||
|
||||
Capability Shader
|
||||
Capability SampledBuffer
|
||||
1: ExtInstImport "GLSL.std.450"
|
||||
MemoryModel Logical GLSL450
|
||||
EntryPoint Fragment 4 "main" 27
|
||||
ExecutionMode 4 OriginUpperLeft
|
||||
Name 4 "main"
|
||||
Name 8 "Orig"
|
||||
Name 12 "s_uintbuff"
|
||||
Name 13 "Loc"
|
||||
Name 15 "Inc"
|
||||
Name 27 "@entryPointOutput"
|
||||
Decorate 12(s_uintbuff) DescriptorSet 0
|
||||
Decorate 27(@entryPointOutput) Location 0
|
||||
2: TypeVoid
|
||||
3: TypeFunction 2
|
||||
6: TypeInt 32 1
|
||||
7: TypePointer Function 6(int)
|
||||
9: TypeInt 32 0
|
||||
10: TypeImage 9(int) Buffer nonsampled format:R32ui
|
||||
11: TypePointer UniformConstant 10
|
||||
12(s_uintbuff): 11(ptr) Variable UniformConstant
|
||||
18: 9(int) Constant 0
|
||||
19: TypePointer Image 9(int)
|
||||
21: 9(int) Constant 1
|
||||
24: TypeFloat 32
|
||||
25: TypeVector 24(float) 4
|
||||
26: TypePointer Output 25(fvec4)
|
||||
27(@entryPointOutput): 26(ptr) Variable Output
|
||||
28: 24(float) Constant 0
|
||||
29: 25(fvec4) ConstantComposite 28 28 28 28
|
||||
4(main): 2 Function None 3
|
||||
5: Label
|
||||
8(Orig): 7(ptr) Variable Function
|
||||
13(Loc): 7(ptr) Variable Function
|
||||
15(Inc): 7(ptr) Variable Function
|
||||
14: 6(int) Load 13(Loc)
|
||||
16: 6(int) Load 15(Inc)
|
||||
17: 9(int) Bitcast 16
|
||||
20: 19(ptr) ImageTexelPointer 12(s_uintbuff) 14 18
|
||||
22: 9(int) AtomicIAdd 20 21 18 17
|
||||
23: 6(int) Bitcast 22
|
||||
Store 8(Orig) 23
|
||||
Store 27(@entryPointOutput) 29
|
||||
Return
|
||||
FunctionEnd
|
||||
17
Test/hlsl.promote.atomic.frag
Normal file
17
Test/hlsl.promote.atomic.frag
Normal file
@ -0,0 +1,17 @@
|
||||
|
||||
RWBuffer<uint> s_uintbuff; // UINT RWBuffer ...
|
||||
|
||||
float4 main() : SV_Target
|
||||
{
|
||||
int Loc; // ... with INT variables
|
||||
int Inc;
|
||||
int Orig;
|
||||
|
||||
// This must select the uint flavor of SPIR-V atomic op, and promote
|
||||
// the other arguments as required. The output value from the
|
||||
// imageAtomicAdd AST will be converted to an int for 'Orig'.
|
||||
InterlockedAdd(s_uintbuff[Loc], Inc, Orig);
|
||||
|
||||
return float4(0,0,0,0);
|
||||
}
|
||||
|
||||
@ -304,7 +304,7 @@ TVariable* TParseContextBase::getEditableVariable(const char* name)
|
||||
const TFunction* TParseContextBase::selectFunction(
|
||||
const TVector<const TFunction*> candidateList,
|
||||
const TFunction& call,
|
||||
std::function<bool(const TType& from, const TType& to)> convertible,
|
||||
std::function<bool(const TType& from, const TType& to, TOperator op, int arg)> convertible,
|
||||
std::function<bool(const TType& from, const TType& to1, const TType& to2)> better,
|
||||
/* output */ bool& tie)
|
||||
{
|
||||
@ -356,13 +356,13 @@ const TFunction* TParseContextBase::selectFunction(
|
||||
bool viable = true;
|
||||
for (int param = 0; param < candidate.getParamCount(); ++param) {
|
||||
if (candidate[param].type->getQualifier().isParamInput()) {
|
||||
if (! convertible(*call[param].type, *candidate[param].type)) {
|
||||
if (! convertible(*call[param].type, *candidate[param].type, candidate.getBuiltInOp(), param)) {
|
||||
viable = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (candidate[param].type->getQualifier().isParamOutput()) {
|
||||
if (! convertible(*candidate[param].type, *call[param].type)) {
|
||||
if (! convertible(*candidate[param].type, *call[param].type, candidate.getBuiltInOp(), param)) {
|
||||
viable = false;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -4875,7 +4875,7 @@ const TFunction* TParseContext::findFunction400(const TSourceLoc& loc, const TFu
|
||||
symbolTable.findFunctionNameList(call.getMangledName(), candidateList, builtIn);
|
||||
|
||||
// can 'from' convert to 'to'?
|
||||
const auto convertible = [this](const TType& from, const TType& to) -> bool {
|
||||
const auto convertible = [this](const TType& from, const TType& to, TOperator, int) -> bool {
|
||||
if (from == to)
|
||||
return true;
|
||||
if (from.isArray() || to.isArray() || ! from.sameElementShape(to))
|
||||
|
||||
@ -167,7 +167,7 @@ protected:
|
||||
|
||||
// see implementation for detail
|
||||
const TFunction* selectFunction(const TVector<const TFunction*>, const TFunction&,
|
||||
std::function<bool(const TType&, const TType&)>,
|
||||
std::function<bool(const TType&, const TType&, TOperator, int arg)>,
|
||||
std::function<bool(const TType&, const TType&, const TType&)>,
|
||||
/* output */ bool& tie);
|
||||
|
||||
|
||||
@ -163,6 +163,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
{"hlsl.partialInit.frag", "PixelShaderFunction"},
|
||||
{"hlsl.pp.line.frag", "main"},
|
||||
{"hlsl.precise.frag", "main"},
|
||||
{"hlsl.promote.atomic.frag", "main"},
|
||||
{"hlsl.promote.binary.frag", "main"},
|
||||
{"hlsl.promote.vec1.frag", "main"},
|
||||
{"hlsl.promotions.frag", "main"},
|
||||
|
||||
@ -4394,8 +4394,10 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
|
||||
return candidateList[0];
|
||||
}
|
||||
|
||||
bool allowOnlyUpConversions = true;
|
||||
|
||||
// can 'from' convert to 'to'?
|
||||
const auto convertible = [this](const TType& from, const TType& to) -> bool {
|
||||
const auto convertible = [&](const TType& from, const TType& to, TOperator op, int arg) -> bool {
|
||||
if (from == to)
|
||||
return true;
|
||||
|
||||
@ -4404,9 +4406,33 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
|
||||
from.isStruct() || to.isStruct())
|
||||
return false;
|
||||
|
||||
switch (op) {
|
||||
case EOpInterlockedAdd:
|
||||
case EOpInterlockedAnd:
|
||||
case EOpInterlockedCompareExchange:
|
||||
case EOpInterlockedCompareStore:
|
||||
case EOpInterlockedExchange:
|
||||
case EOpInterlockedMax:
|
||||
case EOpInterlockedMin:
|
||||
case EOpInterlockedOr:
|
||||
case EOpInterlockedXor:
|
||||
// We do not promote the texture or image type for these ocodes. Normally that would not
|
||||
// be an issue because it's a buffer, but we haven't decomposed the opcode yet, and at this
|
||||
// stage it's merely e.g, a basic integer type.
|
||||
//
|
||||
// Instead, we want to promote other arguments, but stay within the same family. In other
|
||||
// words, InterlockedAdd(RWBuffer<int>, ...) will always use the int flavor, never the uint flavor,
|
||||
// but it is allowed to promote its other arguments.
|
||||
if (arg == 0)
|
||||
return false;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// basic types have to be convertible
|
||||
if (! intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType(), EOpFunctionCall))
|
||||
return false;
|
||||
if (allowOnlyUpConversions)
|
||||
if (! intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType(), EOpFunctionCall))
|
||||
return false;
|
||||
|
||||
// shapes have to be convertible
|
||||
if ((from.isScalarOrVec1() && to.isScalarOrVec1()) ||
|
||||
@ -4472,6 +4498,14 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
|
||||
// send to the generic selector
|
||||
const TFunction* bestMatch = selectFunction(candidateList, call, convertible, better, tie);
|
||||
|
||||
if (bestMatch == nullptr) {
|
||||
// If there is nothing selected by allowing only up-conversions (to a larger linearize() value),
|
||||
// we instead try down-conversions, which are valid in HLSL, but not preferred if there are any
|
||||
// upconversions possible.
|
||||
allowOnlyUpConversions = false;
|
||||
bestMatch = selectFunction(candidateList, call, convertible, better, tie);
|
||||
}
|
||||
|
||||
if (bestMatch == nullptr) {
|
||||
error(loc, "no matching overloaded function found", call.getName().c_str(), "");
|
||||
return nullptr;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user