Merge pull request #1029 from amdrexu/feature2

Implement extension GL_AMD_shader_image_load_store_lod
John Kessenich, 2017-08-23 23:13:19 -06:00 (committed by GitHub)
commit fc3436941e
12 changed files with 298 additions and 1 deletion
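
In brief, the extension adds explicit-LOD variants of the image builtins: imageLoadLodAMD, imageStoreLodAMD, and sparseImageLoadLodAMD. A minimal GLSL sketch of their use (the names and binding here are illustrative; the full test shader added by this change appears further below):

    #version 450 core
    #extension GL_AMD_shader_image_load_store_lod : enable
    layout(rgba32f, binding = 0) uniform image2D img;     // illustrative image binding
    void main()
    {
        const int lod = 3;
        vec4 v = imageLoadLodAMD(img, ivec2(1, 2), lod);   // load from an explicit LOD
        imageStoreLodAMD(img, ivec2(1, 2), lod, v);        // store to an explicit LOD
    }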

@@ -33,7 +33,7 @@ enum Decoration;
enum Op;
static const int GLSLextAMDVersion = 100;
static const int GLSLextAMDRevision = 4;
static const int GLSLextAMDRevision = 5;
// SPV_AMD_shader_ballot
static const char* const E_SPV_AMD_shader_ballot = "SPV_AMD_shader_ballot";
@@ -101,4 +101,9 @@ static const char* const E_SPV_AMD_texture_gather_bias_lod = "SPV_AMD_texture_ga
// SPV_AMD_gpu_shader_int16
static const char* const E_SPV_AMD_gpu_shader_int16 = "SPV_AMD_gpu_shader_int16";
// SPV_AMD_shader_image_load_store_lod
static const char* const E_SPV_AMD_shader_image_load_store_lod = "SPV_AMD_shader_image_load_store_lod";
static const Capability CapabilityImageReadWriteLodAMD = static_cast<Capability>(5015);
#endif // #ifndef GLSLextAMD_H

@@ -1453,7 +1453,11 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
builder.setAccessChainRValue(result);
return false;
#ifdef AMD_EXTENSIONS
} else if (node->getOp() == glslang::EOpImageStore || node->getOp() == glslang::EOpImageStoreLod) {
#else
} else if (node->getOp() == glslang::EOpImageStore) {
#endif
// "imageStore" is a special case, which has no result
return false;
}
@@ -3137,6 +3141,10 @@ void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate&
if (i == 4)
lvalue = true;
break;
case glslang::EOpSparseImageLoadLod:
if (i == 3)
lvalue = true;
break;
#endif
default:
break;
@@ -3239,26 +3247,55 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
}
operands.push_back(*(opIt++));
#ifdef AMD_EXTENSIONS
if (node->getOp() == glslang::EOpImageLoad || node->getOp() == glslang::EOpImageLoadLod) {
#else
if (node->getOp() == glslang::EOpImageLoad) {
#endif
if (sampler.ms) {
operands.push_back(spv::ImageOperandsSampleMask);
operands.push_back(*opIt);
#ifdef AMD_EXTENSIONS
} else if (cracked.lod) {
builder.addExtension(spv::E_SPV_AMD_shader_image_load_store_lod);
builder.addCapability(spv::CapabilityImageReadWriteLodAMD);
operands.push_back(spv::ImageOperandsLodMask);
operands.push_back(*opIt);
#endif
}
if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown)
builder.addCapability(spv::CapabilityStorageImageReadWithoutFormat);
return builder.createOp(spv::OpImageRead, resultType(), operands);
#ifdef AMD_EXTENSIONS
} else if (node->getOp() == glslang::EOpImageStore || node->getOp() == glslang::EOpImageStoreLod) {
#else
} else if (node->getOp() == glslang::EOpImageStore) {
#endif
if (sampler.ms) {
operands.push_back(*(opIt + 1));
operands.push_back(spv::ImageOperandsSampleMask);
operands.push_back(*opIt);
#ifdef AMD_EXTENSIONS
} else if (cracked.lod) {
builder.addExtension(spv::E_SPV_AMD_shader_image_load_store_lod);
builder.addCapability(spv::CapabilityImageReadWriteLodAMD);
operands.push_back(*(opIt + 1));
operands.push_back(spv::ImageOperandsLodMask);
operands.push_back(*opIt);
#endif
} else
operands.push_back(*opIt);
builder.createNoResultOp(spv::OpImageWrite, operands);
if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown)
builder.addCapability(spv::CapabilityStorageImageWriteWithoutFormat);
return spv::NoResult;
#ifdef AMD_EXTENSIONS
} else if (node->getOp() == glslang::EOpSparseImageLoad || node->getOp() == glslang::EOpSparseImageLoadLod) {
#else
} else if (node->getOp() == glslang::EOpSparseImageLoad) {
#endif
builder.addCapability(spv::CapabilitySparseResidency);
if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown)
builder.addCapability(spv::CapabilityStorageImageReadWithoutFormat);
@@ -3266,6 +3303,14 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
if (sampler.ms) {
operands.push_back(spv::ImageOperandsSampleMask);
operands.push_back(*opIt++);
#ifdef AMD_EXTENSIONS
} else if (cracked.lod) {
builder.addExtension(spv::E_SPV_AMD_shader_image_load_store_lod);
builder.addCapability(spv::CapabilityImageReadWriteLodAMD);
operands.push_back(spv::ImageOperandsLodMask);
operands.push_back(*opIt++);
#endif
}
// Create the return type that was a special structure
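
For orientation, the branches added above lower the new builtins onto the existing image opcodes with an extra Lod image operand, while also declaring the SPV_AMD_shader_image_load_store_lod extension and the ImageReadWriteLodAMD capability. A rough sketch of the mapping, consistent with the new baseline further below (angle-bracket names are placeholders):

    // imageLoadLodAMD(img, coord, lod)              ->  OpImageRead        <image> <coord>         Lod <lod>
    // imageStoreLodAMD(img, coord, lod, data)       ->  OpImageWrite       <image> <coord> <data>  Lod <lod>
    // sparseImageLoadLodAMD(img, coord, lod, texel) ->  OpImageSparseRead  <image> <coord>         Lod <lod>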

@@ -847,6 +847,7 @@ const char* CapabilityString(int info)
#ifdef AMD_EXTENSIONS
case 5009: return "ImageGatherBiasLodAMD";
case 5015: return "ImageReadWriteLodAMD";
#endif
case 4445: return "AtomicStorageOps";

@@ -0,0 +1,135 @@
spv.imageLoadStoreLod.frag
// Module Version 10000
// Generated by (magic number): 80001
// Id's are bound by 82
Capability Shader
Capability ImageCubeArray
Capability SparseResidency
Capability Image1D
Capability ImageReadWriteLodAMD
Extension "SPV_AMD_shader_image_load_store_lod"
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
EntryPoint Fragment 4 "main" 77
ExecutionMode 4 OriginUpperLeft
Source GLSL 450
SourceExtension "GL_AMD_shader_image_load_store_lod"
Name 4 "main"
Name 9 "f4"
Name 14 "i1D"
Name 24 "i2D"
Name 34 "i3D"
Name 46 "iiCube"
Name 53 "ii1DArray"
Name 60 "ui2DArray"
Name 64 "u4"
Name 65 "ResType"
Name 71 "uiCubeArray"
Name 77 "fragColor"
Decorate 14(i1D) DescriptorSet 0
Decorate 14(i1D) Binding 0
Decorate 24(i2D) DescriptorSet 0
Decorate 24(i2D) Binding 1
Decorate 34(i3D) DescriptorSet 0
Decorate 34(i3D) Binding 2
Decorate 46(iiCube) DescriptorSet 0
Decorate 46(iiCube) Binding 3
Decorate 53(ii1DArray) DescriptorSet 0
Decorate 53(ii1DArray) Binding 4
Decorate 60(ui2DArray) DescriptorSet 0
Decorate 60(ui2DArray) Binding 5
Decorate 71(uiCubeArray) DescriptorSet 0
Decorate 71(uiCubeArray) Binding 6
Decorate 77(fragColor) Location 0
2: TypeVoid
3: TypeFunction 2
6: TypeFloat 32
7: TypeVector 6(float) 4
8: TypePointer Function 7(fvec4)
10: 6(float) Constant 0
11: 7(fvec4) ConstantComposite 10 10 10 10
12: TypeImage 6(float) 1D nonsampled format:Rgba32f
13: TypePointer UniformConstant 12
14(i1D): 13(ptr) Variable UniformConstant
16: TypeInt 32 1
17: 16(int) Constant 1
18: 16(int) Constant 3
22: TypeImage 6(float) 2D nonsampled format:Rgba32f
23: TypePointer UniformConstant 22
24(i2D): 23(ptr) Variable UniformConstant
26: TypeVector 16(int) 2
27: 16(int) Constant 2
28: 26(ivec2) ConstantComposite 27 18
32: TypeImage 6(float) 3D nonsampled format:Rgba32f
33: TypePointer UniformConstant 32
34(i3D): 33(ptr) Variable UniformConstant
36: TypeVector 16(int) 3
37: 16(int) Constant 4
38: 16(int) Constant 5
39: 16(int) Constant 6
40: 36(ivec3) ConstantComposite 37 38 39
44: TypeImage 16(int) Cube nonsampled format:Rgba32i
45: TypePointer UniformConstant 44
46(iiCube): 45(ptr) Variable UniformConstant
49: TypeVector 16(int) 4
51: TypeImage 16(int) 1D array nonsampled format:Rgba32i
52: TypePointer UniformConstant 51
53(ii1DArray): 52(ptr) Variable UniformConstant
57: TypeInt 32 0
58: TypeImage 57(int) 2D array nonsampled format:Rgba32ui
59: TypePointer UniformConstant 58
60(ui2DArray): 59(ptr) Variable UniformConstant
62: TypeVector 57(int) 4
63: TypePointer Function 62(ivec4)
65(ResType): TypeStruct 16(int) 62(ivec4)
69: TypeImage 57(int) Cube array nonsampled format:Rgba32ui
70: TypePointer UniformConstant 69
71(uiCubeArray): 70(ptr) Variable UniformConstant
76: TypePointer Output 7(fvec4)
77(fragColor): 76(ptr) Variable Output
4(main): 2 Function None 3
5: Label
9(f4): 8(ptr) Variable Function
64(u4): 63(ptr) Variable Function
Store 9(f4) 11
15: 12 Load 14(i1D)
19: 7(fvec4) ImageRead 15 17 Lod 18
20: 7(fvec4) Load 9(f4)
21: 7(fvec4) FAdd 20 19
Store 9(f4) 21
25: 22 Load 24(i2D)
29: 7(fvec4) ImageRead 25 28 Lod 18
30: 7(fvec4) Load 9(f4)
31: 7(fvec4) FAdd 30 29
Store 9(f4) 31
35: 32 Load 34(i3D)
41: 7(fvec4) ImageRead 35 40 Lod 18
42: 7(fvec4) Load 9(f4)
43: 7(fvec4) FAdd 42 41
Store 9(f4) 43
47: 44 Load 46(iiCube)
48: 7(fvec4) Load 9(f4)
50: 49(ivec4) ConvertFToS 48
ImageWrite 47 40 50 Lod 18
54: 51 Load 53(ii1DArray)
55: 7(fvec4) Load 9(f4)
56: 49(ivec4) ConvertFToS 55
ImageWrite 54 28 56 Lod 18
61: 58 Load 60(ui2DArray)
66: 65(ResType) ImageSparseRead 61 40 Lod 18
67: 62(ivec4) CompositeExtract 66 1
Store 64(u4) 67
68: 16(int) CompositeExtract 66 0
72: 69 Load 71(uiCubeArray)
73: 65(ResType) ImageSparseRead 72 40 Lod 18
74: 62(ivec4) CompositeExtract 73 1
Store 64(u4) 74
75: 16(int) CompositeExtract 73 0
78: 7(fvec4) Load 9(f4)
79: 62(ivec4) Load 64(u4)
80: 7(fvec4) ConvertUToF 79
81: 7(fvec4) FAdd 78 80
Store 77(fragColor) 81
Return
FunctionEnd

@@ -0,0 +1,36 @@
#version 450 core
#extension GL_AMD_shader_image_load_store_lod: enable
layout(rgba32f, binding = 0) uniform image1D i1D;
layout(rgba32f, binding = 1) uniform image2D i2D;
layout(rgba32f, binding = 2) uniform image3D i3D;
layout(rgba32i, binding = 3) uniform iimageCube iiCube;
layout(rgba32i, binding = 4) uniform iimage1DArray ii1DArray;
layout(rgba32ui, binding = 5) uniform uimage2DArray ui2DArray;
layout(rgba32ui, binding = 6) uniform uimageCubeArray uiCubeArray;
layout(location = 0) out vec4 fragColor;
void main()
{
const int c1 = 1;
const ivec2 c2 = ivec2(2, 3);
const ivec3 c3 = ivec3(4, 5, 6);
const int lod = 3;
vec4 f4 = vec4(0.0);
f4 += imageLoadLodAMD(i1D, c1, lod);
f4 += imageLoadLodAMD(i2D, c2, lod);
f4 += imageLoadLodAMD(i3D, c3, lod);
imageStoreLodAMD(iiCube, c3, lod, ivec4(f4));
imageStoreLodAMD(ii1DArray, c2, lod, ivec4(f4));
uvec4 u4;
sparseImageLoadLodAMD(ui2DArray, c3, lod, u4);
sparseImageLoadLodAMD(uiCubeArray, c3, lod, u4);
fragColor = f4 + vec4(u4);
}

@@ -593,6 +593,10 @@ enum TOperator {
EOpImageQuerySamples,
EOpImageLoad,
EOpImageStore,
#ifdef AMD_EXTENSIONS
EOpImageLoadLod,
EOpImageStoreLod,
#endif
EOpImageAtomicAdd,
EOpImageAtomicMin,
EOpImageAtomicMax,
@@ -605,6 +609,9 @@ enum TOperator {
EOpSubpassLoad,
EOpSubpassLoadMS,
EOpSparseImageLoad,
#ifdef AMD_EXTENSIONS
EOpSparseImageLoadLod,
#endif
EOpImageGuardEnd,
@@ -1198,6 +1205,11 @@ public:
cracked.offsets = true;
cracked.lod = true;
break;
case EOpImageLoadLod:
case EOpImageStoreLod:
case EOpSparseImageLoadLod:
cracked.lod = true;
break;
#endif
case EOpSubpassLoad:
case EOpSubpassLoadMS:

@@ -4238,6 +4238,43 @@ void TBuiltIns::addImageFunctions(TSampler sampler, const TString& typeName, int
}
}
}
#ifdef AMD_EXTENSIONS
if (sampler.dim == EsdRect || sampler.dim == EsdBuffer || sampler.shadow || sampler.ms)
return;
if (profile == EEsProfile || version < 450)
return;
TString imageLodParams = typeName;
if (dims == 1)
imageLodParams.append(", int");
else {
imageLodParams.append(", ivec");
imageLodParams.append(postfixes[dims]);
}
imageLodParams.append(", int");
commonBuiltins.append(prefixes[sampler.type]);
commonBuiltins.append("vec4 imageLoadLodAMD(readonly volatile coherent ");
commonBuiltins.append(imageLodParams);
commonBuiltins.append(");\n");
commonBuiltins.append("void imageStoreLodAMD(writeonly volatile coherent ");
commonBuiltins.append(imageLodParams);
commonBuiltins.append(", ");
commonBuiltins.append(prefixes[sampler.type]);
commonBuiltins.append("vec4);\n");
if (sampler.dim != Esd1D) {
commonBuiltins.append("int sparseImageLoadLodAMD(readonly volatile coherent ");
commonBuiltins.append(imageLodParams);
commonBuiltins.append(", out ");
commonBuiltins.append(prefixes[sampler.type]);
commonBuiltins.append("vec4");
commonBuiltins.append(");\n");
}
#endif
}
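
To make the string building above concrete: for a float image2D (dims == 2, empty type prefix), the appended declarations would come out approximately as:

    vec4 imageLoadLodAMD(readonly volatile coherent image2D, ivec2, int);
    void imageStoreLodAMD(writeonly volatile coherent image2D, ivec2, int, vec4);
    int  sparseImageLoadLodAMD(readonly volatile coherent image2D, ivec2, int, out vec4);
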
//
@@ -5710,6 +5747,13 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
symbolTable.setFunctionExtensions("sparseTextureGatherLodOffsetAMD", 1, &E_GL_AMD_texture_gather_bias_lod);
symbolTable.setFunctionExtensions("sparseTextureGatherLodOffsetsAMD", 1, &E_GL_AMD_texture_gather_bias_lod);
}
// E_GL_AMD_shader_image_load_store_lod
if (profile != EEsProfile) {
symbolTable.setFunctionExtensions("imageLoadLodAMD", 1, &E_GL_AMD_shader_image_load_store_lod);
symbolTable.setFunctionExtensions("imageStoreLodAMD", 1, &E_GL_AMD_shader_image_load_store_lod);
symbolTable.setFunctionExtensions("sparseImageLoadLodAMD", 1, &E_GL_AMD_shader_image_load_store_lod);
}
#endif
symbolTable.setVariableExtensions("gl_FragDepthEXT", 1, &E_GL_EXT_frag_depth);
@@ -6146,6 +6190,10 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
symbolTable.relateToOperator("sparseTextureGatherLodAMD", EOpSparseTextureGatherLod);
symbolTable.relateToOperator("sparseTextureGatherLodOffsetAMD", EOpSparseTextureGatherLodOffset);
symbolTable.relateToOperator("sparseTextureGatherLodOffsetsAMD", EOpSparseTextureGatherLodOffsets);
symbolTable.relateToOperator("imageLoadLodAMD", EOpImageLoadLod);
symbolTable.relateToOperator("imageStoreLodAMD", EOpImageStoreLod);
symbolTable.relateToOperator("sparseImageLoadLodAMD", EOpSparseImageLoadLod);
#endif
}
if (profile == EEsProfile) {

@@ -1144,7 +1144,13 @@ void TParseContext::computeBuiltinPrecisions(TIntermTyped& node, const TFunction
operationPrecision = std::max(operationPrecision, function[arg].type->getQualifier().precision);
}
// compute the result precision
#ifdef AMD_EXTENSIONS
if (agg->isSampling() ||
agg->getOp() == EOpImageLoad || agg->getOp() == EOpImageStore ||
agg->getOp() == EOpImageLoadLod || agg->getOp() == EOpImageStoreLod)
#else
if (agg->isSampling() || agg->getOp() == EOpImageLoad || agg->getOp() == EOpImageStore)
#endif
resultPrecision = sequence[0]->getAsTyped()->getQualifier().precision;
else if (function.getType().getBasicType() != EbtBool)
resultPrecision = function.getType().getQualifier().precision == EpqNone ?

@@ -200,6 +200,7 @@ void TParseVersions::initializeExtensionBehavior()
extensionBehavior[E_GL_AMD_gpu_shader_half_float] = EBhDisable;
extensionBehavior[E_GL_AMD_texture_gather_bias_lod] = EBhDisable;
extensionBehavior[E_GL_AMD_gpu_shader_int16] = EBhDisable;
extensionBehavior[E_GL_AMD_shader_image_load_store_lod] = EBhDisable;
#endif
#ifdef NV_EXTENSIONS
@@ -331,6 +332,7 @@ void TParseVersions::getPreamble(std::string& preamble)
"#define GL_AMD_gpu_shader_half_float 1\n"
"#define GL_AMD_texture_gather_bias_lod 1\n"
"#define GL_AMD_gpu_shader_int16 1\n"
"#define GL_AMD_shader_image_load_store_lod 1\n"
#endif
#ifdef NV_EXTENSIONS

@@ -170,6 +170,7 @@ const char* const E_GL_AMD_gcn_shader = "GL_AMD_gcn_sh
const char* const E_GL_AMD_gpu_shader_half_float = "GL_AMD_gpu_shader_half_float";
const char* const E_GL_AMD_texture_gather_bias_lod = "GL_AMD_texture_gather_bias_lod";
const char* const E_GL_AMD_gpu_shader_int16 = "GL_AMD_gpu_shader_int16";
const char* const E_GL_AMD_shader_image_load_store_lod = "GL_AMD_shader_image_load_store_lod";
#endif
#ifdef NV_EXTENSIONS

@@ -704,6 +704,10 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
case EOpImageAtomicXor: out.debug << "imageAtomicXor"; break;
case EOpImageAtomicExchange: out.debug << "imageAtomicExchange"; break;
case EOpImageAtomicCompSwap: out.debug << "imageAtomicCompSwap"; break;
#ifdef AMD_EXTENSIONS
case EOpImageLoadLod: out.debug << "imageLoadLod"; break;
case EOpImageStoreLod: out.debug << "imageStoreLod"; break;
#endif
case EOpTextureQuerySize: out.debug << "textureSize"; break;
case EOpTextureQueryLod: out.debug << "textureQueryLod"; break;
@@ -756,6 +760,7 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
case EOpSparseTextureGatherLod: out.debug << "sparseTextureGatherLod"; break;
case EOpSparseTextureGatherLodOffset: out.debug << "sparseTextureGatherLodOffset"; break;
case EOpSparseTextureGatherLodOffsets: out.debug << "sparseTextureGatherLodOffsets"; break;
case EOpSparseImageLoadLod: out.debug << "sparseImageLoadLod"; break;
#endif
case EOpAddCarry: out.debug << "addCarry"; break;

@@ -406,6 +406,7 @@ INSTANTIATE_TEST_CASE_P(
Glsl, CompileVulkanToSpirvTestAMD,
::testing::ValuesIn(std::vector<std::string>({
"spv.float16.frag",
"spv.imageLoadStoreLod.frag",
"spv.int16.frag",
"spv.shaderBallotAMD.comp",
"spv.textureGatherBiasLod.frag"