diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp index ad16396f..745dd7f2 100755 --- a/SPIRV/GlslangToSpv.cpp +++ b/SPIRV/GlslangToSpv.cpp @@ -2608,6 +2608,17 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt atomic = true; break; + case glslang::EOpAbsDifference: + case glslang::EOpAddSaturate: + case glslang::EOpSubSaturate: + case glslang::EOpAverage: + case glslang::EOpAverageRounded: + case glslang::EOpMul32x16: + builder.addCapability(spv::CapabilityIntegerFunctions2INTEL); + builder.addExtension("SPV_INTEL_shader_integer_functions2"); + binOp = node->getOp(); + break; + case glslang::EOpIgnoreIntersectionNV: case glslang::EOpTerminateRayNV: case glslang::EOpTraceNV: @@ -5231,6 +5242,30 @@ spv::Id TGlslangToSpvTraverser::createBinaryOperation(glslang::TOperator op, OpD binOp = spv::OpLogicalNotEqual; break; + case glslang::EOpAbsDifference: + binOp = isUnsigned ? spv::OpAbsUSubINTEL : spv::OpAbsISubINTEL; + break; + + case glslang::EOpAddSaturate: + binOp = isUnsigned ? spv::OpUAddSatINTEL : spv::OpIAddSatINTEL; + break; + + case glslang::EOpSubSaturate: + binOp = isUnsigned ? spv::OpUSubSatINTEL : spv::OpISubSatINTEL; + break; + + case glslang::EOpAverage: + binOp = isUnsigned ? spv::OpUAverageINTEL : spv::OpIAverageINTEL; + break; + + case glslang::EOpAverageRounded: + binOp = isUnsigned ? spv::OpUAverageRoundedINTEL : spv::OpIAverageRoundedINTEL; + break; + + case glslang::EOpMul32x16: + binOp = isUnsigned ? spv::OpUMul32x16INTEL : spv::OpIMul32x16INTEL; + break; + case glslang::EOpLessThan: case glslang::EOpGreaterThan: case glslang::EOpLessThanEqual: @@ -5745,6 +5780,18 @@ spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, OpDe libCall = spv::GLSLstd450FindSMsb; break; + case glslang::EOpCountLeadingZeros: + builder.addCapability(spv::CapabilityIntegerFunctions2INTEL); + builder.addExtension("SPV_INTEL_shader_integer_functions2"); + unaryOp = spv::OpUCountLeadingZerosINTEL; + break; + + case glslang::EOpCountTrailingZeros: + builder.addCapability(spv::CapabilityIntegerFunctions2INTEL); + builder.addExtension("SPV_INTEL_shader_integer_functions2"); + unaryOp = spv::OpUCountTrailingZerosINTEL; + break; + case glslang::EOpBallot: case glslang::EOpReadFirstInvocation: case glslang::EOpAnyInvocation: diff --git a/SPIRV/doc.cpp b/SPIRV/doc.cpp index b0bcdfbe..bee5c797 100644 --- a/SPIRV/doc.cpp +++ b/SPIRV/doc.cpp @@ -931,6 +931,8 @@ const char* CapabilityString(int info) case CapabilityDemoteToHelperInvocationEXT: return "DemoteToHelperInvocationEXT"; case CapabilityShaderClockKHR: return "ShaderClockKHR"; + case CapabilityIntegerFunctions2INTEL: return "CapabilityIntegerFunctions2INTEL"; + default: return "Bad"; } } diff --git a/Test/baseResults/spv.specConstant.vert.out b/Test/baseResults/spv.specConstant.vert.out index 1be39501..3b04e7cc 100644 --- a/Test/baseResults/spv.specConstant.vert.out +++ b/Test/baseResults/spv.specConstant.vert.out @@ -11,7 +11,7 @@ spv.specConstant.vert Source GLSL 400 Name 4 "main" Name 9 "arraySize" - Name 14 "foo(vf4[s4393];" + Name 14 "foo(vf4[s4529];" Name 13 "p" Name 17 "builtin_spec_constant(" Name 20 "color" @@ -102,10 +102,10 @@ spv.specConstant.vert Store 20(color) 46 48: 10 Load 22(ucol) Store 47(param) 48 - 49: 2 FunctionCall 14(foo(vf4[s4393];) 47(param) + 49: 2 FunctionCall 14(foo(vf4[s4529];) 47(param) Return FunctionEnd -14(foo(vf4[s4393];): 2 Function None 12 +14(foo(vf4[s4529];): 2 Function None 12 13(p): 11(ptr) FunctionParameter 15: Label 54: 24(ptr) AccessChain 53(dupUcol) 23 diff --git a/glslang/Include/intermediate.h b/glslang/Include/intermediate.h index f82115be..29d58ca6 100644 --- a/glslang/Include/intermediate.h +++ b/glslang/Include/intermediate.h @@ -899,6 +899,15 @@ enum TOperator { EOpFindLSB, EOpFindMSB, + EOpCountLeadingZeros, + EOpCountTrailingZeros, + EOpAbsDifference, + EOpAddSaturate, + EOpSubSaturate, + EOpAverage, + EOpAverageRounded, + EOpMul32x16, + EOpTraceNV, EOpReportIntersectionNV, EOpIgnoreIntersectionNV, diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp index ef50af20..80adc937 100644 --- a/glslang/MachineIndependent/Initialize.cpp +++ b/glslang/MachineIndependent/Initialize.cpp @@ -2854,6 +2854,181 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "\n"); } + if ((profile != EEsProfile && version >= 130) || + (profile == EEsProfile && version >= 300)) { + commonBuiltins.append( + "uint countLeadingZeros(uint);" + "uvec2 countLeadingZeros(uvec2);" + "uvec3 countLeadingZeros(uvec3);" + "uvec4 countLeadingZeros(uvec4);" + + "uint countTrailingZeros(uint);" + "uvec2 countTrailingZeros(uvec2);" + "uvec3 countTrailingZeros(uvec3);" + "uvec4 countTrailingZeros(uvec4);" + + "uint absoluteDifference(int, int);" + "uvec2 absoluteDifference(ivec2, ivec2);" + "uvec3 absoluteDifference(ivec3, ivec3);" + "uvec4 absoluteDifference(ivec4, ivec4);" + + "uint16_t absoluteDifference(int16_t, int16_t);" + "u16vec2 absoluteDifference(i16vec2, i16vec2);" + "u16vec3 absoluteDifference(i16vec3, i16vec3);" + "u16vec4 absoluteDifference(i16vec4, i16vec4);" + + "uint64_t absoluteDifference(int64_t, int64_t);" + "u64vec2 absoluteDifference(i64vec2, i64vec2);" + "u64vec3 absoluteDifference(i64vec3, i64vec3);" + "u64vec4 absoluteDifference(i64vec4, i64vec4);" + + "uint absoluteDifference(uint, uint);" + "uvec2 absoluteDifference(uvec2, uvec2);" + "uvec3 absoluteDifference(uvec3, uvec3);" + "uvec4 absoluteDifference(uvec4, uvec4);" + + "uint16_t absoluteDifference(uint16_t, uint16_t);" + "u16vec2 absoluteDifference(u16vec2, u16vec2);" + "u16vec3 absoluteDifference(u16vec3, u16vec3);" + "u16vec4 absoluteDifference(u16vec4, u16vec4);" + + "uint64_t absoluteDifference(uint64_t, uint64_t);" + "u64vec2 absoluteDifference(u64vec2, u64vec2);" + "u64vec3 absoluteDifference(u64vec3, u64vec3);" + "u64vec4 absoluteDifference(u64vec4, u64vec4);" + + "int addSaturate(int, int);" + "ivec2 addSaturate(ivec2, ivec2);" + "ivec3 addSaturate(ivec3, ivec3);" + "ivec4 addSaturate(ivec4, ivec4);" + + "int16_t addSaturate(int16_t, int16_t);" + "i16vec2 addSaturate(i16vec2, i16vec2);" + "i16vec3 addSaturate(i16vec3, i16vec3);" + "i16vec4 addSaturate(i16vec4, i16vec4);" + + "int64_t addSaturate(int64_t, int64_t);" + "i64vec2 addSaturate(i64vec2, i64vec2);" + "i64vec3 addSaturate(i64vec3, i64vec3);" + "i64vec4 addSaturate(i64vec4, i64vec4);" + + "uint addSaturate(uint, uint);" + "uvec2 addSaturate(uvec2, uvec2);" + "uvec3 addSaturate(uvec3, uvec3);" + "uvec4 addSaturate(uvec4, uvec4);" + + "uint16_t addSaturate(uint16_t, uint16_t);" + "u16vec2 addSaturate(u16vec2, u16vec2);" + "u16vec3 addSaturate(u16vec3, u16vec3);" + "u16vec4 addSaturate(u16vec4, u16vec4);" + + "uint64_t addSaturate(uint64_t, uint64_t);" + "u64vec2 addSaturate(u64vec2, u64vec2);" + "u64vec3 addSaturate(u64vec3, u64vec3);" + "u64vec4 addSaturate(u64vec4, u64vec4);" + + "int subtractSaturate(int, int);" + "ivec2 subtractSaturate(ivec2, ivec2);" + "ivec3 subtractSaturate(ivec3, ivec3);" + "ivec4 subtractSaturate(ivec4, ivec4);" + + "int16_t subtractSaturate(int16_t, int16_t);" + "i16vec2 subtractSaturate(i16vec2, i16vec2);" + "i16vec3 subtractSaturate(i16vec3, i16vec3);" + "i16vec4 subtractSaturate(i16vec4, i16vec4);" + + "int64_t subtractSaturate(int64_t, int64_t);" + "i64vec2 subtractSaturate(i64vec2, i64vec2);" + "i64vec3 subtractSaturate(i64vec3, i64vec3);" + "i64vec4 subtractSaturate(i64vec4, i64vec4);" + + "uint subtractSaturate(uint, uint);" + "uvec2 subtractSaturate(uvec2, uvec2);" + "uvec3 subtractSaturate(uvec3, uvec3);" + "uvec4 subtractSaturate(uvec4, uvec4);" + + "uint16_t subtractSaturate(uint16_t, uint16_t);" + "u16vec2 subtractSaturate(u16vec2, u16vec2);" + "u16vec3 subtractSaturate(u16vec3, u16vec3);" + "u16vec4 subtractSaturate(u16vec4, u16vec4);" + + "uint64_t subtractSaturate(uint64_t, uint64_t);" + "u64vec2 subtractSaturate(u64vec2, u64vec2);" + "u64vec3 subtractSaturate(u64vec3, u64vec3);" + "u64vec4 subtractSaturate(u64vec4, u64vec4);" + + "int average(int, int);" + "ivec2 average(ivec2, ivec2);" + "ivec3 average(ivec3, ivec3);" + "ivec4 average(ivec4, ivec4);" + + "int16_t average(int16_t, int16_t);" + "i16vec2 average(i16vec2, i16vec2);" + "i16vec3 average(i16vec3, i16vec3);" + "i16vec4 average(i16vec4, i16vec4);" + + "int64_t average(int64_t, int64_t);" + "i64vec2 average(i64vec2, i64vec2);" + "i64vec3 average(i64vec3, i64vec3);" + "i64vec4 average(i64vec4, i64vec4);" + + "uint average(uint, uint);" + "uvec2 average(uvec2, uvec2);" + "uvec3 average(uvec3, uvec3);" + "uvec4 average(uvec4, uvec4);" + + "uint16_t average(uint16_t, uint16_t);" + "u16vec2 average(u16vec2, u16vec2);" + "u16vec3 average(u16vec3, u16vec3);" + "u16vec4 average(u16vec4, u16vec4);" + + "uint64_t average(uint64_t, uint64_t);" + "u64vec2 average(u64vec2, u64vec2);" + "u64vec3 average(u64vec3, u64vec3);" + "u64vec4 average(u64vec4, u64vec4);" + + "int averageRounded(int, int);" + "ivec2 averageRounded(ivec2, ivec2);" + "ivec3 averageRounded(ivec3, ivec3);" + "ivec4 averageRounded(ivec4, ivec4);" + + "int16_t averageRounded(int16_t, int16_t);" + "i16vec2 averageRounded(i16vec2, i16vec2);" + "i16vec3 averageRounded(i16vec3, i16vec3);" + "i16vec4 averageRounded(i16vec4, i16vec4);" + + "int64_t averageRounded(int64_t, int64_t);" + "i64vec2 averageRounded(i64vec2, i64vec2);" + "i64vec3 averageRounded(i64vec3, i64vec3);" + "i64vec4 averageRounded(i64vec4, i64vec4);" + + "uint averageRounded(uint, uint);" + "uvec2 averageRounded(uvec2, uvec2);" + "uvec3 averageRounded(uvec3, uvec3);" + "uvec4 averageRounded(uvec4, uvec4);" + + "uint16_t averageRounded(uint16_t, uint16_t);" + "u16vec2 averageRounded(u16vec2, u16vec2);" + "u16vec3 averageRounded(u16vec3, u16vec3);" + "u16vec4 averageRounded(u16vec4, u16vec4);" + + "uint64_t averageRounded(uint64_t, uint64_t);" + "u64vec2 averageRounded(u64vec2, u64vec2);" + "u64vec3 averageRounded(u64vec3, u64vec3);" + "u64vec4 averageRounded(u64vec4, u64vec4);" + + "int multiply32x16(int, int);" + "ivec2 multiply32x16(ivec2, ivec2);" + "ivec3 multiply32x16(ivec3, ivec3);" + "ivec4 multiply32x16(ivec4, ivec4);" + + "uint multiply32x16(uint, uint);" + "uvec2 multiply32x16(uvec2, uvec2);" + "uvec3 multiply32x16(uvec3, uvec3);" + "uvec4 multiply32x16(uvec4, uvec4);" + "\n"); + } + if ((profile != EEsProfile && version >= 450) || (profile == EEsProfile && version >= 320)) { commonBuiltins.append( @@ -6974,6 +7149,15 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion symbolTable.setFunctionExtensions("fragmentFetchAMD", 1, &E_GL_AMD_shader_fragment_mask); } + symbolTable.setFunctionExtensions("countLeadingZeros", 1, &E_GL_INTEL_shader_integer_functions2); + symbolTable.setFunctionExtensions("countTrailingZeros", 1, &E_GL_INTEL_shader_integer_functions2); + symbolTable.setFunctionExtensions("absoluteDifference", 1, &E_GL_INTEL_shader_integer_functions2); + symbolTable.setFunctionExtensions("addSaturate", 1, &E_GL_INTEL_shader_integer_functions2); + symbolTable.setFunctionExtensions("subtractSaturate", 1, &E_GL_INTEL_shader_integer_functions2); + symbolTable.setFunctionExtensions("average", 1, &E_GL_INTEL_shader_integer_functions2); + symbolTable.setFunctionExtensions("averageRounded", 1, &E_GL_INTEL_shader_integer_functions2); + symbolTable.setFunctionExtensions("multiply32x16", 1, &E_GL_INTEL_shader_integer_functions2); + symbolTable.setFunctionExtensions("textureFootprintNV", 1, &E_GL_NV_shader_texture_footprint); symbolTable.setFunctionExtensions("textureFootprintClampNV", 1, &E_GL_NV_shader_texture_footprint); symbolTable.setFunctionExtensions("textureFootprintLodNV", 1, &E_GL_NV_shader_texture_footprint); @@ -8217,6 +8401,15 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion symbolTable.relateToOperator("helperInvocationEXT", EOpIsHelperInvocation); + symbolTable.relateToOperator("countLeadingZeros", EOpCountLeadingZeros); + symbolTable.relateToOperator("countTrailingZeros", EOpCountTrailingZeros); + symbolTable.relateToOperator("absoluteDifference", EOpAbsDifference); + symbolTable.relateToOperator("addSaturate", EOpAddSaturate); + symbolTable.relateToOperator("subtractSaturate", EOpSubSaturate); + symbolTable.relateToOperator("average", EOpAverage); + symbolTable.relateToOperator("averageRounded", EOpAverageRounded); + symbolTable.relateToOperator("multiply32x16", EOpMul32x16); + if (PureOperatorBuiltins) { symbolTable.relateToOperator("imageSize", EOpImageQuerySize); symbolTable.relateToOperator("imageSamples", EOpImageQuerySamples); diff --git a/glslang/MachineIndependent/Versions.cpp b/glslang/MachineIndependent/Versions.cpp index 518ba6cd..23cc5302 100644 --- a/glslang/MachineIndependent/Versions.cpp +++ b/glslang/MachineIndependent/Versions.cpp @@ -236,6 +236,8 @@ void TParseVersions::initializeExtensionBehavior() extensionBehavior[E_GL_AMD_shader_fragment_mask] = EBhDisable; extensionBehavior[E_GL_AMD_gpu_shader_half_float_fetch] = EBhDisable; + extensionBehavior[E_GL_INTEL_shader_integer_functions2] = EBhDisable; + extensionBehavior[E_GL_NV_sample_mask_override_coverage] = EBhDisable; extensionBehavior[E_SPV_NV_geometry_shader_passthrough] = EBhDisable; extensionBehavior[E_GL_NV_viewport_array2] = EBhDisable; @@ -434,6 +436,8 @@ void TParseVersions::getPreamble(std::string& preamble) "#define GL_AMD_shader_fragment_mask 1\n" "#define GL_AMD_gpu_shader_half_float_fetch 1\n" + "#define GL_INTEL_shader_integer_functions2 1\n" + "#define GL_NV_sample_mask_override_coverage 1\n" "#define GL_NV_geometry_shader_passthrough 1\n" "#define GL_NV_viewport_array2 1\n" diff --git a/glslang/MachineIndependent/Versions.h b/glslang/MachineIndependent/Versions.h index 84fafd77..5048ad7d 100644 --- a/glslang/MachineIndependent/Versions.h +++ b/glslang/MachineIndependent/Versions.h @@ -206,6 +206,8 @@ const char* const E_GL_AMD_shader_image_load_store_lod = "GL_AMD_shader const char* const E_GL_AMD_shader_fragment_mask = "GL_AMD_shader_fragment_mask"; const char* const E_GL_AMD_gpu_shader_half_float_fetch = "GL_AMD_gpu_shader_half_float_fetch"; +const char* const E_GL_INTEL_shader_integer_functions2 = "GL_INTEL_shader_integer_functions2"; + const char* const E_GL_NV_sample_mask_override_coverage = "GL_NV_sample_mask_override_coverage"; const char* const E_SPV_NV_geometry_shader_passthrough = "GL_NV_geometry_shader_passthrough"; const char* const E_GL_NV_viewport_array2 = "GL_NV_viewport_array2"; diff --git a/glslang/MachineIndependent/intermOut.cpp b/glslang/MachineIndependent/intermOut.cpp index fbfede07..3a93aeda 100644 --- a/glslang/MachineIndependent/intermOut.cpp +++ b/glslang/MachineIndependent/intermOut.cpp @@ -213,6 +213,13 @@ bool TOutputTraverser::visitBinary(TVisit /* visit */, TIntermBinary* node) case EOpLogicalXor: out.debug << "logical-xor"; break; case EOpLogicalAnd: out.debug << "logical-and"; break; + case EOpAbsDifference: out.debug << "absoluteDifference"; break; + case EOpAddSaturate: out.debug << "addSaturate"; break; + case EOpSubSaturate: out.debug << "subtractSaturate"; break; + case EOpAverage: out.debug << "average"; break; + case EOpAverageRounded: out.debug << "averageRounded"; break; + case EOpMul32x16: out.debug << "multiply32x16"; break; + default: out.debug << ""; } @@ -557,6 +564,9 @@ bool TOutputTraverser::visitUnary(TVisit /* visit */, TIntermUnary* node) case EOpFindLSB: out.debug << "findLSB"; break; case EOpFindMSB: out.debug << "findMSB"; break; + case EOpCountLeadingZeros: out.debug << "countLeadingZeros"; break; + case EOpCountTrailingZeros: out.debug << "countTrailingZeros"; break; + case EOpNoise: out.debug << "noise"; break; case EOpBallot: out.debug << "ballot"; break;