Merge pull request #1806 from KhronosGroup/nan-clamp
SPV: Add a switch for favoring non-NaN operands in min, max, and clamp.
This commit is contained in:
		
						commit
						a9b00ac5d5
					
				| @ -224,6 +224,7 @@ protected: | |||||||
|     bool linkageOnly;                  // true when visiting the set of objects in the AST present only for establishing interface, whether or not they were statically used
 |     bool linkageOnly;                  // true when visiting the set of objects in the AST present only for establishing interface, whether or not they were statically used
 | ||||||
|     std::set<spv::Id> iOSet;           // all input/output variables from either static use or declaration of interface
 |     std::set<spv::Id> iOSet;           // all input/output variables from either static use or declaration of interface
 | ||||||
|     const glslang::TIntermediate* glslangIntermediate; |     const glslang::TIntermediate* glslangIntermediate; | ||||||
|  |     bool nanMinMaxClamp;               // true if use NMin/NMax/NClamp instead of FMin/FMax/FClamp
 | ||||||
|     spv::Id stdBuiltins; |     spv::Id stdBuiltins; | ||||||
|     std::unordered_map<const char*, spv::Id> extBuiltinMap; |     std::unordered_map<const char*, spv::Id> extBuiltinMap; | ||||||
| 
 | 
 | ||||||
| @ -1313,7 +1314,8 @@ TGlslangToSpvTraverser::TGlslangToSpvTraverser(unsigned int spvVersion, const gl | |||||||
|       sequenceDepth(0), logger(buildLogger), |       sequenceDepth(0), logger(buildLogger), | ||||||
|       builder(spvVersion, (glslang::GetKhronosToolId() << 16) | glslang::GetSpirvGeneratorVersion(), logger), |       builder(spvVersion, (glslang::GetKhronosToolId() << 16) | glslang::GetSpirvGeneratorVersion(), logger), | ||||||
|       inEntryPoint(false), entryPointTerminated(false), linkageOnly(false), |       inEntryPoint(false), entryPointTerminated(false), linkageOnly(false), | ||||||
|       glslangIntermediate(glslangIntermediate) |       glslangIntermediate(glslangIntermediate), | ||||||
|  |       nanMinMaxClamp(glslangIntermediate->getNanMinMaxClamp()) | ||||||
| { | { | ||||||
|     spv::ExecutionModel executionModel = TranslateExecutionModel(glslangIntermediate->getStage()); |     spv::ExecutionModel executionModel = TranslateExecutionModel(glslangIntermediate->getStage()); | ||||||
| 
 | 
 | ||||||
| @ -7014,7 +7016,7 @@ spv::Id TGlslangToSpvTraverser::createMiscOperation(glslang::TOperator op, spv:: | |||||||
|     switch (op) { |     switch (op) { | ||||||
|     case glslang::EOpMin: |     case glslang::EOpMin: | ||||||
|         if (isFloat) |         if (isFloat) | ||||||
|             libCall = spv::GLSLstd450FMin; |             libCall = nanMinMaxClamp ? spv::GLSLstd450NMin : spv::GLSLstd450FMin; | ||||||
|         else if (isUnsigned) |         else if (isUnsigned) | ||||||
|             libCall = spv::GLSLstd450UMin; |             libCall = spv::GLSLstd450UMin; | ||||||
|         else |         else | ||||||
| @ -7026,7 +7028,7 @@ spv::Id TGlslangToSpvTraverser::createMiscOperation(glslang::TOperator op, spv:: | |||||||
|         break; |         break; | ||||||
|     case glslang::EOpMax: |     case glslang::EOpMax: | ||||||
|         if (isFloat) |         if (isFloat) | ||||||
|             libCall = spv::GLSLstd450FMax; |             libCall = nanMinMaxClamp ? spv::GLSLstd450NMax : spv::GLSLstd450FMax; | ||||||
|         else if (isUnsigned) |         else if (isUnsigned) | ||||||
|             libCall = spv::GLSLstd450UMax; |             libCall = spv::GLSLstd450UMax; | ||||||
|         else |         else | ||||||
| @ -7045,7 +7047,7 @@ spv::Id TGlslangToSpvTraverser::createMiscOperation(glslang::TOperator op, spv:: | |||||||
| 
 | 
 | ||||||
|     case glslang::EOpClamp: |     case glslang::EOpClamp: | ||||||
|         if (isFloat) |         if (isFloat) | ||||||
|             libCall = spv::GLSLstd450FClamp; |             libCall = nanMinMaxClamp ? spv::GLSLstd450NClamp : spv::GLSLstd450FClamp; | ||||||
|         else if (isUnsigned) |         else if (isUnsigned) | ||||||
|             libCall = spv::GLSLstd450UClamp; |             libCall = spv::GLSLstd450UClamp; | ||||||
|         else |         else | ||||||
|  | |||||||
| @ -648,6 +648,9 @@ static void GLSLstd450GetDebugNames(const char** names) | |||||||
|     names[GLSLstd450InterpolateAtCentroid]   = "InterpolateAtCentroid"; |     names[GLSLstd450InterpolateAtCentroid]   = "InterpolateAtCentroid"; | ||||||
|     names[GLSLstd450InterpolateAtSample]     = "InterpolateAtSample"; |     names[GLSLstd450InterpolateAtSample]     = "InterpolateAtSample"; | ||||||
|     names[GLSLstd450InterpolateAtOffset]     = "InterpolateAtOffset"; |     names[GLSLstd450InterpolateAtOffset]     = "InterpolateAtOffset"; | ||||||
|  |     names[GLSLstd450NMin]                    = "NMin"; | ||||||
|  |     names[GLSLstd450NMax]                    = "NMax"; | ||||||
|  |     names[GLSLstd450NClamp]                  = "NClamp"; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #ifdef AMD_EXTENSIONS | #ifdef AMD_EXTENSIONS | ||||||
|  | |||||||
| @ -104,6 +104,7 @@ enum TOptions { | |||||||
| bool targetHlslFunctionality1 = false; | bool targetHlslFunctionality1 = false; | ||||||
| bool SpvToolsDisassembler = false; | bool SpvToolsDisassembler = false; | ||||||
| bool SpvToolsValidate = false; | bool SpvToolsValidate = false; | ||||||
|  | bool NaNClamp = false; | ||||||
| 
 | 
 | ||||||
| //
 | //
 | ||||||
| // Return codes from main/exit().
 | // Return codes from main/exit().
 | ||||||
| @ -522,6 +523,8 @@ void ProcessArguments(std::vector<std::unique_ptr<glslang::TWorkItem>>& workItem | |||||||
|                     } else if (lowerword == "keep-uncalled" || // synonyms
 |                     } else if (lowerword == "keep-uncalled" || // synonyms
 | ||||||
|                                lowerword == "ku") { |                                lowerword == "ku") { | ||||||
|                         Options |= EOptionKeepUncalled; |                         Options |= EOptionKeepUncalled; | ||||||
|  |                     } else if (lowerword == "nan-clamp") { | ||||||
|  |                         NaNClamp = true; | ||||||
|                     } else if (lowerword == "no-storage-format" || // synonyms
 |                     } else if (lowerword == "no-storage-format" || // synonyms
 | ||||||
|                                lowerword == "nsf") { |                                lowerword == "nsf") { | ||||||
|                         Options |= EOptionNoStorageFormat; |                         Options |= EOptionNoStorageFormat; | ||||||
| @ -981,6 +984,7 @@ void CompileAndLinkShaderUnits(std::vector<ShaderCompUnit> compUnits) | |||||||
| 
 | 
 | ||||||
|         shader->setFlattenUniformArrays((Options & EOptionFlattenUniformArrays) != 0); |         shader->setFlattenUniformArrays((Options & EOptionFlattenUniformArrays) != 0); | ||||||
|         shader->setNoStorageFormat((Options & EOptionNoStorageFormat) != 0); |         shader->setNoStorageFormat((Options & EOptionNoStorageFormat) != 0); | ||||||
|  |         shader->setNanMinMaxClamp(NaNClamp); | ||||||
|         shader->setResourceSetBinding(baseResourceSetBinding[compUnit.stage]); |         shader->setResourceSetBinding(baseResourceSetBinding[compUnit.stage]); | ||||||
| 
 | 
 | ||||||
|         if (Options & EOptionHlslIoMapping) |         if (Options & EOptionHlslIoMapping) | ||||||
| @ -1533,9 +1537,11 @@ void usage() | |||||||
|            "                                    works independently of source language\n" |            "                                    works independently of source language\n" | ||||||
|            "  --hlsl-iomap                      perform IO mapping in HLSL register space\n" |            "  --hlsl-iomap                      perform IO mapping in HLSL register space\n" | ||||||
|            "  --hlsl-enable-16bit-types         allow 16-bit types in SPIR-V for HLSL\n" |            "  --hlsl-enable-16bit-types         allow 16-bit types in SPIR-V for HLSL\n" | ||||||
|            "  --hlsl-dx9-compatible             interprets sampler declarations as a texture/sampler combo like DirectX9 would." |            "  --hlsl-dx9-compatible             interprets sampler declarations as a\n" | ||||||
|  |            "                                    texture/sampler combo like DirectX9 would.\n" | ||||||
|            "  --invert-y | --iy                 invert position.Y output in vertex shader\n" |            "  --invert-y | --iy                 invert position.Y output in vertex shader\n" | ||||||
|            "  --keep-uncalled | --ku            don't eliminate uncalled functions\n" |            "  --keep-uncalled | --ku            don't eliminate uncalled functions\n" | ||||||
|  |            "  --nan-clamp                       favor non-NaN operand in min, max, and clamp\n" | ||||||
|            "  --no-storage-format | --nsf       use Unknown image format\n" |            "  --no-storage-format | --nsf       use Unknown image format\n" | ||||||
|            "  --reflect-strict-array-suffix     use strict array suffix rules when\n" |            "  --reflect-strict-array-suffix     use strict array suffix rules when\n" | ||||||
|            "                                    reflecting\n" |            "                                    reflecting\n" | ||||||
|  | |||||||
							
								
								
									
										1402
									
								
								Test/baseResults/spv.400.frag.nanclamp.out
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1402
									
								
								Test/baseResults/spv.400.frag.nanclamp.out
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -239,6 +239,13 @@ $EXE -D -E hlsl.pp.expand.frag > $TARGETDIR/hlsl.pp.expand.frag.out 2> $TARGETDI | |||||||
| diff -b $BASEDIR/hlsl.pp.expand.frag.out $TARGETDIR/hlsl.pp.expand.frag.out || HASERROR=1 | diff -b $BASEDIR/hlsl.pp.expand.frag.out $TARGETDIR/hlsl.pp.expand.frag.out || HASERROR=1 | ||||||
| diff -b $BASEDIR/hlsl.pp.expand.frag.err $TARGETDIR/hlsl.pp.expand.frag.err || HASERROR=1 | diff -b $BASEDIR/hlsl.pp.expand.frag.err $TARGETDIR/hlsl.pp.expand.frag.err || HASERROR=1 | ||||||
| 
 | 
 | ||||||
|  | # | ||||||
|  | # Test --nan-clamp | ||||||
|  | # | ||||||
|  | echo "Testing nan-clamp" | ||||||
|  | $EXE --nan-clamp -H --aml --amb spv.400.frag > $TARGETDIR/spv.400.frag.nanclamp.out | ||||||
|  | diff -b $BASEDIR/spv.400.frag.nanclamp.out $TARGETDIR/spv.400.frag.nanclamp.out || HASERROR=1 | ||||||
|  | 
 | ||||||
| # | # | ||||||
| # Final checking | # Final checking | ||||||
| # | # | ||||||
|  | |||||||
| @ -1791,6 +1791,7 @@ void TShader::setUniformLocationBase(int base) | |||||||
| void TShader::setHlslIoMapping(bool hlslIoMap)          { intermediate->setHlslIoMapping(hlslIoMap); } | void TShader::setHlslIoMapping(bool hlslIoMap)          { intermediate->setHlslIoMapping(hlslIoMap); } | ||||||
| void TShader::setFlattenUniformArrays(bool flatten)     { intermediate->setFlattenUniformArrays(flatten); } | void TShader::setFlattenUniformArrays(bool flatten)     { intermediate->setFlattenUniformArrays(flatten); } | ||||||
| void TShader::setNoStorageFormat(bool useUnknownFormat) { intermediate->setNoStorageFormat(useUnknownFormat); } | void TShader::setNoStorageFormat(bool useUnknownFormat) { intermediate->setNoStorageFormat(useUnknownFormat); } | ||||||
|  | void TShader::setNanMinMaxClamp(bool useNonNan)         { intermediate->setNanMinMaxClamp(useNonNan); } | ||||||
| void TShader::setResourceSetBinding(const std::vector<std::string>& base)   { intermediate->setResourceSetBinding(base); } | void TShader::setResourceSetBinding(const std::vector<std::string>& base)   { intermediate->setResourceSetBinding(base); } | ||||||
| void TShader::setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode) { intermediate->setTextureSamplerTransformMode(mode); } | void TShader::setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode) { intermediate->setTextureSamplerTransformMode(mode); } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -266,7 +266,8 @@ public: | |||||||
|         needToLegalize(false), |         needToLegalize(false), | ||||||
|         binaryDoubleOutput(false), |         binaryDoubleOutput(false), | ||||||
|         usePhysicalStorageBuffer(false), |         usePhysicalStorageBuffer(false), | ||||||
|         uniformLocationBase(0) |         uniformLocationBase(0), | ||||||
|  |         nanMinMaxClamp(false) | ||||||
|     { |     { | ||||||
|         localSize[0] = 1; |         localSize[0] = 1; | ||||||
|         localSize[1] = 1; |         localSize[1] = 1; | ||||||
| @ -767,6 +768,9 @@ public: | |||||||
|     void setUniformLocationBase(int base) { uniformLocationBase = base; } |     void setUniformLocationBase(int base) { uniformLocationBase = base; } | ||||||
|     int getUniformLocationBase() const { return uniformLocationBase; } |     int getUniformLocationBase() const { return uniformLocationBase; } | ||||||
| 
 | 
 | ||||||
|  |     void setNanMinMaxClamp(bool setting) { nanMinMaxClamp = setting; } | ||||||
|  |     bool getNanMinMaxClamp() const { return nanMinMaxClamp; } | ||||||
|  | 
 | ||||||
|     void setNeedsLegalization() { needToLegalize = true; } |     void setNeedsLegalization() { needToLegalize = true; } | ||||||
|     bool needsLegalization() const { return needToLegalize; } |     bool needsLegalization() const { return needToLegalize; } | ||||||
| 
 | 
 | ||||||
| @ -900,6 +904,7 @@ protected: | |||||||
| 
 | 
 | ||||||
|     std::unordered_map<std::string, int> uniformLocationOverrides; |     std::unordered_map<std::string, int> uniformLocationOverrides; | ||||||
|     int uniformLocationBase; |     int uniformLocationBase; | ||||||
|  |     bool nanMinMaxClamp;            // true if desiring min/max/clamp to favor non-NaN over NaN
 | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     void operator=(TIntermediate&); // prevent assignments
 |     void operator=(TIntermediate&); // prevent assignments
 | ||||||
|  | |||||||
| @ -435,6 +435,7 @@ public: | |||||||
|     void setHlslIoMapping(bool hlslIoMap); |     void setHlslIoMapping(bool hlslIoMap); | ||||||
|     void setFlattenUniformArrays(bool flatten); |     void setFlattenUniformArrays(bool flatten); | ||||||
|     void setNoStorageFormat(bool useUnknownFormat); |     void setNoStorageFormat(bool useUnknownFormat); | ||||||
|  |     void setNanMinMaxClamp(bool nanMinMaxClamp); | ||||||
|     void setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode); |     void setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode); | ||||||
| 
 | 
 | ||||||
|     // For setting up the environment (cleared to nothingness in the constructor).
 |     // For setting up the environment (cleared to nothingness in the constructor).
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 John Kessenich
						John Kessenich