- Add support for invocation functions with "InclusiveScan" and "ExclusiveScan" modes.
- Add support for invocation functions taking int64/uint64/double/float16 as inout data types.
		
			
				
	
	
		
			166 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			166 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
#version 450

// Extensions enabled for the non-core types and built-ins used in this shader.
// 64-bit integer types (int64_t, u64vec2):
#extension GL_ARB_gpu_shader_int64: enable
// Half-precision float types (f16vec3):
#extension GL_AMD_gpu_shader_half_float: enable
// The *InvocationsAMD built-in functions called from main():
#extension GL_AMD_shader_ballot: enable

// Workgroup size: 8 x 8 x 1 = 64 invocations.
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 | 
						|
 | 
						|
// Storage block at binding 0 holding one value of each operand type passed to
// the invocation built-ins. main() reads every member and writes the result
// back into the same member.
layout(binding = 0) buffer Buffers
{
    int     i;     // signed integer scalar
    uvec2   uv;    // unsigned integer, 2 components
    vec3    fv;    // single-precision float, 3 components
    dvec4   dv;    // double-precision float, 4 components
    int64_t i64;   // 64-bit signed integer scalar
    u64vec2 u64v;  // 64-bit unsigned integer, 2 components
    f16vec3 f16v;  // half-precision float, 3 components
};
 | 
						|
 | 
						|
// Calls every min/max/add invocation built-in variant (plain, NonUniform,
// InclusiveScan, ExclusiveScan, and the NonUniform forms of both scans) once
// for each member of the Buffers block, storing each result back into the
// member it was computed from. Statement order is significant: each call
// consumes the value written by the previous call on the same member.
void main()
{
    // --- min / max / add: plain variants ---
    i    = minInvocationsAMD(i);
    uv   = minInvocationsAMD(uv);
    fv   = minInvocationsAMD(fv);
    dv   = minInvocationsAMD(dv);
    i64  = minInvocationsAMD(i64);
    u64v = minInvocationsAMD(u64v);
    f16v = minInvocationsAMD(f16v);

    i    = maxInvocationsAMD(i);
    uv   = maxInvocationsAMD(uv);
    fv   = maxInvocationsAMD(fv);
    dv   = maxInvocationsAMD(dv);
    i64  = maxInvocationsAMD(i64);
    u64v = maxInvocationsAMD(u64v);
    f16v = maxInvocationsAMD(f16v);

    i    = addInvocationsAMD(i);
    uv   = addInvocationsAMD(uv);
    fv   = addInvocationsAMD(fv);
    dv   = addInvocationsAMD(dv);
    i64  = addInvocationsAMD(i64);
    u64v = addInvocationsAMD(u64v);
    f16v = addInvocationsAMD(f16v);

    // --- min / max / add: NonUniform variants ---
    i    = minInvocationsNonUniformAMD(i);
    uv   = minInvocationsNonUniformAMD(uv);
    fv   = minInvocationsNonUniformAMD(fv);
    dv   = minInvocationsNonUniformAMD(dv);
    i64  = minInvocationsNonUniformAMD(i64);
    u64v = minInvocationsNonUniformAMD(u64v);
    f16v = minInvocationsNonUniformAMD(f16v);

    i    = maxInvocationsNonUniformAMD(i);
    uv   = maxInvocationsNonUniformAMD(uv);
    fv   = maxInvocationsNonUniformAMD(fv);
    dv   = maxInvocationsNonUniformAMD(dv);
    i64  = maxInvocationsNonUniformAMD(i64);
    u64v = maxInvocationsNonUniformAMD(u64v);
    f16v = maxInvocationsNonUniformAMD(f16v);

    i    = addInvocationsNonUniformAMD(i);
    uv   = addInvocationsNonUniformAMD(uv);
    fv   = addInvocationsNonUniformAMD(fv);
    dv   = addInvocationsNonUniformAMD(dv);
    i64  = addInvocationsNonUniformAMD(i64);
    u64v = addInvocationsNonUniformAMD(u64v);
    f16v = addInvocationsNonUniformAMD(f16v);

    // --- min / max / add: InclusiveScan variants ---
    i    = minInvocationsInclusiveScanAMD(i);
    uv   = minInvocationsInclusiveScanAMD(uv);
    fv   = minInvocationsInclusiveScanAMD(fv);
    dv   = minInvocationsInclusiveScanAMD(dv);
    i64  = minInvocationsInclusiveScanAMD(i64);
    u64v = minInvocationsInclusiveScanAMD(u64v);
    f16v = minInvocationsInclusiveScanAMD(f16v);

    i    = maxInvocationsInclusiveScanAMD(i);
    uv   = maxInvocationsInclusiveScanAMD(uv);
    fv   = maxInvocationsInclusiveScanAMD(fv);
    dv   = maxInvocationsInclusiveScanAMD(dv);
    i64  = maxInvocationsInclusiveScanAMD(i64);
    u64v = maxInvocationsInclusiveScanAMD(u64v);
    f16v = maxInvocationsInclusiveScanAMD(f16v);

    i    = addInvocationsInclusiveScanAMD(i);
    uv   = addInvocationsInclusiveScanAMD(uv);
    fv   = addInvocationsInclusiveScanAMD(fv);
    dv   = addInvocationsInclusiveScanAMD(dv);
    i64  = addInvocationsInclusiveScanAMD(i64);
    u64v = addInvocationsInclusiveScanAMD(u64v);
    f16v = addInvocationsInclusiveScanAMD(f16v);

    // --- min / max / add: ExclusiveScan variants ---
    i    = minInvocationsExclusiveScanAMD(i);
    uv   = minInvocationsExclusiveScanAMD(uv);
    fv   = minInvocationsExclusiveScanAMD(fv);
    dv   = minInvocationsExclusiveScanAMD(dv);
    i64  = minInvocationsExclusiveScanAMD(i64);
    u64v = minInvocationsExclusiveScanAMD(u64v);
    f16v = minInvocationsExclusiveScanAMD(f16v);

    i    = maxInvocationsExclusiveScanAMD(i);
    uv   = maxInvocationsExclusiveScanAMD(uv);
    fv   = maxInvocationsExclusiveScanAMD(fv);
    dv   = maxInvocationsExclusiveScanAMD(dv);
    i64  = maxInvocationsExclusiveScanAMD(i64);
    u64v = maxInvocationsExclusiveScanAMD(u64v);
    f16v = maxInvocationsExclusiveScanAMD(f16v);

    i    = addInvocationsExclusiveScanAMD(i);
    uv   = addInvocationsExclusiveScanAMD(uv);
    fv   = addInvocationsExclusiveScanAMD(fv);
    dv   = addInvocationsExclusiveScanAMD(dv);
    i64  = addInvocationsExclusiveScanAMD(i64);
    u64v = addInvocationsExclusiveScanAMD(u64v);
    f16v = addInvocationsExclusiveScanAMD(f16v);

    // --- min / max / add: InclusiveScanNonUniform variants ---
    i    = minInvocationsInclusiveScanNonUniformAMD(i);
    uv   = minInvocationsInclusiveScanNonUniformAMD(uv);
    fv   = minInvocationsInclusiveScanNonUniformAMD(fv);
    dv   = minInvocationsInclusiveScanNonUniformAMD(dv);
    i64  = minInvocationsInclusiveScanNonUniformAMD(i64);
    u64v = minInvocationsInclusiveScanNonUniformAMD(u64v);
    f16v = minInvocationsInclusiveScanNonUniformAMD(f16v);

    i    = maxInvocationsInclusiveScanNonUniformAMD(i);
    uv   = maxInvocationsInclusiveScanNonUniformAMD(uv);
    fv   = maxInvocationsInclusiveScanNonUniformAMD(fv);
    dv   = maxInvocationsInclusiveScanNonUniformAMD(dv);
    i64  = maxInvocationsInclusiveScanNonUniformAMD(i64);
    u64v = maxInvocationsInclusiveScanNonUniformAMD(u64v);
    f16v = maxInvocationsInclusiveScanNonUniformAMD(f16v);

    i    = addInvocationsInclusiveScanNonUniformAMD(i);
    uv   = addInvocationsInclusiveScanNonUniformAMD(uv);
    fv   = addInvocationsInclusiveScanNonUniformAMD(fv);
    dv   = addInvocationsInclusiveScanNonUniformAMD(dv);
    i64  = addInvocationsInclusiveScanNonUniformAMD(i64);
    u64v = addInvocationsInclusiveScanNonUniformAMD(u64v);
    f16v = addInvocationsInclusiveScanNonUniformAMD(f16v);

    // --- min / max / add: ExclusiveScanNonUniform variants ---
    i    = minInvocationsExclusiveScanNonUniformAMD(i);
    uv   = minInvocationsExclusiveScanNonUniformAMD(uv);
    fv   = minInvocationsExclusiveScanNonUniformAMD(fv);
    dv   = minInvocationsExclusiveScanNonUniformAMD(dv);
    i64  = minInvocationsExclusiveScanNonUniformAMD(i64);
    u64v = minInvocationsExclusiveScanNonUniformAMD(u64v);
    f16v = minInvocationsExclusiveScanNonUniformAMD(f16v);

    i    = maxInvocationsExclusiveScanNonUniformAMD(i);
    uv   = maxInvocationsExclusiveScanNonUniformAMD(uv);
    fv   = maxInvocationsExclusiveScanNonUniformAMD(fv);
    dv   = maxInvocationsExclusiveScanNonUniformAMD(dv);
    i64  = maxInvocationsExclusiveScanNonUniformAMD(i64);
    u64v = maxInvocationsExclusiveScanNonUniformAMD(u64v);
    f16v = maxInvocationsExclusiveScanNonUniformAMD(f16v);

    i    = addInvocationsExclusiveScanNonUniformAMD(i);
    uv   = addInvocationsExclusiveScanNonUniformAMD(uv);
    fv   = addInvocationsExclusiveScanNonUniformAMD(fv);
    dv   = addInvocationsExclusiveScanNonUniformAMD(dv);
    i64  = addInvocationsExclusiveScanNonUniformAMD(i64);
    u64v = addInvocationsExclusiveScanNonUniformAMD(u64v);
    f16v = addInvocationsExclusiveScanNonUniformAMD(f16v);
}
 |