307 lines
		
	
	
		
			8.1 KiB
		
	
	
	
		
			GLSL
		
	
	
	
	
	
			
		
		
	
	
			307 lines
		
	
	
		
			8.1 KiB
		
	
	
	
		
			GLSL
		
	
	
	
	
	
| #version 450 core
 | |
| 
 | |
| #extension GL_AMD_gpu_shader_half_float: enable
 | |
| #extension GL_ARB_gpu_shader_int64: enable
 | |
| 
 | |
| // Entry point. The body is empty, so none of the functions in this file
 | |
| // are invoked from it.
 | |
| void main() {}
 | |
| 
 | |
| // Half-float literals, written with both the lower-case "hf" and the
 | |
| // upper-case "HF" suffix.
 | |
| void literal()
 | |
| {
 | |
|     const float16_t scalarLit = 0.000001hf;
 | |
|     const f16vec2   vectorLit = f16vec2(-0.25HF, 0.03HF);
 | |
|     f16vec2         sum;
 | |
| 
 | |
|     // Store the scalar constant in one component, then add the vector
 | |
|     // constant component-wise.
 | |
|     sum.x = scalarLit;
 | |
|     sum  += vectorLit;
 | |
| }
 | |
| 
 | |
| // Block memory layout
 | |
| //
 | |
| // S is embedded (as a single member and as an array element) in the B1 and
 | |
| // B2 blocks below. The offsets noted on its members are relative to the
 | |
| // start of the struct.
 | |
| struct S
 | |
| {
 | |
|     float16_t  x;   // rule 1: align = 2, takes offsets 0-1
 | |
|     f16vec2    y;   // rule 2: align = 4, takes offsets 4-7
 | |
|     f16vec3    z;   // rule 3: align = 8, takes offsets 8-13
 | |
| };
 | |
| 
 | |
| // Uniform block declared with std140 layout and column-major matrices.
 | |
| // Each member comment names the layout rule applied, the resulting
 | |
| // alignment/stride, and the byte offsets the member occupies.
 | |
| layout(column_major, std140) uniform B1
 | |
| {
 | |
|     float16_t  a;     // rule 1:  align = 2, takes offsets 0-1
 | |
|     f16vec2    b;     // rule 2:  align = 4, takes offsets 4-7
 | |
|     f16vec3    c;     // rule 3:  align = 8, takes offsets 8-13
 | |
|                       //          (14-15 are padding before d)
 | |
|     float16_t  d[2];  // rule 4:  align = 16, array stride = 16,
 | |
|                       //          takes offsets 16-47
 | |
|     f16mat2x3  e;     // rule 5:  align = 16, matrix stride = 16,
 | |
|                       //          takes offsets 48-79
 | |
|     f16mat2x3  f[2];  // rule 6:  align = 16, matrix stride = 16,
 | |
|                       //          array stride = 32, f[0] takes
 | |
|                       //          offsets 80-111, f[1] takes offsets
 | |
|                       //          112-143
 | |
|     S          g;     // rule 9:  align = 16, g.x takes offsets
 | |
|                       //          144-145, g.y takes offsets 148-151,
 | |
|                       //          g.z takes offsets 152-157 (158-159
 | |
|                       //          are padding up to the struct size)
 | |
|     S          h[2];  // rule 10: align = 16, array stride = 16, h[0]
 | |
|                       //          takes offsets 160-175, h[1] takes
 | |
|                       //          offsets 176-191
 | |
| };
 | |
| 
 | |
| // Storage block declared with std430 layout and row-major matrices.
 | |
| // Member types mirror those of B1; the comments give the layout rule
 | |
| // applied, the resulting alignment/stride, and the byte offsets occupied.
 | |
| layout(row_major, std430) buffer B2
 | |
| {
 | |
|     float16_t  o;     // rule 1:  align = 2, takes offsets 0-1
 | |
|     f16vec2    p;     // rule 2:  align = 4, takes offsets 4-7
 | |
|     f16vec3    q;     // rule 3:  align = 8, takes offsets 8-13
 | |
|     float16_t  r[2];  // rule 4:  align = 2, array stride = 2, takes
 | |
|                       //          offsets 14-17
 | |
|     f16mat2x3  s;     // rule 7:  align = 4, matrix stride = 4, takes
 | |
|                       //          offsets 20-31
 | |
|     f16mat2x3  t[2];  // rule 8:  align = 4, matrix stride = 4, array
 | |
|                       //          stride = 12, t[0] takes offsets
 | |
|                       //          32-43, t[1] takes offsets 44-55
 | |
|     S          u;     // rule 9:  align = 8, u.x takes offsets
 | |
|                       //          56-57, u.y takes offsets 60-63, u.z
 | |
|                       //          takes offsets 64-69
 | |
|     S          v[2];  // rule 10: align = 8, array stride = 16, v[0]
 | |
|                       //          takes offsets 72-87, v[1] takes
 | |
|                       //          offsets 88-103
 | |
| };
 | |
| 
 | |
| // Specialization constants, one each of float16_t, float and double
 | |
| // (constant_id 100-102).
 | |
| layout(constant_id = 100) const float16_t sf16 = 0.125hf;
 | |
| layout(constant_id = 101) const float     sf   = 0.25;
 | |
| layout(constant_id = 102) const double    sd   = 0.5lf;
 | |
| 
 | |
| // Widening: float16 specialization constant -> float and -> double.
 | |
| // Each uses the constructor of its own destination type, so the double
 | |
| // case is a direct float16 -> double conversion.
 | |
| const float  f16_to_f = float(sf16);
 | |
| const double f16_to_d = double(sf16);
 | |
| 
 | |
| // Narrowing: float and double specialization constants -> float16.
 | |
| const float16_t f_to_f16 = float16_t(sf);
 | |
| const float16_t d_to_f16 = float16_t(sd);
 | |
| 
 | |
| // Applies the arithmetic, relational and vector/matrix operators to
 | |
| // float16 scalar, vector and matrix locals. The locals are never
 | |
| // initialised; only the operator forms matter here.
 | |
| void operators()
 | |
| {
 | |
|     float16_t s16;
 | |
|     f16vec2   v16;
 | |
|     f16mat2x2 m16;
 | |
|     bool      result;
 | |
| 
 | |
|     // Compound assignment on vectors
 | |
|     v16 += v16;
 | |
|     v16 -= v16;
 | |
|     v16 *= v16;
 | |
|     v16 /= v16;
 | |
| 
 | |
|     // Post-increment/decrement on a vector, pre- forms on a matrix
 | |
|     v16++;
 | |
|     v16--;
 | |
|     ++m16;
 | |
|     --m16;
 | |
| 
 | |
|     // Unary negation
 | |
|     v16 = -v16;
 | |
|     m16 = -m16;
 | |
| 
 | |
|     // Binary arithmetic on scalar components
 | |
|     s16 = v16.x + v16.y;
 | |
|     s16 = v16.x - v16.y;
 | |
|     s16 = v16.x * v16.y;
 | |
|     s16 = v16.x / v16.y;
 | |
| 
 | |
|     // Scalar comparisons
 | |
|     result = v16.x != s16;
 | |
|     result = v16.y == s16;
 | |
|     result = v16.x >  s16;
 | |
|     result = v16.y <  s16;
 | |
|     result = v16.x >= s16;
 | |
|     result = v16.y <= s16;
 | |
| 
 | |
|     // Vector/matrix times scalar, matrix-vector, vector-matrix,
 | |
|     // and matrix-matrix products
 | |
|     v16 = v16 * s16;
 | |
|     m16 = m16 * s16;
 | |
|     v16 = m16 * v16;
 | |
|     v16 = v16 * m16;
 | |
|     m16 = m16 * m16;
 | |
| }
 | |
| 
 | |
| // Converts an f16vec3 to and from every other three-component vector
 | |
| // type declared here (bool, float, double, int, uint, int64, uint64)
 | |
| // using constructor syntax. Each pair of statements covers one type in
 | |
| // both directions.
 | |
| void typeCast()
 | |
| {
 | |
|     bvec3   bv;
 | |
|     vec3    fv;
 | |
|     dvec3   dv;
 | |
|     ivec3   iv;
 | |
|     uvec3   uv;
 | |
|     i64vec3 i64v;
 | |
|     u64vec3 u64v;
 | |
| 
 | |
|     f16vec3 f16v;
 | |
| 
 | |
|     f16v = f16vec3(bv);     // bool -> float16
 | |
|     bv   = bvec3(f16v);     // float16 -> bool
 | |
| 
 | |
|     f16v = f16vec3(fv);     // float -> float16
 | |
|     fv   = vec3(f16v);      // float16 -> float
 | |
| 
 | |
|     f16v = f16vec3(dv);     // double -> float16
 | |
|     dv   = dvec3(f16v);     // float16 -> double
 | |
| 
 | |
|     f16v = f16vec3(iv);     // int -> float16
 | |
|     iv   = ivec3(f16v);     // float16 -> int
 | |
| 
 | |
|     f16v = f16vec3(uv);     // uint -> float16
 | |
|     uv   = uvec3(f16v);     // float16 -> uint
 | |
| 
 | |
|     f16v = f16vec3(i64v);   // int64 -> float16
 | |
|     i64v = i64vec3(f16v);   // float16 -> int64
 | |
| 
 | |
|     f16v = f16vec3(u64v);   // uint64 -> float16
 | |
|     u64v = u64vec3(f16v);   // float16 -> uint64
 | |
| }
 | |
| 
 | |
| // Calls each angle/trigonometry built-in with f16vec4 arguments.
 | |
| void builtinAngleTrigFuncs()
 | |
| {
 | |
|     f16vec4 src;
 | |
|     f16vec4 dst;
 | |
| 
 | |
|     // Angle unit conversion
 | |
|     dst = radians(src);
 | |
|     dst = degrees(src);
 | |
| 
 | |
|     // Circular functions and their inverses (atan in both its
 | |
|     // two-argument and one-argument forms)
 | |
|     dst = sin(src);
 | |
|     dst = cos(src);
 | |
|     dst = tan(src);
 | |
|     dst = asin(src);
 | |
|     dst = acos(src);
 | |
|     dst = atan(src, dst);
 | |
|     dst = atan(src);
 | |
| 
 | |
|     // Hyperbolic functions and their inverses
 | |
|     dst = sinh(src);
 | |
|     dst = cosh(src);
 | |
|     dst = tanh(src);
 | |
|     dst = asinh(src);
 | |
|     dst = acosh(src);
 | |
|     dst = atanh(src);
 | |
| }
 | |
| 
 | |
| // Calls each exponential built-in with f16vec2 arguments.
 | |
| void builtinExpFuncs()
 | |
| {
 | |
|     f16vec2 src;
 | |
|     f16vec2 dst;
 | |
| 
 | |
|     // pow is the only two-argument call; it reads dst as the exponent.
 | |
|     dst = pow(src, dst);
 | |
|     dst = exp(src);
 | |
|     dst = log(src);
 | |
|     dst = exp2(src);
 | |
|     dst = log2(src);
 | |
|     dst = sqrt(src);
 | |
|     dst = inversesqrt(src);
 | |
| }
 | |
| 
 | |
| // Calls the common built-ins with f16vec3 / float16_t arguments,
 | |
| // including the mixed vector-scalar overloads.
 | |
| void builtinCommonFuncs()
 | |
| {
 | |
|     f16vec3   va;
 | |
|     f16vec3   vb;
 | |
|     f16vec3   vc;
 | |
|     float16_t s;
 | |
|     bool      flag;
 | |
|     bvec3     mask;
 | |
|     ivec3     expo;
 | |
| 
 | |
|     // Sign and rounding
 | |
|     vb = abs(va);
 | |
|     vb = sign(va);
 | |
|     vb = floor(va);
 | |
|     vb = trunc(va);
 | |
|     vb = round(va);
 | |
|     vb = roundEven(va);
 | |
|     vb = ceil(va);
 | |
|     vb = fract(va);
 | |
| 
 | |
|     // Modulus: vector and scalar divisor, then modf with an out parameter
 | |
|     vb = mod(va, vb);
 | |
|     vb = mod(va, s);
 | |
|     vc = modf(va, vb);
 | |
| 
 | |
|     // min / max / clamp with vector and scalar bounds
 | |
|     vc = min(va, vb);
 | |
|     vc = min(va, s);
 | |
|     vc = max(va, vb);
 | |
|     vc = max(va, s);
 | |
|     vc = clamp(va, s, vb.x);
 | |
|     vc = clamp(va, vb, f16vec3(s));
 | |
| 
 | |
|     // mix with a scalar, vector and boolean-vector selector
 | |
|     vc = mix(va, vb, s);
 | |
|     vc = mix(va, vb, vc);
 | |
|     vc = mix(va, vb, mask);
 | |
| 
 | |
|     // step / smoothstep with vector and scalar edges
 | |
|     vc = step(va, vb);
 | |
|     vc = step(s, vc);
 | |
|     vc = smoothstep(va, vb, vc);
 | |
|     vc = smoothstep(s, va.x, vb);
 | |
| 
 | |
|     // Classification: scalar isnan, vector isinf
 | |
|     flag = isnan(s);
 | |
|     mask = isinf(va);
 | |
| 
 | |
|     // Fused multiply-add, then frexp/ldexp with an integer exponent vector
 | |
|     vc = fma(va, vb, vc);
 | |
|     vb = frexp(va, expo);
 | |
|     vb = ldexp(va, expo);
 | |
| }
 | |
| 
 | |
| // Passes an f16vec2 through packFloat2x16 and a uint through
 | |
| // unpackFloat2x16.
 | |
| void builtinPackUnpackFuncs()
 | |
| {
 | |
|     f16vec2 halves;
 | |
|     uint    packed;
 | |
| 
 | |
|     packed = packFloat2x16(halves);
 | |
|     halves = unpackFloat2x16(packed);
 | |
| }
 | |
| 
 | |
| // Calls each geometric built-in with f16vec3 arguments.
 | |
| void builtinGeometryFuncs()
 | |
| {
 | |
|     f16vec3   va;
 | |
|     f16vec3   vb;
 | |
|     f16vec3   vc;
 | |
|     float16_t s;
 | |
| 
 | |
|     // Scalar-valued results
 | |
|     s = length(va);
 | |
|     s = distance(va, vb);
 | |
|     s = dot(va, vb);
 | |
| 
 | |
|     // Vector-valued results
 | |
|     vc = cross(va, vb);
 | |
|     vb = normalize(va);
 | |
|     vc = faceforward(va, vb, vc);
 | |
|     vc = reflect(va, vb);
 | |
|     vc = refract(va, vb, s);
 | |
| }
 | |
| 
 | |
| // Calls each matrix built-in with float16 matrix and vector arguments.
 | |
| void builtinMatrixFuncs()
 | |
| {
 | |
|     f16mat2x3 m23a;
 | |
|     f16mat2x3 m23b;
 | |
|     f16mat2x3 m23c;
 | |
|     f16mat3x2 m32;
 | |
|     f16mat3   m33;
 | |
|     f16mat4   m44a;
 | |
|     f16mat4   m44b;
 | |
|     f16vec3   col3;
 | |
|     f16vec2   row2;
 | |
|     float16_t det;
 | |
| 
 | |
|     m23c = matrixCompMult(m23a, m23b);
 | |
|     // A 3-component and a 2-component vector, assigned to an f16mat2x3.
 | |
|     m23a = outerProduct(col3, row2);
 | |
|     // Transposing an f16mat2x3 is assigned to an f16mat3x2.
 | |
|     m32  = transpose(m23a);
 | |
|     det  = determinant(m33);
 | |
|     m44a = inverse(m44b);
 | |
| }
 | |
| 
 | |
| // Calls each component-wise vector relational built-in on two f16vec3
 | |
| // operands, storing the bvec3 result.
 | |
| void builtinVecRelFuncs()
 | |
| {
 | |
|     f16vec3 lhs;
 | |
|     f16vec3 rhs;
 | |
|     bvec3   mask;
 | |
| 
 | |
|     // Ordering comparisons
 | |
|     mask = lessThan(lhs, rhs);
 | |
|     mask = lessThanEqual(lhs, rhs);
 | |
|     mask = greaterThan(lhs, rhs);
 | |
|     mask = greaterThanEqual(lhs, rhs);
 | |
| 
 | |
|     // Equality comparisons
 | |
|     mask = equal(lhs, rhs);
 | |
|     mask = notEqual(lhs, rhs);
 | |
| }
 | |
| 
 | |
| // Half-float fragment input read by the derivative and interpolation
 | |
| // calls in builtinFragProcFuncs().
 | |
| in f16vec3 if16v;
 | |
| 
 | |
| // Calls the fragment-processing built-ins (derivatives, fwidth and
 | |
| // interpolation) on the if16v input, using scalar, two-component and
 | |
| // three-component operands.
 | |
| void builtinFragProcFuncs()
 | |
| {
 | |
|     f16vec3 f16v;
 | |
| 
 | |
|     // Derivative: each variant is called in both its x and y form.
 | |
|     f16v.x  = dFdx(if16v.x);
 | |
|     f16v.y  = dFdy(if16v.y);
 | |
|     f16v.xy = dFdxFine(if16v.xy);
 | |
|     f16v.xy = dFdyFine(if16v.xy);
 | |
|     f16v    = dFdxCoarse(if16v);
 | |
|     f16v    = dFdyCoarse(if16v);
 | |
| 
 | |
|     f16v.x  = fwidth(if16v.x);
 | |
|     f16v.xy = fwidthFine(if16v.xy);
 | |
|     f16v    = fwidthCoarse(if16v);
 | |
| 
 | |
|     // Interpolation
 | |
|     f16v.x  = interpolateAtCentroid(if16v.x);
 | |
|     f16v.xy = interpolateAtSample(if16v.xy, 1);
 | |
|     f16v    = interpolateAtOffset(if16v, f16vec2(0.5hf));
 | |
| }
 | 
