Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement floating-point pack/unpack intrinsics for all targets #6503

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/user-guide/a3-02-reference-capability-atoms.md
Original file line number Diff line number Diff line change
Expand Up @@ -1076,6 +1076,9 @@ Compound Capabilities
`shader5_sm_5_0`
> Capabilities required to use sm_5_0 features as part of GL_ARB_gpu_shader5

`pack_vector`
> Capabilities required to use pack/unpack intrinsics on packed vector data

`subgroup_basic`
> Capabilities required to use GLSL-style subgroup operations 'subgroup_basic'

Expand Down
224 changes: 10 additions & 214 deletions source/slang/glsl.meta.slang
Original file line number Diff line number Diff line change
Expand Up @@ -617,230 +617,34 @@ public vector<float, N> uintBitsToFloat(highp vector<uint, N> x)

// Quantizes a float to an unsigned-normalized 16-bit value:
// the input is saturated to [0, 1], scaled by 65535 and rounded.
[__readNone]
[ForceInline]
uint packUnorm1x16(float c)
{
    const float saturated = clamp(c, 0.0, 1.0);
    const float scaled = saturated * 65535.0;
    return uint(round(scaled));
}

// Quantizes a float to a signed-normalized 16-bit value held in the low
// 16 bits of the result (two's complement); bits 16..31 are always zero.
//
// The input is clamped to [-1, 1], scaled by 32767 and rounded. The rounded
// value is converted through `int` first, because converting a negative
// float directly to `uint` is not well defined, and the result is then
// masked so that the sign extension of a negative value cannot spill into
// a neighbouring lane when callers OR several lanes together.
[__readNone]
[ForceInline]
uint packSnorm1x16(float v)
{
    const int quantized = int(round(clamp(v, -1.0, 1.0) * 32767.0));
    return uint(quantized) & uint(0xffff);
}

// Quantizes a float to an unsigned-normalized 8-bit value:
// the input is saturated to [0, 1], scaled by 255 and rounded.
[__readNone]
[ForceInline]
uint packUnorm1x8(float c)
{
    const float saturated = clamp(c, 0.0, 1.0);
    const float scaled = saturated * 255.0;
    return uint(round(scaled));
}

// Quantizes a float to a signed-normalized 8-bit value held in the low
// 8 bits of the result (two's complement); bits 8..31 are always zero.
//
// The input is clamped to [-1, 1], scaled by 127 and rounded. The rounded
// value is converted through `int` first, because converting a negative
// float directly to `uint` is not well defined, and the result is then
// masked so that the sign extension of a negative value cannot spill into
// a neighbouring lane when callers OR several lanes together.
[__readNone]
[ForceInline]
uint packSnorm1x8(float c)
{
    const int quantized = int(round(clamp(c, -1.0, 1.0) * 127.0));
    return uint(quantized) & uint(0xff);
}

// Decodes the low 16 bits of `p` as an unsigned-normalized value,
// mapping 0..65535 onto 0.0..1.0. Bits above bit 15 are ignored.
[__readNone]
[ForceInline]
float unpackUnorm1x16(uint p)
{
    const uint low16 = p & 0xffff;
    const float asFloat = float(low16);
    return asFloat / 65535.0;
}

// Decodes the low 16 bits of `p` as a signed-normalized (two's complement)
// value and returns it in [-1.0, 1.0]. Bits above bit 15 are ignored.
//
// The 16-bit word must be sign-extended before the division: treating it as
// an unsigned quantity would turn every negative encoding (0x8000..0xffff)
// into a value greater than 1 that then clamps to +1.0.
[__readNone]
[ForceInline]
float unpackSnorm1x16(uint p)
{
    int word = int(p & uint(0xffff));
    if (word > 0x7fff)
    {
        // Bit 15 is set: reinterpret as a negative two's-complement value.
        word -= 0x10000;
    }
    // The clamp maps the one extra negative code (-32768) onto -1.0.
    return clamp(float(word) / 32767.0, -1.0, 1.0);
}

// Decodes the low 8 bits of `p` as an unsigned-normalized value,
// mapping 0..255 onto 0.0..1.0. Bits above bit 7 are ignored.
[__readNone]
[ForceInline]
float unpackUnorm1x8(uint p)
{
    const uint low8 = p & 0xff;
    const float asFloat = float(low8);
    return asFloat / 255.0;
}

// Decodes the low 8 bits of `p` as a signed-normalized (two's complement)
// value and returns it in [-1.0, 1.0]. Bits above bit 7 are ignored.
//
// The byte must be sign-extended before the division: treating it as an
// unsigned quantity would turn every negative encoding (0x80..0xff) into a
// value greater than 1 that then clamps to +1.0.
[__readNone]
[ForceInline]
float unpackSnorm1x8(uint p)
{
    int byteValue = int(p & uint(0xff));
    if (byteValue > 0x7f)
    {
        // Bit 7 is set: reinterpret as a negative two's-complement value.
        byteValue -= 0x100;
    }
    // The clamp maps the one extra negative code (-128) onto -1.0.
    return clamp(float(byteValue) / 127.0, -1.0, 1.0);
}

// Converts a 32-bit float to the bit pattern of a 16-bit half-precision
// float, returned in the low 16 bits of the result.
//
// Mantissa bits that do not fit are truncated (round toward zero).
// Handled ranges:
//   - Inf stays Inf; NaN stays NaN (the quiet bit is forced so the mantissa
//     can never truncate to zero and turn the NaN into Inf).
//   - Magnitudes too large for half become signed infinity.
//   - Magnitudes below the smallest half subnormal become signed zero.
//   - Values in the half subnormal range are encoded as subnormals.
//
// The exponent is derived from the float's exponent field alone; it must not
// depend on the mantissa, otherwise exact powers of two such as 1.0 or 2.0
// (whose mantissa bits are all zero) would lose their exponent and encode
// as zero.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
uint float2half(float f)
{
    const uint bits = floatBitsToUint(f);
    const uint sign = (bits >> uint(16)) & uint(0x8000);
    const uint expField = (bits >> uint(23)) & uint(0xff);
    uint mantissa = bits & uint(0x007fffff);

    // Inf / NaN: exponent field is all ones in both formats.
    if (expField == uint(0xff))
    {
        if (mantissa != 0)
        {
            return sign | uint(0x7c00) | uint(0x0200) | (mantissa >> uint(13));
        }
        return sign | uint(0x7c00);
    }

    // Re-bias the exponent from float (127) to half (15).
    const int halfExp = int(expField) - 127 + 15;

    // Too large for a finite half: saturate to infinity.
    if (halfExp >= 31)
    {
        return sign | uint(0x7c00);
    }

    // Zero, float subnormals, and values below the normal half range.
    if (halfExp <= 0)
    {
        // Smaller than the smallest half subnormal: flush to signed zero.
        if (halfExp < -10)
        {
            return sign;
        }
        // Make the implicit leading one explicit, then shift it into the
        // subnormal mantissa position.
        mantissa |= uint(0x00800000);
        return sign | (mantissa >> uint(14 - halfExp));
    }

    // Normal half: 5-bit exponent followed by the top 10 mantissa bits.
    return sign | (uint(halfExp) << uint(10)) | (mantissa >> uint(13));
}

// Packs the two components of `v` as unsigned-normalized 16-bit values into
// one uint: `v.x` occupies bits 0..15 and `v.y` bits 16..31.
// GLSL and SPIR-V use their native pack operation; every other target uses
// the per-component helper.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
public uint packUnorm2x16(vec2 v)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "packUnorm2x16";
    case spirv:
        return spirv_asm {
            result:$$uint = OpExtInst glsl450 PackUnorm2x16 $v
        };
    default:
        const uint lowWord = packUnorm1x16(v.x);
        const uint highWord = packUnorm1x16(v.y);
        return lowWord | (highWord << uint(16));
    }
}

// Packs the two components of `v` as signed-normalized 16-bit values into
// one uint: `v.x` occupies bits 0..15 and `v.y` bits 16..31.
// GLSL and SPIR-V use their native pack operation; every other target uses
// the per-component helper.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
public uint packSnorm2x16(vec2 v)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "packSnorm2x16";
    case spirv:
        return spirv_asm {
            result:$$uint = OpExtInst glsl450 PackSnorm2x16 $v
        };
    default:
        // Each lane is masked to 16 bits before combining, so a helper
        // result that carries bits above bit 15 (for example the sign
        // extension of a negative value) cannot overwrite the other lane.
        const uint lowWord = packSnorm1x16(v.x) & uint(0xffff);
        const uint highWord = packSnorm1x16(v.y) & uint(0xffff);
        return lowWord | (highWord << uint(16));
    }
}

// Packs the four components of `v` as unsigned-normalized 8-bit values into
// one uint: x in bits 0..7, y in bits 8..15, z in bits 16..23, w in bits 24..31.
// GLSL and SPIR-V use their native pack operation; every other target uses
// the per-component helper.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
public uint packUnorm4x8(vec4 v)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "packUnorm4x8";
    case spirv:
        return spirv_asm {
            result:$$uint = OpExtInst glsl450 PackUnorm4x8 $v
        };
    default:
        const uint byte0 = packUnorm1x8(v.x);
        const uint byte1 = packUnorm1x8(v.y) << uint(8);
        const uint byte2 = packUnorm1x8(v.z) << uint(16);
        const uint byte3 = packUnorm1x8(v.w) << uint(24);
        return byte0 | byte1 | byte2 | byte3;
    }
}

// Packs the four components of `v` as signed-normalized 8-bit values into
// one uint: x in bits 0..7, y in bits 8..15, z in bits 16..23, w in bits 24..31.
// GLSL and SPIR-V use their native pack operation; every other target uses
// the per-component helper.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
public uint packSnorm4x8(vec4 v)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "packSnorm4x8";
    case spirv:
        return spirv_asm {
            result:$$uint = OpExtInst glsl450 PackSnorm4x8 $v
        };
    default:
        // Each lane is masked to 8 bits before combining, so a helper
        // result that carries bits above bit 7 (for example the sign
        // extension of a negative value) cannot overwrite another lane.
        const uint byte0 = packSnorm1x8(v.x) & uint(0xff);
        const uint byte1 = packSnorm1x8(v.y) & uint(0xff);
        const uint byte2 = packSnorm1x8(v.z) & uint(0xff);
        const uint byte3 = packSnorm1x8(v.w) & uint(0xff);
        return byte0 | (byte1 << uint(8)) | (byte2 << uint(16)) | (byte3 << uint(24));
    }
}

// Unpacks a uint into a vec2 of unsigned-normalized values.
// In the fallback path the low 16 bits of `p` become the first component
// and the high 16 bits the second.
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// lost, so the second `require` attribute and the trailing return may belong
// to a newer revision than the `__target_switch` body — confirm against the
// real file.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)]
public vec2 unpackUnorm2x16(uint p)
{
__target_switch
{
// GLSL: emitted as the native `unpackUnorm2x16` intrinsic.
case glsl: __intrinsic_asm "unpackUnorm2x16";
// SPIR-V: uses the GLSL.std.450 extended instruction.
case spirv: return spirv_asm {
result:$$vec2 = OpExtInst glsl450 UnpackUnorm2x16 $p
};
// Other targets: decode each 16-bit half with the scalar helper.
default:
return vec2(unpackUnorm1x16(p & uint(0xffff)), unpackUnorm1x16(p >> uint(16)));
}
// Every case above returns or supplies an intrinsic, so as written this
// statement appears unreachable.
return unpackUnorm2x16ToFloat(p);
}

// Unpacks a uint into a vec2 of signed-normalized values.
// In the fallback path the low 16 bits of `p` become the first component
// and the high 16 bits the second.
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// lost, so the second `require` attribute and the trailing return may belong
// to a newer revision than the `__target_switch` body — confirm against the
// real file.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)]
public vec2 unpackSnorm2x16(uint p)
{
__target_switch
{
// GLSL: emitted as the native `unpackSnorm2x16` intrinsic.
case glsl: __intrinsic_asm "unpackSnorm2x16";
// SPIR-V: uses the GLSL.std.450 extended instruction.
case spirv: return spirv_asm {
result:$$vec2 = OpExtInst glsl450 UnpackSnorm2x16 $p
};
// Other targets: decode each 16-bit half with the scalar helper.
default:
return vec2(unpackSnorm1x16(p & uint(0xffff)), unpackSnorm1x16(p >> uint(16)));
}
// Every case above returns or supplies an intrinsic, so as written this
// statement appears unreachable.
return unpackSnorm2x16ToFloat(p);
}

// Unpacks a uint into a vec4 of unsigned-normalized values.
// In the fallback path the components are taken from `p` shifted right by
// 0, 8, 16 and 24 bits, in that order.
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// lost, so the second `require` attribute and the trailing return may belong
// to a newer revision than the `__target_switch` body — confirm against the
// real file.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)]
public vec4 unpackUnorm4x8(highp uint p)
{
__target_switch
{
// GLSL: emitted as the native `unpackUnorm4x8` intrinsic.
case glsl: __intrinsic_asm "unpackUnorm4x8";
// SPIR-V: uses the GLSL.std.450 extended instruction.
case spirv: return spirv_asm {
result:$$vec4 = OpExtInst glsl450 UnpackUnorm4x8 $p
};
// Other targets: decode each byte with the scalar helper; the shifted
// values are passed unmasked.
default:
return vec4(
unpackUnorm1x8(p),
unpackUnorm1x8(p >> 8),
unpackUnorm1x8(p >> 16),
unpackUnorm1x8(p >> 24));
}
// Every case above returns or supplies an intrinsic, so as written this
// statement appears unreachable.
return unpackUnorm4x8ToFloat(p);
}

// Unpacks a uint into a vec4 of signed-normalized values.
// In the fallback path the components are taken from `p` shifted right by
// 0, 8, 16 and 24 bits, in that order.
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// lost, so the two `require` attributes may come from different revisions —
// confirm against the real file.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)]
public vec4 unpackSnorm4x8(highp uint p)
{
__target_switch
{
// GLSL: emitted as the native `unpackSnorm4x8` intrinsic.
case glsl: __intrinsic_asm "unpackSnorm4x8";
// SPIR-V: uses the GLSL.std.450 extended instruction.
case spirv: return spirv_asm {
result:$$vec4 = OpExtInst glsl450 UnpackSnorm4x8 $p
};
// Other targets: decode each byte with the scalar helper; the shifted
// values are passed unmasked.
default:
return vec4(
unpackSnorm1x8(p),
unpackSnorm1x8(p >> 8),
unpackSnorm1x8(p >> 16),
unpackSnorm1x8(p >> 24));
}
}

// Packs the two components of `v` as half-precision bit patterns into one
// uint: in the fallback path `v.x` occupies the low 16 bits and `v.y` is
// shifted into the high 16 bits.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
public uint packHalf2x16(vec2 v)
{
__target_switch
{
// GLSL: emitted as the native `packHalf2x16` intrinsic.
case glsl: __intrinsic_asm "packHalf2x16";
// SPIR-V: uses the GLSL.std.450 extended instruction.
case spirv: return spirv_asm {
result:$$uint = OpExtInst glsl450 PackHalf2x16 $v
};
// Other targets: convert each component with float2half and combine.
default:
return float2half(v.x) | (float2half(v.y) << uint(16));
}
// NOTE(review): `p` is not a parameter of this function and the callee is
// named for unpackSnorm4x8; this line looks like residue from a rendered
// diff whose +/- markers were lost — confirm against the real file.
return unpackSnorm4x8ToFloat(p);
}

[__readNone]
Expand All @@ -865,18 +669,10 @@ public float half2float(uint h)

// Unpacks a uint holding two half-precision bit patterns into a vec2.
// In the fallback path the low 16 bits of `p` become the first component
// and the high 16 bits the second.
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// lost, so the second `require` attribute and the trailing return may belong
// to a newer revision than the `__target_switch` body — confirm against the
// real file.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)]
public vec2 unpackHalf2x16(uint p)
{
__target_switch
{
// GLSL: emitted as the native `unpackHalf2x16` intrinsic.
case glsl: __intrinsic_asm "unpackHalf2x16";
// SPIR-V: uses the GLSL.std.450 extended instruction.
case spirv: return spirv_asm {
result:$$vec2 = OpExtInst glsl450 UnpackHalf2x16 $p
};
// Other targets: convert each 16-bit half with half2float.
default:
return vec2(half2float(p & uint(0xffff)), half2float(p >> uint(16)));
}
// Every case above returns or supplies an intrinsic, so as written this
// statement appears unreachable.
return unpackHalf2x16ToFloat(p);
}

[__readNone]
Expand Down
Loading