From 77c613b38e70250f5457a8f81fb8186c81a6b95b Mon Sep 17 00:00:00 2001 From: fairywreath Date: Sat, 1 Mar 2025 15:52:29 -0500 Subject: [PATCH 1/3] Implement floating-point pack/unpack intrinsics --- .../a3-02-reference-capability-atoms.md | 3 + source/slang/glsl.meta.slang | 148 +----- source/slang/hlsl.meta.slang | 472 +++++++++++++++++- source/slang/slang-capabilities.capdef | 10 + tests/glsl-intrinsic/unpack-float.slang | 64 +++ .../packed/pack-unpack-float.slang | 178 +++++++ .../hlsl-intrinsic/packed/unpack-float.slang | 64 +++ 7 files changed, 776 insertions(+), 163 deletions(-) create mode 100644 tests/glsl-intrinsic/unpack-float.slang create mode 100644 tests/hlsl-intrinsic/packed/pack-unpack-float.slang create mode 100644 tests/hlsl-intrinsic/packed/unpack-float.slang diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index e7de2cfa93..b676db166c 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -1076,6 +1076,9 @@ Compound Capabilities `shader5_sm_5_0` > Capabilities required to use sm_5_0 features apart of GL_ARB_gpu_shader5 +`pack_unpack` +> Capabilities required to use pack/unpack intrinsics + `subgroup_basic` > Capabilities required to use GLSL-style subgroup operations 'subgroup_basic' diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index 2a89f2b66d..fd2832d2f3 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -693,154 +693,34 @@ uint float2half(float f) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -public uint packUnorm2x16(vec2 v) -{ - __target_switch - { - case glsl: __intrinsic_asm "packUnorm2x16"; - case spirv: return spirv_asm { - result:$$uint = OpExtInst glsl450 PackUnorm2x16 $v - }; - default: - return packUnorm1x16(v.x) | (packUnorm1x16(v.y) << uint(16)); - } -} - -[__readNone] -[ForceInline] 
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -public uint packSnorm2x16(vec2 v) -{ - __target_switch - { - case glsl: __intrinsic_asm "packSnorm2x16"; - case spirv: return spirv_asm { - result:$$uint = OpExtInst glsl450 PackSnorm2x16 $v - }; - default: - return packSnorm1x16(v.x) | (packSnorm1x16(v.y) << uint(16)); - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -public uint packUnorm4x8(vec4 v) -{ - __target_switch - { - case glsl: __intrinsic_asm "packUnorm4x8"; - case spirv: return spirv_asm { - result:$$uint = OpExtInst glsl450 PackUnorm4x8 $v - }; - default: - return packUnorm1x8(v.x) | (packUnorm1x8(v.y) << uint(8)) | (packUnorm1x8(v.z) << uint(16)) | (packUnorm1x8(v.w) << uint(24)); - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -public uint packSnorm4x8(vec4 v) -{ - __target_switch - { - case glsl: __intrinsic_asm "packSnorm4x8"; - case spirv: return spirv_asm { - result:$$uint = OpExtInst glsl450 PackSnorm4x8 $v - }; - default: - return packSnorm1x8(v.x) | (packSnorm1x8(v.y) << uint(8)) | (packSnorm1x8(v.z) << uint(16)) | (packSnorm1x8(v.w) << uint(24)); - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_spirv, pack_unpack)] public vec2 unpackUnorm2x16(uint p) { - __target_switch - { - case glsl: __intrinsic_asm "unpackUnorm2x16"; - case spirv: return spirv_asm { - result:$$vec2 = OpExtInst glsl450 UnpackUnorm2x16 $p - }; - default: - return vec2(unpackUnorm1x16(p & uint(0xffff)), unpackUnorm1x16(p >> uint(16))); - } + return unpackUnorm2x16ToFloat(p); } [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_spirv, pack_unpack)] public vec2 unpackSnorm2x16(uint p) { - __target_switch - { - case glsl: __intrinsic_asm "unpackSnorm2x16"; - case spirv: return spirv_asm { - result:$$vec2 = OpExtInst glsl450 UnpackSnorm2x16 $p - }; - default: - 
return vec2(unpackSnorm1x16(p & uint(0xffff)), unpackSnorm1x16(p >> uint(16))); - } + return unpackSnorm2x16ToFloat(p); } [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_spirv, pack_unpack)] public vec4 unpackUnorm4x8(highp uint p) { - __target_switch - { - case glsl: __intrinsic_asm "unpackUnorm4x8"; - case spirv: return spirv_asm { - result:$$vec4 = OpExtInst glsl450 UnpackUnorm4x8 $p - }; - default: - return vec4( - unpackUnorm1x8(p), - unpackUnorm1x8(p >> 8), - unpackUnorm1x8(p >> 16), - unpackUnorm1x8(p >> 24)); - } + return unpackUnorm4x8ToFloat(p); } [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_spirv, pack_unpack)] public vec4 unpackSnorm4x8(highp uint p) { - __target_switch - { - case glsl: __intrinsic_asm "unpackSnorm4x8"; - case spirv: return spirv_asm { - result:$$vec4 = OpExtInst glsl450 UnpackSnorm4x8 $p - }; - default: - return vec4( - unpackSnorm1x8(p), - unpackSnorm1x8(p >> 8), - unpackSnorm1x8(p >> 16), - unpackSnorm1x8(p >> 24)); - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -public uint packHalf2x16(vec2 v) -{ - __target_switch - { - case glsl: __intrinsic_asm "packHalf2x16"; - case spirv: return spirv_asm { - result:$$uint = OpExtInst glsl450 PackHalf2x16 $v - }; - default: - return float2half(v.x) | (float2half(v.y) << uint(16)); - } + return unpackSnorm4x8ToFloat(p); } [__readNone] @@ -865,18 +745,10 @@ public float half2float(uint h) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_spirv, pack_unpack)] public vec2 unpackHalf2x16(uint p) { - __target_switch - { - case glsl: __intrinsic_asm "unpackHalf2x16"; - case spirv: return spirv_asm { - result:$$vec2 = OpExtInst glsl450 UnpackHalf2x16 $p - }; - default: - return vec2(half2float(p & uint(0xffff)), half2float(p >> uint(16))); - } + return unpackHalf2x16ToFloat(p); 
} [__readNone] diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index d2abfc7fee..f8f18c175f 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -24226,7 +24226,7 @@ typealias int8_t4_packed = uint; /// Unpack 4 signed 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] int16_t4 unpack_s8s16(int8_t4_packed packed) { return unpackInt4x8ToInt16(packed); @@ -24235,7 +24235,7 @@ int16_t4 unpack_s8s16(int8_t4_packed packed) /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint16_t4 unpack_u8u16(uint8_t4_packed packed) { return unpackUint4x8ToUint16(packed); @@ -24244,7 +24244,7 @@ uint16_t4 unpack_u8u16(uint8_t4_packed packed) /// Unpack 4 signed 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] int32_t4 unpack_s8s32(int8_t4_packed packed) { return unpackInt4x8ToInt32(packed); @@ -24253,7 +24253,7 @@ int32_t4 unpack_s8s32(int8_t4_packed packed) /// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint32_t4 unpack_u8u32(uint8_t4_packed packed) { return unpackUint4x8ToUint32(packed); @@ -24262,7 +24262,7 @@ uint32_t4 unpack_u8u32(uint8_t4_packed packed) /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint8_t4_packed pack_u8(uint32_t4 unpackedValue) { return packUint4x8(unpackedValue); @@ -24271,7 +24271,7 @@ uint8_t4_packed pack_u8(uint32_t4 unpackedValue) /// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] int8_t4_packed pack_s8(int32_t4 unpackedValue) { return packInt4x8(unpackedValue); @@ -24280,7 +24280,7 @@ int8_t4_packed pack_s8(int32_t4 unpackedValue) /// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint8_t4_packed pack_u8(uint16_t4 unpackedValue) { return packUint4x8(unpackedValue); @@ -24289,7 +24289,7 @@ uint8_t4_packed pack_u8(uint16_t4 unpackedValue) /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] int8_t4_packed pack_s8(int16_t4 unpackedValue) { return packInt4x8(unpackedValue); @@ -24299,7 +24299,7 @@ int8_t4_packed pack_s8(int16_t4 unpackedValue) /// clamping each value to the range [0, 255] to ensure it fits within 8 bits. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue) { return packUint4x8Clamp(unpackedValue); @@ -24309,7 +24309,7 @@ uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue) { return packInt4x8Clamp(unpackedValue); @@ -24319,7 +24319,7 @@ int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue) /// clamping each value to the range [0, 255] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue) { return packUint4x8Clamp(unpackedValue); @@ -24329,7 +24329,7 @@ uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue) { return packInt4x8Clamp(unpackedValue); @@ -24404,13 +24404,41 @@ int32_t __lsbAsInt32(uint32_t val) return int32_t(__lsbAsInt8(val)); } +[__readNone] +[ForceInline] +uint32_t2 __unpackUint2x16ToUint32(uint packedValue) +{ + return uint32_t2(packedValue & 0xFFFFU, packedValue >> 16U); +} + +[__readNone] +[ForceInline] +int32_t2 __unpackInt2x16ToInt32(uint packedValue) +{ + int signedValue = int(packedValue); + return int32_t2(signedValue << 16U, signedValue) >> 16U; +} + +[__readNone] +[ForceInline] +uint __packUint2x16(uint32_t2 unpackedValue) +{ + return unpackedValue.x | (unpackedValue.y << 16U); +} + +[__readNone] +[ForceInline] +uint __packInt2x16(int32_t2 unpackedValue) +{ + return uint(unpackedValue.x | (unpackedValue.y << 16U)); +} + //@public: /// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint32_t4 unpackUint4x8ToUint32(uint packedValue) { __target_switch @@ -24437,7 +24465,7 @@ uint32_t4 unpackUint4x8ToUint32(uint packedValue) /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint16_t4 unpackUint4x8ToUint16(uint packedValue) { __target_switch @@ -24463,7 +24491,7 @@ uint16_t4 unpackUint4x8ToUint16(uint packedValue) /// Unpack 4 signed 8-bit values into a vector of 32 bit integers. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] int32_t4 unpackInt4x8ToInt32(uint packedValue) { __target_switch @@ -24490,7 +24518,7 @@ int32_t4 unpackInt4x8ToInt32(uint packedValue) /// Unpack 4 signed 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] int16_t4 unpackInt4x8ToInt16(uint packedValue) { __target_switch @@ -24516,7 +24544,7 @@ int16_t4 unpackInt4x8ToInt16(uint packedValue) /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint packUint4x8(uint32_t4 unpackedValue) { __target_switch @@ -24534,7 +24562,7 @@ uint packUint4x8(uint32_t4 unpackedValue) /// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint packUint4x8(uint16_t4 unpackedValue) { __target_switch @@ -24548,7 +24576,7 @@ uint packUint4x8(uint16_t4 unpackedValue) /// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint packInt4x8(int32_t4 unpackedValue) { __target_switch @@ -24563,7 +24591,7 @@ uint packInt4x8(int32_t4 unpackedValue) /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint packInt4x8(int16_t4 unpackedValue) { __target_switch @@ -24578,7 +24606,7 @@ uint packInt4x8(int16_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint packUint4x8Clamp(int32_t4 unpackedValue) { __target_switch @@ -24594,7 +24622,7 @@ uint packUint4x8Clamp(int32_t4 unpackedValue) /// clamping each value to the range [0, 255] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint packUint4x8Clamp(int16_t4 unpackedValue) { __target_switch @@ -24609,7 +24637,7 @@ uint packUint4x8Clamp(int16_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint packInt4x8Clamp(int32_t4 unpackedValue) { __target_switch @@ -24625,7 +24653,7 @@ uint packInt4x8Clamp(int32_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] uint packInt4x8Clamp(int16_t4 unpackedValue) { __target_switch @@ -24635,3 +24663,397 @@ uint packInt4x8Clamp(int16_t4 unpackedValue) return packInt4x8(clamp(unpackedValue, -128, 127)); } } + +// +// Floating-point Pack/Unpack Intrinsics +// + +// @public: + +/// Unpack a 32-bit unsigned integer into four 8-bit unsigned integers. 
+/// Then, each 8-bit value is converted to a normalized single-precision +/// floating-point value to generate a 4-component vector. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +float4 unpackUnorm4x8ToFloat(uint packedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "unpackUnorm4x8"; + case metal: __intrinsic_asm "unpack_unorm4x8_to_float"; + case spirv: + return spirv_asm + { + result:$$float4 = OpExtInst glsl450 UnpackUnorm4x8 $packedValue; + }; + case wgsl: __intrinsic_asm "unpack4x8unorm"; + default: + uint4 unpackedIntegers = unpackUint4x8ToUint32(packedValue); + return float4(unpackedIntegers) / 255.0; + } +} + +/// Unpack a 32-bit unsigned integer into four 8-bit unsigned integers. +/// Then, each 8-bit value is converted to a normalized half-precision +/// floating-point value to generate a 4-component vector. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +half4 unpackUnorm4x8ToHalf(uint packedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "unpack_unorm4x8_to_half"; + default: + return half4(unpackUnorm4x8ToFloat(packedValue)); + } +} + +/// Unpack a 32-bit unsigned integer into four 8-bit signed integers. +/// Then, each 8-bit value is converted to a normalized single-precision +/// floating-point value to generate a 4-component vector. 
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)]
+float4 unpackSnorm4x8ToFloat(uint packedValue)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "unpackSnorm4x8";
+    case metal: __intrinsic_asm "unpack_snorm4x8_to_float";
+    case spirv:
+        return spirv_asm
+        {
+            result:$$float4 = OpExtInst glsl450 UnpackSnorm4x8 $packedValue;
+        };
+    case wgsl: __intrinsic_asm "unpack4x8snorm";
+    default:
+        int4 unpackedIntegers = unpackInt4x8ToInt32(packedValue);
+        return clamp(float4(unpackedIntegers) / 127.0, -1.0, 1.0);
+    }
+}
+
+/// Unpack a 32-bit unsigned integer into four 8-bit signed integers.
+/// Then, each 8-bit value is converted to a normalized half-precision
+/// floating-point value to generate a 4-component vector.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)]
+half4 unpackSnorm4x8ToHalf(uint packedValue)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "unpack_snorm4x8_to_half";
+    default:
+        return half4(unpackSnorm4x8ToFloat(packedValue));
+    }
+}
+
+/// Unpack a 32-bit unsigned integer into two 16-bit unsigned integers.
+/// Then, each 16-bit value is converted to a normalized single-precision
+/// floating-point value to generate a 2-component vector.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)]
+float2 unpackUnorm2x16ToFloat(uint packedValue)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "unpackUnorm2x16";
+    case metal: __intrinsic_asm "unpack_unorm2x16_to_float";
+    case spirv:
+        return spirv_asm
+        {
+            result:$$float2 = OpExtInst glsl450 UnpackUnorm2x16 $packedValue;
+        };
+    case wgsl: __intrinsic_asm "unpack2x16unorm";
+    default:
+        uint2 unpackedIntegers = __unpackUint2x16ToUint32(packedValue);
+        return float2(unpackedIntegers) / 65535.0;
+    }
+}
+
+/// Unpack a 32-bit unsigned integer into two 16-bit unsigned integers.
+/// Then, each 16-bit value is converted to a normalized half-precision
+/// floating-point value to generate a 2-component vector.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)]
+half2 unpackUnorm2x16ToHalf(uint packedValue)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "unpack_unorm2x16_to_half";
+    default:
+        return half2(unpackUnorm2x16ToFloat(packedValue));
+    }
+}
+
+/// Unpack a 32-bit unsigned integer into two 16-bit signed integers.
+/// Then, each 16-bit value is converted to a normalized single-precision
+/// floating-point value to generate a 2-component vector.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)]
+float2 unpackSnorm2x16ToFloat(uint packedValue)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "unpackSnorm2x16";
+    case metal: __intrinsic_asm "unpack_snorm2x16_to_float";
+    case spirv:
+        return spirv_asm
+        {
+            result:$$float2 = OpExtInst glsl450 UnpackSnorm2x16 $packedValue;
+        };
+    case wgsl: __intrinsic_asm "unpack2x16snorm";
+    default:
+        int2 unpackedIntegers = __unpackInt2x16ToInt32(packedValue);
+        return clamp(float2(unpackedIntegers) / 32767.0, -1.0, 1.0);
+    }
+}
+
+/// Unpack a 32-bit unsigned integer into two 16-bit signed integers.
+/// Then, each 16-bit value is converted to a normalized half-precision
+/// floating-point value to generate a 2-component vector.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)]
+half2 unpackSnorm2x16ToHalf(uint packedValue)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "unpack_snorm2x16_to_half";
+    default:
+        return half2(unpackSnorm2x16ToFloat(packedValue));
+    }
+}
+
+/// Unpack a 32-bit unsigned integer into two 16-bit values.
+/// Then, each 16-bit value is interpreted as an IEEE-754 binary16 number and
+/// converted to a single-precision floating-point value to generate a 2-component vector.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)]
+float2 unpackHalf2x16ToFloat(uint packedValue)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "unpackHalf2x16";
+    case spirv:
+        return spirv_asm
+        {
+            result:$$float2 = OpExtInst glsl450 UnpackHalf2x16 $packedValue;
+        };
+    case wgsl: __intrinsic_asm "unpack2x16float";
+    default:
+        uint2 unpackedIntegers = __unpackUint2x16ToUint32(packedValue);
+        return f16tof32(unpackedIntegers);
+    }
+}
+
+/// Unpack a 32-bit unsigned integer into two 16-bit values.
+/// Then, each 16-bit value is interpreted as an IEEE-754 binary16 number and
+/// converted to a half-precision floating-point value to generate a 2-component vector.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)]
+half2 unpackHalf2x16ToHalf(uint packedValue)
+{
+    return half2(unpackHalf2x16ToFloat(packedValue));
+}
+
+/// Convert a 4-component vector of normalized unsigned single-precision floating-point
+/// values to four 8-bit integer values, then pack these 8-bit values into a
+/// 32-bit unsigned integer.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)]
+uint packUnorm4x8(float4 unpackedValue)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "packUnorm4x8";
+    case metal: __intrinsic_asm "pack_float_to_unorm4x8";
+    case spirv:
+        return spirv_asm
+        {
+            result:$$uint = OpExtInst glsl450 PackUnorm4x8 $unpackedValue
+        };
+    case wgsl: __intrinsic_asm "pack4x8unorm";
+    default:
+        uint4 values = uint4(round(saturate(unpackedValue) * 255.0));
+        return packUint4x8(values);
+    }
+}
+
+/// Convert a 4-component vector of normalized unsigned half-precision floating-point
+/// values to four 8-bit integer values, then pack these 8-bit values into a
+/// 32-bit unsigned integer.
+[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +uint packUnorm4x8(half4 unpackedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "pack_half_to_unorm4x8"; + default: + return packUnorm4x8(float4(unpackedValue)); + } +} + +/// Convert a 4-component vector of normalized signed single-precision floating-point +/// values to four 8-bit integer values, then pack these 8-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +uint packSnorm4x8(float4 unpackedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "packSnorm4x8"; + case metal: __intrinsic_asm "pack_float_to_snorm4x8"; + case spirv: + return spirv_asm + { + result:$$uint = OpExtInst glsl450 PackSnorm4x8 $unpackedValue + }; + case wgsl: __intrinsic_asm "pack4x8snorm"; + default: + int4 values = int4(round(clamp(unpackedValue, -1.0, 1.0) * 127.0)) & 0xFF; + return packInt4x8(values); + } +} + +/// Convert a 4-component vector of normalized signed half-precision floating-point +/// values to four 8-bit integer values, then pack these 8-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +uint packSnorm4x8(half4 unpackedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "pack_half_to_snorm4x8"; + default: + return packSnorm4x8(float4(unpackedValue)); + } +} + +/// Convert a 2-component vector of normalized unsigned single-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. 
+[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +uint packUnorm2x16(float2 unpackedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "packUnorm2x16"; + case metal: __intrinsic_asm "pack_float_to_unorm2x16"; + case spirv: + return spirv_asm + { + result:$$uint = OpExtInst glsl450 PackUnorm2x16 $unpackedValue; + }; + case wgsl: __intrinsic_asm "pack2x16unorm"; + default: + uint2 values = uint2(round(saturate(unpackedValue) * 65535.0)); + return __packUint2x16(values); + } +} + +/// Convert a 2-component vector of normalized unsigned half-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +uint packUnorm2x16(half2 unpackedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "pack_half_to_unorm2x16"; + default: + return packUnorm2x16(float2(unpackedValue)); + } +} + +/// Convert a 2-component vector of normalized signed single-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +uint packSnorm2x16(float2 unpackedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "packSnorm2x16"; + case metal: __intrinsic_asm "pack_float_to_snorm2x16"; + case spirv: + return spirv_asm + { + result:$$uint = OpExtInst glsl450 PackSnorm2x16 $unpackedValue; + }; + case wgsl: __intrinsic_asm "pack2x16snorm"; + default: + int2 values = int2(round(clamp(unpackedValue, -1.0, 1.0) * 32767.0)) & 0xFFFF; + return __packInt2x16(values); + } +} + +/// Convert a 2-component vector of normalized signed half-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. 
+[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +uint packSnorm2x16(half2 unpackedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "pack_half_to_snorm2x16"; + default: + return packSnorm2x16(float2(unpackedValue)); + } +} + +/// Convert a 2-component vector of IEEE-754 binary16 single-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +uint packHalf2x16(float2 unpackedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "packHalf2x16"; + case spirv: + return spirv_asm + { + result:$$uint = OpExtInst glsl450 PackHalf2x16 $unpackedValue; + }; + case wgsl: __intrinsic_asm "pack2x16float"; + default: + uint2 values = f32tof16(unpackedValue); + return __packUint2x16(values); + } +} + +/// Convert a 2-component vector of IEEE-754 binary16 half-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. 
+[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +uint packHalf2x16(half2 unpackedValue) +{ + return packHalf2x16(float2(unpackedValue)); +} diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 130439fe12..9a3125268b 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -1937,6 +1937,16 @@ alias shader5_sm_4_0 = GL_ARB_gpu_shader5 | sm_4_0_version; /// [Compound] alias shader5_sm_5_0 = GL_ARB_gpu_shader5 | sm_5_0_version; +/// Capabilities required to use pack/unpack intrinsics +/// [Compound] +alias pack_unpack = GL_ARB_gpu_shader5 + | _sm_6_6 + | _cuda_sm_9_0 + | wgsl + | metal + | cpp + ; + /// Capabilities required to use GLSL-style subgroup operations 'subgroup_basic' /// [Compound] alias subgroup_basic = GL_KHR_shader_subgroup_basic diff --git a/tests/glsl-intrinsic/unpack-float.slang b/tests/glsl-intrinsic/unpack-float.slang new file mode 100644 index 0000000000..c7eb5ebc08 --- /dev/null +++ b/tests/glsl-intrinsic/unpack-float.slang @@ -0,0 +1,64 @@ +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -shaderobj -emit-spirv-via-glsl -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -shaderobj -emit-spirv-directly -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -shaderobj -render-feature hardware-device -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -profile cs_6_6 -dx12 -use-dxil -shaderobj -render-feature hardware-device -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-metal -compute -shaderobj -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cpu -compute -shaderobj -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-wgpu -compute -shaderobj -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cuda -compute -g0 
-allow-glsl + +//TEST_INPUT:ubuffer(data=[0x12345678], stride=4):name inputBuffer +StructuredBuffer inputBuffer; + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +bool verifyResult (T expected, T actual, T tolerance) +{ + return (expected - tolerance) <= actual && actual <= (expected + tolerance); +} + +bool verifyResultVector(vector expected, vector actual, T tolerance = T(0.01)) +{ + bool isValid = true; + for (int i = 0; i < N; ++i) + isValid = isValid && verifyResult(expected[i], actual[i], tolerance); + return isValid; +} + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint packed = inputBuffer[0]; + uint index = 0U; + + // + // Test GLSL intrinsics for unpacking floating points. + // Packing intrinsics are tested in `tests/hlsl-intrinsic/packed/pack-unpack-float.slang`. + // + + float4 u4x8Expected = float4(0.4706, 0.3373, 0.2039, 0.0706); + float4 u4x8Float = unpackUnorm4x8(packed); + // BUF: 1 + outputBuffer[index++] = verifyResultVector(u4x8Expected, u4x8Float); + + float4 s4x8Expected = float4(0.9449, 0.6772, 0.4094, 0.1417); + float4 s4x8Float = unpackSnorm4x8(packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(s4x8Expected, s4x8Float); + + float2 u2x16Expected = float2(0.3377, 0.0711); + float2 u2x16Float = unpackUnorm2x16(packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(u2x16Expected, u2x16Float); + + float2 s2x16Expected = float2(0.6756, 0.1422); + float2 s2x16Float = unpackSnorm2x16(packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(s2x16Expected, s2x16Float); + + float2 h2x16Expected = float2(103.5, 0.000757); + float2 h2x16Float = unpackHalf2x16(packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(h2x16Expected, h2x16Float); +} diff --git a/tests/hlsl-intrinsic/packed/pack-unpack-float.slang b/tests/hlsl-intrinsic/packed/pack-unpack-float.slang new file mode 100644 
index 0000000000..1cbd0cb498 --- /dev/null +++ b/tests/hlsl-intrinsic/packed/pack-unpack-float.slang @@ -0,0 +1,178 @@ +//TEST(compute):SIMPLE(filecheck=CHECK_SPV): -target spirv +//TEST(compute):SIMPLE(filecheck=CHECK_GLSL): -target glsl +//TEST(compute):SIMPLE(filecheck=CHECK_METAL): -target metal +//TEST(compute):SIMPLE(filecheck=CHECK_WGSL): -target wgsl + +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -shaderobj -emit-spirv-via-glsl -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -shaderobj -emit-spirv-directly -output-using-type -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -shaderobj -render-feature hardware-device -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -profile cs_6_6 -dx12 -use-dxil -shaderobj -render-feature hardware-device -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-metal -compute -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cpu -compute -shaderobj -output-using-type -allow-glsl + +// 16 bit variants are not supported by WGSL. +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-wgpu -compute -shaderobj -xslang -DWGSL -output-using-type +// Debug info for inlining errors can be given out, so disable them for this test. 
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cuda -compute -g0 -output-using-type + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +bool verifyResult (T expected, T actual, T tolerance) +{ + return (expected - tolerance) <= actual && actual <= (expected + tolerance); +} + +bool verifyResultVector(vector expected, vector actual, T tolerance = T(0.01)) +{ + bool isValid = true; + for (int i = 0; i < N; ++i) + isValid = isValid && verifyResult(expected[i], actual[i], tolerance); + return isValid; +} + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint index = 0U; + + // + // Unorm4x8 + // + // CHECK_SPV: OpExtInst{{.*}} PackUnorm4x8 + // CHECK_SPV: OpExtInst{{.*}} UnpackUnorm4x8 + // + // CHECK_GLSL: (unpackUnorm4x8 + // CHECK_GLSL: (packUnorm4x8 + // + // CHECK_METAL: unpack_unorm4x8_to_float + // CHECK_METAL: pack_float_to_unorm4x8 + // CHECK_METAL: unpack_unorm4x8_to_half + // CHECK_METAL: pack_half_to_unorm4x8 + // + // CHECK_WGSL: (unpack4x8unorm + // CHECK_WGSL: (pack4x8unorm + // + float4 unorm4x8Expected = float4(0.777, 0.233, 0.931, 0.777); + uint unorm4x8Packed = packUnorm4x8(unorm4x8Expected); + float4 unorm4x8Actual = unpackUnorm4x8ToFloat(unorm4x8Packed); + // BUF: 1 + outputBuffer[index++] = verifyResultVector(unorm4x8Expected, unorm4x8Actual); + + half4 unorm4x8HalfExpected = half4(0.123h, 0.456h, 0.789h, 0.321h); + uint unorm4x8HalfPacked = packUnorm4x8(unorm4x8HalfExpected); + half4 unorm4x8HalfActual = unpackUnorm4x8ToHalf(unorm4x8HalfPacked); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(unorm4x8HalfExpected, unorm4x8HalfActual); + + // + // Snorm4x8 + // + // CHECK_SPV: OpExtInst{{.*}} PackSnorm4x8 + // CHECK_SPV: OpExtInst{{.*}} UnpackSnorm4x8 + // + // CHECK_GLSL: (unpackSnorm4x8 + // CHECK_GLSL: (packSnorm4x8 + // + // CHECK_METAL: unpack_snorm4x8_to_float + // CHECK_METAL: pack_float_to_snorm4x8 + // 
CHECK_METAL: unpack_snorm4x8_to_half + // CHECK_METAL: pack_half_to_snorm4x8 + // + // CHECK_WGSL: (unpack4x8snorm + // CHECK_WGSL: (pack4x8snorm + // + float4 snorm4x8Expected = float4(-0.500, 0.250, -0.750, 0.999); + uint snorm4x8Packed = packSnorm4x8(snorm4x8Expected); + float4 snorm4x8Actual = unpackSnorm4x8ToFloat(snorm4x8Packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(snorm4x8Expected, snorm4x8Actual); + + half4 snorm4x8HalfExpected = half4(-0.333h, 0.666h, -1.000h, 0.500h); + uint snorm4x8HalfPacked = packSnorm4x8(snorm4x8HalfExpected); + half4 snorm4x8HalfActual = unpackSnorm4x8ToHalf(snorm4x8HalfPacked); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(snorm4x8HalfExpected, snorm4x8HalfActual); + + // + // Unorm2x16 + // + // CHECK_SPV: OpExtInst{{.*}} PackUnorm2x16 + // CHECK_SPV: OpExtInst{{.*}} UnpackUnorm2x16 + // + // CHECK_GLSL: (unpackUnorm2x16 + // CHECK_GLSL: (packUnorm2x16 + // + // CHECK_METAL: unpack_unorm2x16_to_float + // CHECK_METAL: pack_float_to_unorm2x16 + // CHECK_METAL: unpack_unorm2x16_to_half + // CHECK_METAL: pack_half_to_unorm2x16 + // + // CHECK_WGSL: (unpack2x16unorm + // CHECK_WGSL: (pack2x16unorm + // + float2 unorm2x16Expected = float2(0.1234, 0.8765); + uint unorm2x16Packed = packUnorm2x16(unorm2x16Expected); + float2 unorm2x16Actual = unpackUnorm2x16ToFloat(unorm2x16Packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(unorm2x16Expected, unorm2x16Actual); + + half2 unorm2x16HalfExpected = half2(0.7777h, 0.7777h); + uint unorm2x16HalfPacked = packUnorm2x16(unorm2x16HalfExpected); + half2 unorm2x16HalfActual = unpackUnorm2x16ToHalf(unorm2x16HalfPacked); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(unorm2x16HalfExpected, unorm2x16HalfActual); + + // + // Snorm2x16 + // + // CHECK_SPV: OpExtInst{{.*}} UnpackSnorm2x16 + // CHECK_SPV: OpExtInst{{.*}} PackSnorm2x16 + // + // CHECK_GLSL: (unpackSnorm2x16 + // CHECK_GLSL: (packSnorm2x16 + // + // CHECK_METAL: 
unpack_snorm2x16_to_float + // CHECK_METAL: pack_float_to_snorm2x16 + // CHECK_METAL: unpack_snorm2x16_to_half + // CHECK_METAL: pack_half_to_snorm2x16 + // + // CHECK_WGSL: (unpack2x16snorm + // CHECK_WGSL: (pack2x16snorm + // + float2 snorm2x16Expected = float2(-0.4444, 0.8888); + uint snorm2x16Packed = packSnorm2x16(snorm2x16Expected); + float2 snorm2x16Actual = unpackSnorm2x16ToFloat(snorm2x16Packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(snorm2x16Expected, snorm2x16Actual); + + half2 snorm2x16HalfExpected = half2(-0.9999h, 0.3333h); + uint snorm2x16HalfPacked = packSnorm2x16(snorm2x16HalfExpected); + half2 snorm2x16HalfActual = unpackSnorm2x16ToHalf(snorm2x16HalfPacked); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(snorm2x16HalfExpected, snorm2x16HalfActual); + + // + // Half2x16 + // + // CHECK_SPV: OpExtInst{{.*}} UnpackHalf2x16 + // CHECK_SPV: OpExtInst{{.*}} PackHalf2x16 + // + // CHECK_GLSL: (unpackHalf2x16 + // CHECK_GLSL: (packHalf2x16 + // + // CHECK_WGSL: (unpack2x16float + // CHECK_WGSL: (pack2x16float + // + float2 half2x16Expected = float2(130.32, -12.12); + uint half2x16Packed = packHalf2x16(half2x16Expected); + float2 half2x16Actual = unpackHalf2x16ToFloat(half2x16Packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(half2x16Expected.x, half2x16Actual.x, 0.1); + + half2 half2x16HalfExpected = half2(-2.1111h, 4450.9999h); + uint half2x16HalfPacked = packHalf2x16(half2x16HalfExpected); + half2 half2x16HalfActual = unpackHalf2x16ToHalf(half2x16HalfPacked); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(half2x16HalfExpected, half2x16HalfActual); +} diff --git a/tests/hlsl-intrinsic/packed/unpack-float.slang b/tests/hlsl-intrinsic/packed/unpack-float.slang new file mode 100644 index 0000000000..c7eb5ebc08 --- /dev/null +++ b/tests/hlsl-intrinsic/packed/unpack-float.slang @@ -0,0 +1,64 @@ +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -shaderobj 
-emit-spirv-via-glsl -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -shaderobj -emit-spirv-directly -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -shaderobj -render-feature hardware-device -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -profile cs_6_6 -dx12 -use-dxil -shaderobj -render-feature hardware-device -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-metal -compute -shaderobj -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cpu -compute -shaderobj -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-wgpu -compute -shaderobj -allow-glsl +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cuda -compute -g0 -allow-glsl + +//TEST_INPUT:ubuffer(data=[0x12345678], stride=4):name inputBuffer +StructuredBuffer inputBuffer; + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +bool verifyResult (T expected, T actual, T tolerance) +{ + return (expected - tolerance) <= actual && actual <= (expected + tolerance); +} + +bool verifyResultVector(vector expected, vector actual, T tolerance = T(0.01)) +{ + bool isValid = true; + for (int i = 0; i < N; ++i) + isValid = isValid && verifyResult(expected[i], actual[i], tolerance); + return isValid; +} + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint packed = inputBuffer[0]; + uint index = 0U; + + // + // Test GLSL intrinsics for unpacking floating points. + // Packing intrinsics are tested in `tests/hlsl-intrinsic/packed/pack-unpack-float.slang`. 
+ // + + float4 u4x8Expected = float4(0.4706, 0.3373, 0.2039, 0.0706); + float4 u4x8Float = unpackUnorm4x8(packed); + // BUF: 1 + outputBuffer[index++] = verifyResultVector(u4x8Expected, u4x8Float); + + float4 s4x8Expected = float4(0.9449, 0.6772, 0.4094, 0.1417); + float4 s4x8Float = unpackSnorm4x8(packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(s4x8Expected, s4x8Float); + + float2 u2x16Expected = float2(0.3377, 0.0711); + float2 u2x16Float = unpackUnorm2x16(packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(u2x16Expected, u2x16Float); + + float2 s2x16Expected = float2(0.6756, 0.1422); + float2 s2x16Float = unpackSnorm2x16(packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(s2x16Expected, s2x16Float); + + float2 h2x16Expected = float2(103.5, 0.000757); + float2 h2x16Float = unpackHalf2x16(packed); + // BUF-NEXT: 1 + outputBuffer[index++] = verifyResultVector(h2x16Expected, h2x16Float); +} From 864ebf0d832f5bbd8b191d8659b87deb69d0b474 Mon Sep 17 00:00:00 2001 From: fairywreath Date: Sat, 1 Mar 2025 18:00:52 -0500 Subject: [PATCH 2/3] remove unused functions and update caps in glsl meta file --- source/slang/glsl.meta.slang | 86 +++--------------------------------- 1 file changed, 5 insertions(+), 81 deletions(-) diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index fd2832d2f3..46f3189dcf 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -617,83 +617,7 @@ public vector uintBitsToFloat(highp vector x) [__readNone] [ForceInline] -uint packUnorm1x16(float c) -{ - return uint(round(clamp(c, 0.0, 1.0) * 65535.0)); -} - -[__readNone] -[ForceInline] -uint packSnorm1x16(float v) -{ - return uint(round(clamp(v ,-1.0, 1.0) * 32767.0)); -} - -[__readNone] -[ForceInline] -uint packUnorm1x8(float c) -{ - return uint(round(clamp(c, 0.0, 1.0) * 255.0)); -} - -[__readNone] -[ForceInline] -uint packSnorm1x8(float c) -{ - return uint(round(clamp(c, -1.0, 1.0) * 
127.0)); -} - -[__readNone] -[ForceInline] -float unpackUnorm1x16(uint p) -{ - const uint wordMask = 0xffff; - return float(p & wordMask) / 65535.0; -} - -[__readNone] -[ForceInline] -float unpackSnorm1x16(uint p) -{ - const uint wordMask = 0xffff; - return clamp(float(p & wordMask) / 32767.0, -1.0, 1.0); -} - -[__readNone] -[ForceInline] -float unpackUnorm1x8(uint p) -{ - const uint byteMask = 0xff; - return float(p & byteMask) / 255.0; -} - -[__readNone] -[ForceInline] -float unpackSnorm1x8(uint p) -{ - const uint byteMask = 0xff; - return clamp(float(p & byteMask) / 127.0, -1.0, 1.0); -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -uint float2half(float f) -{ - uint u = floatBitsToUint(f); - uint s = ((u >> uint(16)) & uint(0x8000)); - uint e = 0; - uint m = ((u >> uint(13)) & uint(0x03ff)); - if (m != 0) - { - e = ((((u & uint(0x7f800000)) - uint(0x38000000)) >> uint(13)) & uint(0x7c00)); - } - return (s | e | m); -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] public vec2 unpackUnorm2x16(uint p) { return unpackUnorm2x16ToFloat(p); @@ -701,7 +625,7 @@ public vec2 unpackUnorm2x16(uint p) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] public vec2 unpackSnorm2x16(uint p) { return unpackSnorm2x16ToFloat(p); @@ -709,7 +633,7 @@ public vec2 unpackSnorm2x16(uint p) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] public vec4 unpackUnorm4x8(highp uint p) { return unpackUnorm4x8ToFloat(p); @@ -717,7 +641,7 @@ public vec4 unpackUnorm4x8(highp uint p) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] public vec4 unpackSnorm4x8(highp uint p) { return unpackSnorm4x8ToFloat(p); 
@@ -745,7 +669,7 @@ public float half2float(uint h) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] public vec2 unpackHalf2x16(uint p) { return unpackHalf2x16ToFloat(p); From ed743bbb0b11698832a422b71cc56f4192eb2c4e Mon Sep 17 00:00:00 2001 From: fairywreath Date: Fri, 7 Mar 2025 15:56:40 -0600 Subject: [PATCH 3/3] rename pack capability --- .../a3-02-reference-capability-atoms.md | 4 +- source/slang/glsl.meta.slang | 10 +-- source/slang/hlsl.meta.slang | 88 +++++++++---------- source/slang/slang-capabilities.capdef | 4 +- 4 files changed, 53 insertions(+), 53 deletions(-) diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index b676db166c..696e0fa949 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -1076,8 +1076,8 @@ Compound Capabilities `shader5_sm_5_0` > Capabilities required to use sm_5_0 features apart of GL_ARB_gpu_shader5 -`pack_unpack` -> Capabilities required to use pack/unpack intrinsics +`pack_vector` +> Capabilities required to use pack/unpack intrinsics on packed vector data `subgroup_basic` > Capabilities required to use GLSL-style subgroup operations 'subgroup_basic' diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index 46f3189dcf..4412ae4607 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -617,7 +617,7 @@ public vector uintBitsToFloat(highp vector x) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] public vec2 unpackUnorm2x16(uint p) { return unpackUnorm2x16ToFloat(p); @@ -625,7 +625,7 @@ public vec2 unpackUnorm2x16(uint p) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, 
pack_vector)] public vec2 unpackSnorm2x16(uint p) { return unpackSnorm2x16ToFloat(p); @@ -633,7 +633,7 @@ public vec2 unpackSnorm2x16(uint p) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] public vec4 unpackUnorm4x8(highp uint p) { return unpackUnorm4x8ToFloat(p); @@ -641,7 +641,7 @@ public vec4 unpackUnorm4x8(highp uint p) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] public vec4 unpackSnorm4x8(highp uint p) { return unpackSnorm4x8ToFloat(p); @@ -669,7 +669,7 @@ public float half2float(uint h) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] public vec2 unpackHalf2x16(uint p) { return unpackHalf2x16ToFloat(p); diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 2ef22c6dbe..ffce2a4e04 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -24172,7 +24172,7 @@ typealias int8_t4_packed = uint; /// Unpack 4 signed 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int16_t4 unpack_s8s16(int8_t4_packed packed) { return unpackInt4x8ToInt16(packed); @@ -24181,7 +24181,7 @@ int16_t4 unpack_s8s16(int8_t4_packed packed) /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint16_t4 unpack_u8u16(uint8_t4_packed packed) { return unpackUint4x8ToUint16(packed); @@ -24190,7 +24190,7 @@ uint16_t4 unpack_u8u16(uint8_t4_packed packed) /// Unpack 4 signed 8-bit values into a vector of 32 bit integers. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int32_t4 unpack_s8s32(int8_t4_packed packed) { return unpackInt4x8ToInt32(packed); @@ -24199,7 +24199,7 @@ int32_t4 unpack_s8s32(int8_t4_packed packed) /// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint32_t4 unpack_u8u32(uint8_t4_packed packed) { return unpackUint4x8ToUint32(packed); @@ -24208,7 +24208,7 @@ uint32_t4 unpack_u8u32(uint8_t4_packed packed) /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint8_t4_packed pack_u8(uint32_t4 unpackedValue) { return packUint4x8(unpackedValue); @@ -24217,7 +24217,7 @@ uint8_t4_packed pack_u8(uint32_t4 unpackedValue) /// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int8_t4_packed pack_s8(int32_t4 unpackedValue) { return packInt4x8(unpackedValue); @@ -24226,7 +24226,7 @@ int8_t4_packed pack_s8(int32_t4 unpackedValue) /// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint8_t4_packed pack_u8(uint16_t4 unpackedValue) { return packUint4x8(unpackedValue); @@ -24235,7 +24235,7 @@ uint8_t4_packed pack_u8(uint16_t4 unpackedValue) /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int8_t4_packed pack_s8(int16_t4 unpackedValue) { return packInt4x8(unpackedValue); @@ -24245,7 +24245,7 @@ int8_t4_packed pack_s8(int16_t4 unpackedValue) /// clamping each value to the range [0, 255] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue) { return packUint4x8Clamp(unpackedValue); @@ -24255,7 +24255,7 @@ uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue) { return packInt4x8Clamp(unpackedValue); @@ -24265,7 +24265,7 @@ int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue) /// clamping each value to the range [0, 255] to ensure it fits within 8 bits. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue) { return packUint4x8Clamp(unpackedValue); @@ -24275,7 +24275,7 @@ uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue) { return packInt4x8Clamp(unpackedValue); @@ -24384,7 +24384,7 @@ uint __packInt2x16(int32_t2 unpackedValue) /// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint32_t4 unpackUint4x8ToUint32(uint packedValue) { __target_switch @@ -24411,7 +24411,7 @@ uint32_t4 unpackUint4x8ToUint32(uint packedValue) /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint16_t4 unpackUint4x8ToUint16(uint packedValue) { __target_switch @@ -24437,7 +24437,7 @@ uint16_t4 unpackUint4x8ToUint16(uint packedValue) /// Unpack 4 signed 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int32_t4 unpackInt4x8ToInt32(uint packedValue) { __target_switch @@ -24464,7 +24464,7 @@ int32_t4 unpackInt4x8ToInt32(uint packedValue) /// Unpack 4 signed 8-bit values into a vector of 16 bit integers. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int16_t4 unpackInt4x8ToInt16(uint packedValue) { __target_switch @@ -24490,7 +24490,7 @@ int16_t4 unpackInt4x8ToInt16(uint packedValue) /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUint4x8(uint32_t4 unpackedValue) { __target_switch @@ -24508,7 +24508,7 @@ uint packUint4x8(uint32_t4 unpackedValue) /// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUint4x8(uint16_t4 unpackedValue) { __target_switch @@ -24522,7 +24522,7 @@ uint packUint4x8(uint16_t4 unpackedValue) /// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packInt4x8(int32_t4 unpackedValue) { __target_switch @@ -24537,7 +24537,7 @@ uint packInt4x8(int32_t4 unpackedValue) /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packInt4x8(int16_t4 unpackedValue) { __target_switch @@ -24552,7 +24552,7 @@ uint packInt4x8(int16_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUint4x8Clamp(int32_t4 unpackedValue) { __target_switch @@ -24568,7 +24568,7 @@ uint packUint4x8Clamp(int32_t4 unpackedValue) /// clamping each value to the range [0, 255] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUint4x8Clamp(int16_t4 unpackedValue) { __target_switch @@ -24583,7 +24583,7 @@ uint packUint4x8Clamp(int16_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packInt4x8Clamp(int32_t4 unpackedValue) { __target_switch @@ -24599,7 +24599,7 @@ uint packInt4x8Clamp(int32_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packInt4x8Clamp(int16_t4 unpackedValue) { __target_switch @@ -24621,7 +24621,7 @@ uint packInt4x8Clamp(int16_t4 unpackedValue) /// floating-point value to generate a 4-component vector. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] float4 unpackUnorm4x8ToFloat(uint packedValue) { __target_switch @@ -24645,7 +24645,7 @@ float4 unpackUnorm4x8ToFloat(uint packedValue) /// floating-point value to generate a 4-component vector. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] half4 unpackUnorm4x8ToHalf(uint packedValue) { __target_switch @@ -24661,7 +24661,7 @@ half4 unpackUnorm4x8ToHalf(uint packedValue) /// floating-point value to generate a 4-component vector. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] float4 unpackSnorm4x8ToFloat(uint packedValue) { __target_switch @@ -24685,7 +24685,7 @@ float4 unpackSnorm4x8ToFloat(uint packedValue) /// floating-point value to generate a 4-component vector. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] half4 unpackSnorm4x8ToHalf(uint packedValue) { __target_switch @@ -24701,7 +24701,7 @@ half4 unpackSnorm4x8ToHalf(uint packedValue) /// floating-point value to generate a 2-component vector. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] float2 unpackUnorm2x16ToFloat(uint packedValue) { __target_switch @@ -24725,7 +24725,7 @@ float2 unpackUnorm2x16ToFloat(uint packedValue) /// floating-point value to generate a 2-component vector. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] half2 unpackUnorm2x16ToHalf(uint packedValue) { __target_switch @@ -24741,7 +24741,7 @@ half2 unpackUnorm2x16ToHalf(uint packedValue) /// floating-point value to generate a 2-component vector. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] float2 unpackSnorm2x16ToFloat(uint packedValue) { __target_switch @@ -24765,7 +24765,7 @@ float2 unpackSnorm2x16ToFloat(uint packedValue) /// floating-point value to generate a 2-component vector. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] half2 unpackSnorm2x16ToHalf(uint packedValue) { __target_switch @@ -24781,7 +24781,7 @@ half2 unpackSnorm2x16ToHalf(uint packedValue) /// floating-point value to generate a 2-component vector. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] float2 unpackHalf2x16ToFloat(uint packedValue) { __target_switch @@ -24804,7 +24804,7 @@ float2 unpackHalf2x16ToFloat(uint packedValue) /// floating-point value to generate a 2-component vector. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] half2 unpackHalf2x16ToHalf(uint packedValue) { return half2(unpackHalf2x16ToFloat(packedValue)); @@ -24815,7 +24815,7 @@ half2 unpackHalf2x16ToHalf(uint packedValue) /// 32-bit unsigned integer. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUnorm4x8(float4 unpackedValue) { __target_switch @@ -24839,7 +24839,7 @@ uint packUnorm4x8(float4 unpackedValue) /// 32-bit unsigned integer. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUnorm4x8(half4 unpackedValue) { __target_switch @@ -24855,7 +24855,7 @@ uint packUnorm4x8(half4 unpackedValue) /// 32-bit unsigned integer. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packSnorm4x8(float4 unpackedValue) { __target_switch @@ -24879,7 +24879,7 @@ uint packSnorm4x8(float4 unpackedValue) /// 32-bit unsigned integer. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packSnorm4x8(half4 unpackedValue) { __target_switch @@ -24895,7 +24895,7 @@ uint packSnorm4x8(half4 unpackedValue) /// 32-bit unsigned integer. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUnorm2x16(float2 unpackedValue) { __target_switch @@ -24919,7 +24919,7 @@ uint packUnorm2x16(float2 unpackedValue) /// 32-bit unsigned integer. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUnorm2x16(half2 unpackedValue) { __target_switch @@ -24935,7 +24935,7 @@ uint packUnorm2x16(half2 unpackedValue) /// 32-bit unsigned integer. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packSnorm2x16(float2 unpackedValue) { __target_switch @@ -24959,7 +24959,7 @@ uint packSnorm2x16(float2 unpackedValue) /// 32-bit unsigned integer. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packSnorm2x16(half2 unpackedValue) { __target_switch @@ -24975,7 +24975,7 @@ uint packSnorm2x16(half2 unpackedValue) /// 32-bit unsigned integer. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packHalf2x16(float2 unpackedValue) { __target_switch @@ -24998,7 +24998,7 @@ uint packHalf2x16(float2 unpackedValue) /// 32-bit unsigned integer. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_unpack)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packHalf2x16(half2 unpackedValue) { return packHalf2x16(float2(unpackedValue)); diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 9a3125268b..2285bd2e5a 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -1937,9 +1937,9 @@ alias shader5_sm_4_0 = GL_ARB_gpu_shader5 | sm_4_0_version; /// [Compound] alias shader5_sm_5_0 = GL_ARB_gpu_shader5 | sm_5_0_version; -/// Capabilities required to use pack/unpack intrinsics +/// Capabilities required to use pack/unpack intrinsics on packed vector data /// [Compound] -alias pack_unpack = GL_ARB_gpu_shader5 +alias pack_vector = GL_ARB_gpu_shader5 | _sm_6_6 | _cuda_sm_9_0 | wgsl