From f8956474e1e4d9bf612f630f4f157d9aa8ddb010 Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Wed, 30 Apr 2025 00:43:56 -0700 Subject: [PATCH] Fix return types of dot4add_i8packed, dot4add_u8packed, and dot2add. Change the definition of the HLSL `dot4add_i8packed`, `dot4add_u8packed`, and `dot2add` intrinsics in `utils/hct/gen_intrin_main.txt` to simply spell out the return types, rather than saying that their return type is determined by their third argument. This prevents DXC from trying to give those functions declarations like declare i64 @"\01?dot4add_u8packed@hlsl@@YA_JII_J@Z"(i32, i32, i64 signext) #1 which seems to expect a 64-bit third argument and return value. `HLSLExternalSource::MatchArguments` assumes that functions whose return type depends on their arguments' types will get cleaned up by `TryEvalIntrinsic`. Unfortunately, the `dot4add` variants cannot be constant expressions, so this cleanup does not happen for them. But these functions are not generic, and they have only one overload, so there is no need to use interesting `uComponentTypeId` values to get the right effects in the first place. Fixes #7400. --- tools/clang/lib/Sema/SemaHLSL.cpp | 4 +-- .../test/DXC/dot4add_i8_u8_packed-types.hlsl | 34 +++++++++++++++++++ utils/hct/gen_intrin_main.txt | 6 ++-- 3 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ba0801dd52..ed0291b909 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6216,8 +6216,8 @@ bool HLSLExternalSource::MatchArguments( (iArg != retArgIdx && retTypeIdx == pIntrinsicArg->uComponentTypeId); // For literal arg which don't affect return type, find concrete type. // For literal arg affect return type, - // TryEvalIntrinsic in CGHLSLMS.cpp will take care of cases - // where all argumentss are literal. 
+ // TryEvalIntrinsic in CGHLSLMSFinishCodeGen.cpp will take care of + // cases where all arguments are literal. // CombineBasicTypes will cover the rest cases. if (!affectRetType) { TypeInfoEltKind = diff --git a/tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl b/tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl new file mode 100644 index 0000000000..53c87bb9c1 --- /dev/null +++ b/tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl @@ -0,0 +1,34 @@ +// RUN: %dxc /enable-16bit-types /T cs_6_8 %s | FileCheck %s + +// Compiling this HLSL would fail this assertion in TranslateDot4AddPacked: +// +// DXASSERT( +// !accTy->isVectorTy() && accTy->isIntegerTy(32), +// "otherwise, unexpected vector support in high level intrinsic template"); +// +// Bug was fixed by changing the declarations of dot4add_i8packed, +// dot4add_u8packed, and dot2add in utils/hct/gen_intrin_main.txt to +// simply write out their return types, rather than using the $typeN +// reference syntax. + +// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddI8Packed(acc,a,b) +// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddU8Packed(acc,a,b) +// CHECK: call float @dx.op.dot2AddHalf.f32{{.*}}Dot2AddHalf(acc,ax,ay,bx,by) + +RWByteAddressBuffer buf; + +[numthreads(1, 1, 1)] +void main() +{ + int a = dot4add_i8packed(0, 0, 0); + int b = dot4add_i8packed(0, 0, a); + buf.Store(0, b); + + uint c = dot4add_u8packed(0, 0, 0); + uint d = dot4add_u8packed(0, 0, c); + buf.Store(4, d); + + float e = dot2add(half2(0,0), half2(0,0), 1.0); + float f = dot2add(half2(0,0), half2(0,0), e); + buf.Store(8, f); +} diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 7f7637b230..404ac6eb5a 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -336,9 +336,9 @@ float<4,3> [[rn]] ObjectToWorld4x3(); float<4,3> [[rn]] WorldToObject4x3(); // Packed dot products with accumulate: -$type3 [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c); -$type3 
[[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c); -$type3 [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c); +uint [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c); +int [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c); +float [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c); // Unpacking intrinsics int16_t<4> [[rn]] unpack_s8s16(in p32i8 pk);