Skip to content

Commit 553be8c

Browse files
committed
Fix return types of dot4add_i8packed, dot4add_u8packed, and dot2add.
Change the definition of the HLSL `dot4add_i8packed`, `dot4add_u8packed`, and `dot2add` intrinsics in `utils/hct/gen_intrin_main.txt` to simply spell out the return types, rather than saying that their return type is determined by their third argument. This prevents DXC from trying to give those functions declarations like `declare i64 @"\01?dot4add_u8packed@hlsl@@YA_JII_J@Z"(i32, i32, i64 signext) #1`, which seems to expect a 64-bit third argument and return value. `HLSLExternalSource::MatchArguments` assumes that functions whose return type depends on their arguments' types will get cleaned up by `TryEvalIntrinsic`. Unfortunately, the `dot4add` variants cannot be constant expressions, so this cleanup does not happen for them. But these functions are not generic, and they have only one overload, so there is no need to use interesting `uComponentTypeId` values to get the right effects in the first place. Fixes #7400.
1 parent 8df7449 commit 553be8c

File tree

3 files changed

+39
-5
lines changed

3 files changed

+39
-5
lines changed

tools/clang/lib/Sema/SemaHLSL.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -6737,8 +6737,8 @@ bool HLSLExternalSource::MatchArguments(
67376737
(iArg != retArgIdx && retTypeIdx == pIntrinsicArg->uComponentTypeId);
67386738
// For literal arg which don't affect return type, find concrete type.
67396739
// For literal arg affect return type,
6740-
// TryEvalIntrinsic in CGHLSLMS.cpp will take care of cases
6741-
// where all argumentss are literal.
6740+
// TryEvalIntrinsic in CGHLSLMSFinishCodeGen.cpp will take care of
6741+
// cases where all arguments are literal.
67426742
// CombineBasicTypes will cover the rest cases.
67436743
if (!affectRetType) {
67446744
TypeInfoEltKind =
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %dxc /enable-16bit-types /T cs_6_8 %s | FileCheck %s
2+
3+
// Compiling this HLSL would fail this assertion in TranslateDot4AddPacked:
4+
//
5+
// DXASSERT(
6+
// !accTy->isVectorTy() && accTy->isIntegerTy(32),
7+
// "otherwise, unexpected vector support in high level intrinsic template");
8+
//
9+
// Bug was fixed by changing the declarations of dot4add_i8packed and
10+
// dot4add_u8packed in utils/hct/gen_intrin_main.txt to simply write
11+
// out their argument and return types, rather than using the $typeN
12+
// reference syntax.
13+
14+
// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddI8Packed(acc,a,b)
15+
// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddU8Packed(acc,a,b)
16+
// CHECK: call float @dx.op.dot2AddHalf.f32{{.*}}Dot2AddHalf(acc,ax,ay,bx,by)
17+
18+
RWByteAddressBuffer buf;
19+
20+
[numthreads(1, 1, 1)]
21+
void main()
22+
{
23+
int a = dot4add_i8packed(0, 0, 0);
24+
int b = dot4add_i8packed(0, 0, a);
25+
buf.Store<int>(0, b);
26+
27+
uint c = dot4add_u8packed(0, 0, 0);
28+
uint d = dot4add_u8packed(0, 0, c);
29+
buf.Store<uint>(4, d);
30+
31+
float e = dot2add(half2(0,0), half2(0,0), 1.0);
32+
float f = dot2add(half2(0,0), half2(0,0), e);
33+
buf.Store<float>(8, f);
34+
}

utils/hct/gen_intrin_main.txt

+3-3
Original file line numberDiff line numberDiff line change
@@ -339,9 +339,9 @@ float<4,3> [[rn]] ObjectToWorld4x3();
339339
float<4,3> [[rn]] WorldToObject4x3();
340340

341341
// Packed dot products with accumulate:
342-
$type3 [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c);
343-
$type3 [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c);
344-
$type3 [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c);
342+
uint [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c);
343+
int [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c);
344+
float [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c);
345345

346346
// Unpacking intrinsics
347347
int16_t<4> [[rn]] unpack_s8s16(in p32i8 pk);

0 commit comments

Comments
 (0)