Skip to content

Commit 377c4ca

Browse files
authored
Fix the return types of dot4add_i8packed and dot4add_u8packed. (#7401)
Change the definition of the HLSL `dot4add_i8packed` and `dot4add_u8packed` intrinsics in `utils/hct/gen_intrin_main.txt` to simply spell out the return types, rather than saying that their return type is determined by their third argument. This prevents DXC from trying to give those functions declarations like `declare i64 @"\01?dot4add_u8packed@hlsl@@YA_JII_J@Z"(i32, i32, i64 signext) #1`, which seems to expect a 64-bit third argument and return value. These functions are not generic, and they have only one overload, so there is no need to use interesting `uComponentTypeId` values to get the right effects, and `HLSLExternalSource::MatchArguments` seems to get confused about how to treat argument types that affect the return types. Fixes #7400.
1 parent 474f9d2 commit 377c4ca

File tree

3 files changed

+39
-5
lines changed

3 files changed

+39
-5
lines changed

tools/clang/lib/Sema/SemaHLSL.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6737,8 +6737,8 @@ bool HLSLExternalSource::MatchArguments(
67376737
(iArg != retArgIdx && retTypeIdx == pIntrinsicArg->uComponentTypeId);
67386738
// For literal arg which don't affect return type, find concrete type.
67396739
// For literal arg affect return type,
6740-
// TryEvalIntrinsic in CGHLSLMS.cpp will take care of cases
6741-
// where all argumentss are literal.
6740+
// TryEvalIntrinsic in CGHLSLMSFinishCodeGen.cpp will take care of
6741+
// cases where all arguments are literal.
67426742
// CombineBasicTypes will cover the rest cases.
67436743
if (!affectRetType) {
67446744
TypeInfoEltKind =
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %dxc /enable-16bit-types /T cs_6_8 %s | FileCheck %s
2+
3+
// Compiling this HLSL would fail this assertion in TranslateDot4AddPacked:
4+
//
5+
// DXASSERT(
6+
// !accTy->isVectorTy() && accTy->isIntegerTy(32),
7+
// "otherwise, unexpected vector support in high level intrinsic template");
8+
//
9+
// Bug was fixed by changing the declarations of dot4add_i8packed and
10+
// dot4add_u8packed in utils/hct/gen_intrin_main.txt to simply write
11+
// out their argument and return types, rather than using the $typeN
12+
// reference syntax.
13+
14+
// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddI8Packed(acc,a,b)
15+
// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddU8Packed(acc,a,b)
16+
// CHECK: call float @dx.op.dot2AddHalf.f32{{.*}}Dot2AddHalf(acc,ax,ay,bx,by)
17+
18+
RWByteAddressBuffer buf;
19+
20+
[numthreads(1, 1, 1)]
21+
void main()
22+
{
23+
int a = dot4add_i8packed(0, 0, 0);
24+
int b = dot4add_i8packed(0, 0, a);
25+
buf.Store<int>(0, b);
26+
27+
uint c = dot4add_u8packed(0, 0, 0);
28+
uint d = dot4add_u8packed(0, 0, c);
29+
buf.Store<uint>(4, d);
30+
31+
float e = dot2add(half2(0,0), half2(0,0), 1.0);
32+
float f = dot2add(half2(0,0), half2(0,0), e);
33+
buf.Store<float>(8, f);
34+
}

utils/hct/gen_intrin_main.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -339,9 +339,9 @@ float<4,3> [[rn]] ObjectToWorld4x3();
339339
float<4,3> [[rn]] WorldToObject4x3();
340340

341341
// Packed dot products with accumulate:
342-
$type3 [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c);
343-
$type3 [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c);
344-
$type3 [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c);
342+
uint [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c);
343+
int [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c);
344+
float [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c);
345345

346346
// Unpacking intrinsics
347347
int16_t<4> [[rn]] unpack_s8s16(in p32i8 pk);

0 commit comments

Comments
 (0)