Skip to content

Commit

Permalink
[AIE2] Change scl2vec intrinsic shift amount type to unsigned int
Browse files Browse the repository at this point in the history
This matches the type in the low-level intrinsic spec.
  • Loading branch information
konstantinschwarz committed Aug 29, 2024
1 parent 396572a commit 00f5f74
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 62 deletions.
86 changes: 48 additions & 38 deletions clang/lib/Headers/aiev2_scl2vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,128 +11,138 @@
#ifndef __AIEV2_SCL2VEC_H__
#define __AIEV2_SCL2VEC_H__

INTRINSIC(v128int4) shiftx(v128int4 a, v128int4 b, int step, int shift) {
INTRINSIC(v128int4)
shiftx(v128int4 a, v128int4 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}
INTRINSIC(v64int8) shiftx(v64int8 a, v64int8 b, int step, int shift) {
INTRINSIC(v64int8) shiftx(v64int8 a, v64int8 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}
INTRINSIC(v32int16) shiftx(v32int16 a, v32int16 b, int step, int shift) {
INTRINSIC(v32int16)
shiftx(v32int16 a, v32int16 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}
INTRINSIC(v16int32) shiftx(v16int32 a, v16int32 b, int step, int shift) {
INTRINSIC(v16int32)
shiftx(v16int32 a, v16int32 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}
INTRINSIC(v128uint4) shiftx(v128uint4 a, v128uint4 b, int step, int shift) {
INTRINSIC(v128uint4)
shiftx(v128uint4 a, v128uint4 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}
INTRINSIC(v64uint8) shiftx(v64uint8 a, v64uint8 b, int step, int shift) {
INTRINSIC(v64uint8)
shiftx(v64uint8 a, v64uint8 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}
INTRINSIC(v32uint16) shiftx(v32uint16 a, v32uint16 b, int step, int shift) {
INTRINSIC(v32uint16)
shiftx(v32uint16 a, v32uint16 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}
INTRINSIC(v16uint32) shiftx(v16uint32 a, v16uint32 b, int step, int shift) {
INTRINSIC(v16uint32)
shiftx(v16uint32 a, v16uint32 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}
#if 0
INTRINSIC(v16cint16) shiftx(v16cint16 a, v16cint16 b, int step, int shift) {
INTRINSIC(v16cint16) shiftx(v16cint16 a, v16cint16 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}
INTRINSIC(v8cint32) shiftx(v8cint32 a, v8cint32 b, int step, int shift) {
INTRINSIC(v8cint32) shiftx(v8cint32 a, v8cint32 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}
#endif
INTRINSIC(v32bfloat16)
shiftx(v32bfloat16 a, v32bfloat16 b, int step, int shift) {
shiftx(v32bfloat16 a, v32bfloat16 b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_bf512_bf512(a, b, step, shift);
}
INTRINSIC(v16accfloat)
shiftx(v16accfloat a, v16accfloat b, int step, int shift) {
shiftx(v16accfloat a, v16accfloat b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}

INTRINSIC(v16float) shiftx(v16float a, v16float b, int step, int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
INTRINSIC(v16float)
shiftx(v16float a, v16float b, int step, unsigned int shift) {
return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
}

INTRINSIC(v128int4) shift_bytes(v128int4 a, v128int4 b, int shift) {
INTRINSIC(v128int4) shift_bytes(v128int4 a, v128int4 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
INTRINSIC(v64int8) shift_bytes(v64int8 a, v64int8 b, int shift) {
INTRINSIC(v64int8) shift_bytes(v64int8 a, v64int8 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
INTRINSIC(v32int16) shift_bytes(v32int16 a, v32int16 b, int shift) {
INTRINSIC(v32int16) shift_bytes(v32int16 a, v32int16 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
INTRINSIC(v16int32) shift_bytes(v16int32 a, v16int32 b, int shift) {
INTRINSIC(v16int32) shift_bytes(v16int32 a, v16int32 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
INTRINSIC(v128uint4) shift_bytes(v128uint4 a, v128uint4 b, int shift) {
INTRINSIC(v128uint4) shift_bytes(v128uint4 a, v128uint4 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
INTRINSIC(v64uint8) shift_bytes(v64uint8 a, v64uint8 b, int shift) {
INTRINSIC(v64uint8) shift_bytes(v64uint8 a, v64uint8 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
INTRINSIC(v32uint16) shift_bytes(v32uint16 a, v32uint16 b, int shift) {
INTRINSIC(v32uint16) shift_bytes(v32uint16 a, v32uint16 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
INTRINSIC(v16uint32) shift_bytes(v16uint32 a, v16uint32 b, int shift) {
INTRINSIC(v16uint32) shift_bytes(v16uint32 a, v16uint32 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
#if 0
INTRINSIC(v16cint16) shift_bytes(v16cint16 a, v16cint16 b, int shift) {
INTRINSIC(v16cint16) shift_bytes(v16cint16 a, v16cint16 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
INTRINSIC(v8cint32) shift_bytes(v8cint32 a, v8cint32 b, int shift) {
INTRINSIC(v8cint32) shift_bytes(v8cint32 a, v8cint32 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
#endif
INTRINSIC(v32bfloat16) shift_bytes(v32bfloat16 a, v32bfloat16 b, int shift) {
INTRINSIC(v32bfloat16)
shift_bytes(v32bfloat16 a, v32bfloat16 b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}
INTRINSIC(v16accfloat) shift_bytes(v16accfloat a, v16accfloat b, int shift) {
INTRINSIC(v16accfloat)
shift_bytes(v16accfloat a, v16accfloat b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}

INTRINSIC(v16float) shift_bytes(v16float a, v16float b, int shift) {
INTRINSIC(v16float) shift_bytes(v16float a, v16float b, unsigned int shift) {
return shiftx(a, b, 0, shift);
}

INTRINSIC(v64int8) shift(v64int8 a, v64int8 b, int shift) {
INTRINSIC(v64int8) shift(v64int8 a, v64int8 b, unsigned int shift) {
return shiftx(a, b, 0, shift * 1);
}
INTRINSIC(v32int16) shift(v32int16 a, v32int16 b, int shift) {
INTRINSIC(v32int16) shift(v32int16 a, v32int16 b, unsigned int shift) {
return shiftx(a, b, 0, shift * 2);
}
INTRINSIC(v16int32) shift(v16int32 a, v16int32 b, int shift) {
INTRINSIC(v16int32) shift(v16int32 a, v16int32 b, unsigned int shift) {
return shiftx(a, b, 0, shift * 4);
}
INTRINSIC(v64uint8) shift(v64uint8 a, v64uint8 b, int shift) {
INTRINSIC(v64uint8) shift(v64uint8 a, v64uint8 b, unsigned int shift) {
return shiftx(a, b, 0, shift * 1);
}
INTRINSIC(v32uint16) shift(v32uint16 a, v32uint16 b, int shift) {
INTRINSIC(v32uint16) shift(v32uint16 a, v32uint16 b, unsigned int shift) {
return shiftx(a, b, 0, shift * 2);
}
INTRINSIC(v16uint32) shift(v16uint32 a, v16uint32 b, int shift) {
INTRINSIC(v16uint32) shift(v16uint32 a, v16uint32 b, unsigned int shift) {
return shiftx(a, b, 0, shift * 4);
}
#if 0
INTRINSIC(v16cint16) shift(v16cint16 a, v16cint16 b, int shift) {
INTRINSIC(v16cint16) shift(v16cint16 a, v16cint16 b, unsigned int shift) {
return shiftx(a, b, 0, shift * 4);
}
INTRINSIC(v8cint32) shift(v8cint32 a, v8cint32 b, int shift) {
INTRINSIC(v8cint32) shift(v8cint32 a, v8cint32 b, unsigned int shift) {
return shiftx(a, b, 0, shift * 8);
}
#endif
INTRINSIC(v32bfloat16) shift(v32bfloat16 a, v32bfloat16 b, int shift) {
INTRINSIC(v32bfloat16) shift(v32bfloat16 a, v32bfloat16 b, unsigned int shift) {
return shiftx(a, b, 0, shift * 2);
}
INTRINSIC(v16accfloat) shift(v16accfloat a, v16accfloat b, int shift) {
INTRINSIC(v16accfloat) shift(v16accfloat a, v16accfloat b, unsigned int shift) {
return shiftx(a, b, 0, shift * 4);
}

INTRINSIC(v16float) shift(v16float a, v16float b, int shift) {
INTRINSIC(v16float) shift(v16float a, v16float b, unsigned int shift) {
return shiftx(a, b, 0, shift * 4);
}

Expand Down
48 changes: 24 additions & 24 deletions clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,48 +11,48 @@
// RUN: %clang -O2 %s --target=aie2 -nostdlibinc -S -emit-llvm -o - | FileCheck %s


// CHECK-LABEL: @_Z11test_shiftxDv16_iS_ii(
// CHECK-LABEL: @_Z11test_shiftxDv16_iS_ij(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i32 [[STEP:%.*]], i32 [[SHIFT:%.*]])
// CHECK-NEXT: ret <16 x i32> [[TMP0]]
//
v16int32 test_shiftx(v16int32 a, v16int32 b, int step, int shift) {
v16int32 test_shiftx(v16int32 a, v16int32 b, int step, unsigned int shift) {
return shiftx(a,b,step,shift);
}

// CHECK-LABEL: @_Z11test_shiftxDv32_tS_ii(
// CHECK-LABEL: @_Z11test_shiftxDv32_tS_ij(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[A:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <32 x i16> [[B:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 [[STEP:%.*]], i32 [[SHIFT:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <32 x i16>
// CHECK-NEXT: ret <32 x i16> [[TMP3]]
//
v32uint16 test_shiftx(v32uint16 a, v32uint16 b, int step, int shift) {
v32uint16 test_shiftx(v32uint16 a, v32uint16 b, int step, unsigned int shift) {
return shiftx(a,b,step,shift);
}

// CHECK-LABEL: @_Z16test_shift_bytesDv64_aS_i(
// CHECK-LABEL: @_Z16test_shift_bytesDv64_aS_j(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[A:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <64 x i8> [[B:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 0, i32 [[SHIFT:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <64 x i8>
// CHECK-NEXT: ret <64 x i8> [[TMP3]]
//
v64int8 test_shift_bytes(v64int8 a, v64int8 b, int shift) {
v64int8 test_shift_bytes(v64int8 a, v64int8 b, unsigned int shift) {
return shift_bytes(a,b,shift);
}

// CHECK-LABEL: @_Z10test_shiftDv64_hS_i(
// CHECK-LABEL: @_Z10test_shiftDv64_hS_j(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[A:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <64 x i8> [[B:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 0, i32 [[SHIFT_BY:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <64 x i8>
// CHECK-NEXT: ret <64 x i8> [[TMP3]]
//
v64uint8 test_shift(v64uint8 a, v64uint8 b, int shift_by) {
v64uint8 test_shift(v64uint8 a, v64uint8 b, unsigned int shift_by) {
return shift(a,b,shift_by);
}

Expand Down Expand Up @@ -1117,40 +1117,40 @@ unsigned long long test_ext_u64(v16int32 v, int idx, int sign) {

/* Test Intrinsic using ACCFLOAT type */

// CHECK-LABEL: @_Z11test_shiftxDv16_u10__accfloatS_ii(
// CHECK-LABEL: @_Z11test_shiftxDv16_u10__accfloatS_ij(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 [[STEP:%.*]], i32 [[SHIFT:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
// CHECK-NEXT: ret <8 x i64> [[TMP3]]
//
v16accfloat test_shiftx(v16accfloat a, v16accfloat b, int step, int shift) {
v16accfloat test_shiftx(v16accfloat a, v16accfloat b, int step, unsigned int shift) {
return shiftx(a,b,step,shift);
}

// CHECK-LABEL: @_Z16test_shift_bytesDv16_u10__accfloatS_i(
// CHECK-LABEL: @_Z16test_shift_bytesDv16_u10__accfloatS_j(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 0, i32 [[SHIFT:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
// CHECK-NEXT: ret <8 x i64> [[TMP3]]
//
v16accfloat test_shift_bytes(v16accfloat a, v16accfloat b, int shift) {
v16accfloat test_shift_bytes(v16accfloat a, v16accfloat b, unsigned int shift) {
return shift_bytes(a,b,shift);
}

// CHECK-LABEL: @_Z10test_shiftDv16_u10__accfloatS_i(
// CHECK-LABEL: @_Z10test_shiftDv16_u10__accfloatS_j(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[MUL_I:%.*]] = shl nsw i32 [[SHIFT_BY:%.*]], 2
// CHECK-NEXT: [[MUL_I:%.*]] = shl i32 [[SHIFT_BY:%.*]], 2
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 0, i32 [[MUL_I]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
// CHECK-NEXT: ret <8 x i64> [[TMP3]]
//
v16accfloat test_shift(v16accfloat a, v16accfloat b, int shift_by) {
v16accfloat test_shift(v16accfloat a, v16accfloat b, unsigned int shift_by) {
return shift(a, b, shift_by);
}

Expand All @@ -1176,12 +1176,12 @@ v16float test_broadcast_to_v16float (float b) {
return broadcast_to_v16float(b);
}

// CHECK-LABEL: @_Z11test_shiftxDv32_u6__bf16S_ii(
// CHECK-LABEL: @_Z11test_shiftxDv32_u6__bf16S_ij(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vshift.bf512.bf512(<32 x bfloat> [[A:%.*]], <32 x bfloat> [[B:%.*]], i32 [[STEP:%.*]], i32 [[SHIFT:%.*]])
// CHECK-NEXT: ret <32 x bfloat> [[TMP0]]
//
v32bfloat16 test_shiftx(v32bfloat16 a, v32bfloat16 b, int step, int shift) {
v32bfloat16 test_shiftx(v32bfloat16 a, v32bfloat16 b, int step, unsigned int shift) {
return shiftx(a, b, step, shift);
}

Expand Down Expand Up @@ -1358,40 +1358,40 @@ v16float test_shuffle(v16float a, unsigned int mode) {
return shuffle(a, mode);
}

// CHECK-LABEL: @_Z11test_shiftxDv16_fS_ii(
// CHECK-LABEL: @_Z11test_shiftxDv16_fS_ij(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 [[STEP:%.*]], i32 [[SHIFT:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <16 x float>
// CHECK-NEXT: ret <16 x float> [[TMP3]]
//
v16float test_shiftx(v16float a, v16float b, int step, int shift) {
v16float test_shiftx(v16float a, v16float b, int step, unsigned int shift) {
return shiftx(a,b,step,shift);
}

// CHECK-LABEL: @_Z16test_shift_bytesDv16_fS_i(
// CHECK-LABEL: @_Z16test_shift_bytesDv16_fS_j(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 0, i32 [[SHIFT:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <16 x float>
// CHECK-NEXT: ret <16 x float> [[TMP3]]
//
v16float test_shift_bytes(v16float a, v16float b, int shift) {
v16float test_shift_bytes(v16float a, v16float b, unsigned int shift) {
return shift_bytes(a, b, shift);
}

// CHECK-LABEL: @_Z10test_shiftDv16_fS_i(
// CHECK-LABEL: @_Z10test_shiftDv16_fS_j(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[MUL_I:%.*]] = shl nsw i32 [[SHIFT_BY:%.*]], 2
// CHECK-NEXT: [[MUL_I:%.*]] = shl i32 [[SHIFT_BY:%.*]], 2
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 0, i32 [[MUL_I]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <16 x float>
// CHECK-NEXT: ret <16 x float> [[TMP3]]
//
v16float test_shift(v16float a, v16float b, int shift_by) {
v16float test_shift(v16float a, v16float b, unsigned int shift_by) {
return shift(a, b, shift_by);
}

Expand Down

0 comments on commit 00f5f74

Please sign in to comment.