Skip to content

Commit

Permalink
[CIR][CIRGen][Builtin][Neon] Lower neon_vaddv_s16 and neon_vaddv_u16 (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
ghehg authored Dec 6, 2024
1 parent 4ae9a04 commit 8d507b1
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 2 deletions.
11 changes: 9 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4183,10 +4183,17 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
llvm_unreachable("NEON::BI__builtin_neon_vaddv_s8 NYI");
}
case NEON::BI__builtin_neon_vaddv_u16:
llvm_unreachable("NEON::BI__builtin_neon_vaddv_u16 NYI");
usgn = true;
[[fallthrough]];
case NEON::BI__builtin_neon_vaddv_s16: {
llvm_unreachable("NEON::BI__builtin_neon_vaddv_s16 NYI");
cir::IntType eltTy = usgn ? UInt16Ty : SInt16Ty;
cir::VectorType vTy = cir::VectorType::get(builder.getContext(), eltTy, 4);
Ops.push_back(emitScalarExpr(E->getArg(0)));
// The intrinsic sums across all vector lanes, so it is emitted with a wider
// (32-bit) result type; the result is cast back to the element type below.
Ops[0] = emitNeonCall(builder, {vTy}, Ops,
usgn ? "aarch64.neon.uaddv" : "aarch64.neon.saddv",
SInt32Ty, getLoc(E->getExprLoc()));
return builder.createIntCast(Ops[0], eltTy);
}
case NEON::BI__builtin_neon_vaddvq_u8:
llvm_unreachable("NEON::BI__builtin_neon_vaddvq_u8 NYI");
Expand Down
26 changes: 26 additions & 0 deletions clang/test/CIR/CodeGen/AArch64/neon-arith.c
Original file line number Diff line number Diff line change
Expand Up @@ -893,3 +893,29 @@ uint32_t test_vaddlvq_u16(uint16x8_t a) {
// LLVM: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> [[A]])
// LLVM: ret i32 [[VADDLV_I]]
}

// Exercises lowering of vaddv_u16 on a uint16x4_t argument with a uint16_t
// result. The directive comments inside the body are output-matching patterns
// (presumably consumed by FileCheck; the RUN lines are not visible in this
// chunk) and must be kept verbatim. They expect:
//   - at the ClangIR level, a call to the "aarch64.neon.uaddv" intrinsic on a
//     4 x u16 vector yielding a signed 32-bit value, followed by an integral
//     cast to u16;
//   - at the LLVM IR level, a call to llvm.aarch64.neon.uaddv.i32.v4i16
//     immediately followed by a trunc to i16 and the return of that value.
uint16_t test_vaddv_u16(uint16x4_t a) {
return vaddv_u16(a);

// CIR-LABEL: vaddv_u16
// CIR: [[VADDV_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddv" {{%.*}} : (!cir.vector<!u16i x 4>) -> !s32i
// CIR: cir.cast(integral, [[VADDV_I]] : !s32i), !u16i

// LLVM: {{.*}}test_vaddv_u16(<4 x i16>{{.*}}[[A:%.*]])
// LLVM: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[A]])
// LLVM-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
// LLVM-NEXT: ret i16 [[TMP0]]
}

// Exercises lowering of vaddv_s16 on an int16x4_t argument with an int16_t
// result. The directive comments inside the body are output-matching patterns
// (presumably consumed by FileCheck; the RUN lines are not visible in this
// chunk) and must be kept verbatim. They expect:
//   - at the ClangIR level, a call to the "aarch64.neon.saddv" intrinsic on a
//     4 x s16 vector yielding a signed 32-bit value, followed by an integral
//     cast to s16;
//   - at the LLVM IR level, a call to llvm.aarch64.neon.saddv.i32.v4i16
//     immediately followed by a trunc to i16 and the return of that value.
int16_t test_vaddv_s16(int16x4_t a) {
return vaddv_s16(a);

// CIR-LABEL: vaddv_s16
// CIR: [[VADDV_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddv" {{%.*}} : (!cir.vector<!s16i x 4>) -> !s32i
// CIR: cir.cast(integral, [[VADDV_I]] : !s32i), !s16i

// LLVM: {{.*}}test_vaddv_s16(<4 x i16>{{.*}}[[A:%.*]])
// LLVM: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[A]])
// LLVM-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
// LLVM-NEXT: ret i16 [[TMP0]]
}

0 comments on commit 8d507b1

Please sign in to comment.