[msan] Apply handleVectorReduceIntrinsic to max/min vector instructions (#129819)

Changes the handling of:
- llvm.aarch64.neon.smaxv
- llvm.aarch64.neon.sminv
- llvm.aarch64.neon.umaxv
- llvm.aarch64.neon.uminv
- llvm.vector.reduce.smax
- llvm.vector.reduce.smin
- llvm.vector.reduce.umax
- llvm.vector.reduce.umin
- llvm.vector.reduce.fmax
- llvm.vector.reduce.fmin
from the default strict handling (visitInstruction) to
handleVectorReduceIntrinsic.

Also adds a parameter to handleVectorReduceIntrinsic to specify whether
the return type must match the element type of the vector.

Updates the tests from #129741, #129810, and #129768.
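
The net effect of handleVectorReduceIntrinsic is that the shadow of the reduction result is the bitwise OR of the per-element shadows of the input vector: any uninitialized bit in any lane marks the corresponding result bit as uninitialized. The following standalone C++ sketch models that rule; it is illustrative only, and the lane type, lane count, and names are assumptions rather than code from the pass.

// Standalone model of the OR-reduce shadow rule (illustrative only).
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

using Lane = std::uint8_t;             // assumed lane type for the sketch
constexpr std::size_t kLanes = 8;      // assumed vector width

// shadow(reduce(v)) ~= shadow(v[0]) | shadow(v[1]) | ... | shadow(v[N-1])
Lane reduceShadow(const std::array<Lane, kLanes> &ElemShadows) {
  Lane S = 0;
  for (Lane E : ElemShadows)
    S |= E; // OR together the per-lane shadows
  return S;
}

int main() {
  // Lane 3 has four uninitialized low bits; every other lane is fully initialized.
  std::array<Lane, kLanes> Shadows = {0, 0, 0, 0x0F, 0, 0, 0, 0};
  std::printf("reduction shadow = 0x%02X\n",
              (unsigned)reduceShadow(Shadows)); // prints 0x0F
  return 0;
}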
thurstond authored Mar 9, 2025
1 parent 9e87caf commit 667bbd2
Showing 6 changed files with 314 additions and 531 deletions.
64 changes: 46 additions & 18 deletions llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3656,14 +3656,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Instrument generic vector reduction intrinsics
// by ORing together all their fields.
//
// The return type does not need to be the same type as the fields
// If AllowShadowCast is true, the return type does not need to be the same
// type as the fields
// e.g., declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
void handleVectorReduceIntrinsic(IntrinsicInst &I) {
void handleVectorReduceIntrinsic(IntrinsicInst &I, bool AllowShadowCast) {
assert(I.arg_size() == 1);

IRBuilder<> IRB(&I);
Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
S = CreateShadowCast(IRB, S, getShadowTy(&I));
if (AllowShadowCast)
S = CreateShadowCast(IRB, S, getShadowTy(&I));
else
assert(S->getType() == getShadowTy(&I));
setShadow(&I, S);
setOriginForNaryOp(I);
}
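// Illustrative sketch (not part of the patch) of the instrumentation this
// handler produces for the uaddv example above; the IR value names are made
// up, and the exact output is in the tests:
//   %r   = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %v)
//   ; shadow computation:
//   %s8  = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %v_shadow)
//   %s32 = zext i8 %s8 to i32   ; CreateShadowCast, since AllowShadowCast is true
//   ; %s32 becomes the shadow of %r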
@@ -3672,13 +3676,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float>
// %a1)
// shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
//
// The type of the return value, initial starting value, and elements of the
// vector must be identical.
void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
assert(I.arg_size() == 2);

IRBuilder<> IRB(&I);
Value *Shadow0 = getShadow(&I, 0);
Value *Shadow1 = IRB.CreateOrReduce(getShadow(&I, 1));
assert(Shadow0->getType() == Shadow1->getType());
Value *S = IRB.CreateOr(Shadow0, Shadow1);
assert(S->getType() == getShadowTy(&I));
setShadow(&I, S);
setOriginForNaryOp(I);
}
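// Illustrative sketch (not part of the patch) for the fadd example above; the
// IR value names are made up, and the i32/<2 x i32> types assume MSan's usual
// integer shadow types for float/<2 x float>:
//   %r  = call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
//   ; shadow computation:
//   %s1 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a1_shadow)
//   %s  = or i32 %a0_shadow, %s1
//   ; %s becomes the shadow of %r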
@@ -4461,21 +4470,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_mul:
// Add reduction to scalar
case Intrinsic::aarch64_neon_faddv:
case Intrinsic::aarch64_neon_saddv:
case Intrinsic::aarch64_neon_uaddv:
// Floating-point min/max (vector)
// The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
// but our shadow propagation is the same.
case Intrinsic::aarch64_neon_fmaxv:
case Intrinsic::aarch64_neon_fminv:
case Intrinsic::aarch64_neon_fmaxnmv:
case Intrinsic::aarch64_neon_fminnmv:
// Sum long across vector
case Intrinsic::aarch64_neon_saddlv:
case Intrinsic::aarch64_neon_uaddlv:
handleVectorReduceIntrinsic(I);
// Signed/Unsigned Min/Max
// TODO: handling similarly to AND/OR may be more precise.
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin:
// TODO: this has no false positives, but arguably we should check that all
// the bits are initialized.
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/false);
break;
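// Worked example of the extra precision the TODOs above leave on the table
// (not implemented by this change): for vector_reduce_umax, a fully
// initialized lane whose value is the maximum representable value already
// determines the result, much like an initialized 1-bit determines a bit of
// an OR; the OR-reduce rule used here still marks the result uninitialized
// whenever any other lane is uninitialized.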

case Intrinsic::vector_reduce_fadd:
@@ -4918,6 +4923,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}

// Add reduction to scalar
case Intrinsic::aarch64_neon_faddv:
case Intrinsic::aarch64_neon_saddv:
case Intrinsic::aarch64_neon_uaddv:
// Signed/Unsigned min/max (Vector)
// TODO: handling similarly to AND/OR may be more precise.
case Intrinsic::aarch64_neon_smaxv:
case Intrinsic::aarch64_neon_sminv:
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv:
// Floating-point min/max (vector)
// The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
// but our shadow propagation is the same.
case Intrinsic::aarch64_neon_fmaxv:
case Intrinsic::aarch64_neon_fminv:
case Intrinsic::aarch64_neon_fmaxnmv:
case Intrinsic::aarch64_neon_fminnmv:
// Sum long across vector
case Intrinsic::aarch64_neon_saddlv:
case Intrinsic::aarch64_neon_uaddlv:
handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
break;
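// Concrete example, taken from the updated arm64-smaxv.ll test below: for
//   %r = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a1)
// the shadow is now computed roughly as
//   %t = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a1_shadow)
//   %s = zext i8 %t to i32
// instead of the previous strict check that branched to
// __msan_warning_noreturn when any input bit was uninitialized.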

// Saturating extract narrow
case Intrinsic::aarch64_neon_sqxtn:
case Intrinsic::aarch64_neon_sqxtun:
139 changes: 40 additions & 99 deletions llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll
@@ -2,9 +2,6 @@
; RUN: opt < %s -passes=msan -S | FileCheck %s
;
; Forked from llvm/test/CodeGen/AArch64/arm64-uminv.ll
;
; Handled suboptimally (visitInstruction):
; - llvm.aarch64.neon.smaxv

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-android9001"
@@ -15,16 +12,12 @@ define signext i8 @test_vmaxv_s8(<8 x i8> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i8 [[TMP4]]
;
entry:
@@ -39,16 +32,12 @@ define signext i16 @test_vmaxv_s16(<4 x i16> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
@@ -63,15 +52,9 @@ define i32 @test_vmaxv_s32(<2 x i32> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A1]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[VMAXV_I]]
;
entry:
@@ -85,16 +68,12 @@ define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i8 [[TMP4]]
;
entry:
@@ -109,16 +88,12 @@ define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
@@ -133,15 +108,9 @@ define i32 @test_vmaxvq_s32(<4 x i32> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]])
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A1]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[VMAXV_I]]
;
entry:
@@ -156,16 +125,12 @@ define <8 x i8> @test_vmaxv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A2]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3
; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i8> [[TMP7]]
@@ -184,16 +149,12 @@ define <4 x i16> @test_vmaxv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A2]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3
; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP7]]
@@ -212,15 +173,9 @@ define <2 x i32> @test_vmaxv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A2]])
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1
; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP6]]
@@ -238,16 +193,12 @@ define <16 x i8> @test_vmaxvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A2]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3
; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[TMP7]]
@@ -266,16 +217,12 @@ define <8 x i16> @test_vmaxvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) #
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A2]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3
; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP7]]
@@ -294,15 +241,9 @@ define <4 x i32> @test_vmaxvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) #
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]])
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A2]])
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3
; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP6]]