Skip to content

Commit

Permalink
[VectorCombine] vectorizeLoadInsert - only fold when inserting into a…
Browse files Browse the repository at this point in the history
… poison vector (llvm#119906)

We have corresponding poison tests in the "-inseltpoison.ll" sibling test files.

Fixes llvm#119900
  • Loading branch information
RKSimon authored Dec 14, 2024
1 parent 1911919 commit cc54a0c
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 69 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// Match insert into fixed vector of scalar value.
// TODO: Handle non-zero insert index.
Value *Scalar;
if (!match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
!Scalar->hasOneUse())
if (!match(&I,
m_InsertElt(m_Poison(), m_OneUse(m_Value(Scalar)), m_ZeroInt())))
return false;

// Optionally match an extract from another vector.
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ define protected amdgpu_kernel void @load_from_other_as(ptr nocapture nonnull %r
; CHECK-NEXT: bb:
; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[D:%.*]] = load float, ptr [[TMP0]], align 4
; CHECK-NEXT: [[E:%.*]] = insertelement <4 x float> undef, float [[D]], i32 0
; CHECK-NEXT: store <4 x float> [[E]], ptr [[RESULTPTR:%.*]], align 16
; CHECK-NEXT: ret void
;
Expand Down
124 changes: 59 additions & 65 deletions llvm/test/Transforms/VectorCombine/X86/load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p)

define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 4
Expand All @@ -169,8 +169,8 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n

define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 4
Expand All @@ -182,8 +182,8 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16)

define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i32 0
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
Expand All @@ -195,8 +195,8 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof

define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i32 0
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
Expand All @@ -208,8 +208,8 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %

define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i64 0
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 16
Expand All @@ -221,8 +221,8 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16)

define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load float, ptr addrspace(44) [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i64 0
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr addrspace(44) %p, align 16
Expand All @@ -235,8 +235,8 @@ define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) alig
define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %p) nofree nosync {
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
Expand All @@ -248,16 +248,11 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
; Can't safely load the offset vector, but can load+shuffle if it is profitable.

define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable(17) %p) nofree nosync {
; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
; SSE2-NEXT: ret <8 x i16> [[R]]
;
; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT: ret <8 x i16> [[R]]
; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
%s = load i16, ptr %gep, align 2
Expand All @@ -268,16 +263,11 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable
; Verify that alignment of the new load is not over-specified.

define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 dereferenceable(16) %p) nofree nosync {
; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8
; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
; SSE2-NEXT: ret <8 x i16> [[R]]
;
; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT: ret <8 x i16> [[R]]
; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
%s = load i16, ptr %gep, align 8
Expand All @@ -304,8 +294,9 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl

define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 12
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 12
Expand Down Expand Up @@ -336,8 +327,8 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab
define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %p) nofree nosync {
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
Expand Down Expand Up @@ -439,8 +430,8 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(

define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32_align(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 4
Expand All @@ -463,8 +454,8 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15)

define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
Expand All @@ -474,8 +465,8 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof

define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
Expand All @@ -485,8 +476,8 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %

define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v16f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <16 x float> undef, float [[S]], i32 0
; CHECK-NEXT: ret <16 x float> [[R]]
;
%s = load float, ptr %p, align 4
Expand All @@ -496,8 +487,8 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p)

define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v2f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 poison>
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S]], i32 0
; CHECK-NEXT: ret <2 x float> [[R]]
;
%s = load float, ptr %p, align 4
Expand Down Expand Up @@ -549,8 +540,9 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc

define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 4
; CHECK-NEXT: [[S:%.*]] = extractelement <2 x float> [[L]], i32 0
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
; CHECK-NEXT: ret <4 x float> [[R]]
;
%l = load <2 x float>, ptr %p, align 4
Expand All @@ -560,10 +552,17 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
}

define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: ret <4 x float> [[R]]
; SSE2-LABEL: @load_v8f32_extract_insert_v4f32(
; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x float>, ptr [[P:%.*]], i32 0, i32 0
; SSE2-NEXT: [[S:%.*]] = load float, ptr [[TMP1]], align 4
; SSE2-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
; SSE2-NEXT: ret <4 x float> [[R]]
;
; AVX2-LABEL: @load_v8f32_extract_insert_v4f32(
; AVX2-NEXT: [[L:%.*]] = load <8 x float>, ptr [[P:%.*]], align 4
; AVX2-NEXT: [[S:%.*]] = extractelement <8 x float> [[L]], i32 0
; AVX2-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
; AVX2-NEXT: ret <4 x float> [[R]]
;
%l = load <8 x float>, ptr %p, align 4
%s = extractelement <8 x float> %l, i32 0
Expand All @@ -589,17 +588,12 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
; Can't safely load the offset vector, but can load+shuffle if it is profitable.

define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceable(16) %p) nofree nosync {
; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8
; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
; SSE2-NEXT: ret <8 x i16> [[R]]
;
; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT: ret <8 x i16> [[R]]
; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1
%l = load <2 x i16>, ptr %gep, align 8
Expand Down

0 comments on commit cc54a0c

Please sign in to comment.