Skip to content

Commit

Permalink
Merge pull request #589 from coldav/colin/fix_vecz_lit_tests_splat
Browse files Browse the repository at this point in the history
Fix vecz lit test for splat issues after llvm 20 changes to output
  • Loading branch information
coldav authored Nov 15, 2024
2 parents 2c86f69 + d163e49 commit 18ab777
Show file tree
Hide file tree
Showing 34 changed files with 108 additions and 108 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ attributes #3 = { nobuiltin nounwind }
; CHECK: %BroadcastAddr.splatinsert = insertelement <4 x ptr addrspace(1)> {{poison|undef}}, ptr addrspace(1) %1, {{i32|i64}} 0
; CHECK: %BroadcastAddr.splat = shufflevector <4 x ptr addrspace(1)> %BroadcastAddr.splatinsert, <4 x ptr addrspace(1)> {{poison|undef}}, <4 x i32> zeroinitializer
; CHECK: %2 = getelementptr double, <4 x ptr addrspace(1)> %BroadcastAddr.splat, <4 x i64> <i64 0, i64 4, i64 8, i64 12>
; CHECK: call void @llvm.masked.scatter.v4f64.v4p1(<4 x double> %0, <4 x ptr addrspace(1)> %2, i32{{( immarg)?}} 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) #[[ATTRS:[0-9]+]]
; CHECK: call void @llvm.masked.scatter.v4f64.v4p1(<4 x double> %0, <4 x ptr addrspace(1)> %2, i32{{( immarg)?}} 8, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}) #[[ATTRS:[0-9]+]]
; CHECK: ret void

; CHECK: attributes #[[ATTRS]] = {
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ attributes #3 = { nobuiltin nounwind }
; CHECK: %BroadcastAddr.splatinsert = insertelement <4 x ptr addrspace(1)> {{poison|undef}}, ptr addrspace(1) %1, {{i32|i64}} 0
; CHECK: %BroadcastAddr.splat = shufflevector <4 x ptr addrspace(1)> %BroadcastAddr.splatinsert, <4 x ptr addrspace(1)> {{poison|undef}}, <4 x i32> zeroinitializer
; CHECK: %2 = getelementptr double, <4 x ptr addrspace(1)> %BroadcastAddr.splat, <4 x i64> <i64 0, i64 4, i64 8, i64 12>
; CHECK: call void @llvm.masked.scatter.v4f64.v4p1(<4 x double> %0, <4 x ptr addrspace(1)> %2, i32{{( immarg)?}} 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) #[[ATTRS:[0-9]+]]
; CHECK: call void @llvm.masked.scatter.v4f64.v4p1(<4 x double> %0, <4 x ptr addrspace(1)> %2, i32{{( immarg)?}} 8, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}) #[[ATTRS:[0-9]+]]
; CHECK: ret void

; CHECK: attributes #[[ATTRS]] = {
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,5 @@ declare i64 @__mux_get_global_size(i32)
; This test checks if a uniform <4 x i32> phi is not scalarized
; CHECK: define spir_kernel void @__vecz_v4_vector_loop
; CHECK: %[[STOREMERGE:.+]] = phi <4 x i32> [ %[[INC:.+]], %for.body ], [ zeroinitializer, %entry.ROSCC ]
; CHECK: %[[INC]] = add <4 x i32> %storemerge, <i32 1, i32 1, i32 1, i32 1>
; CHECK: %[[INC]] = add <4 x i32> %storemerge, {{<(i32 1(, )?)+>|splat \(i32 1\)}}
; CHECK: ret void
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ declare i64 @__mux_get_global_size(i32)
; CHECK: %storemerge{{[0-9]+}} = phi <4 x i32> [ %{{[0-9]+}}, %entry.ROSCC ], [ %inc{{[0-9]+}}, %for.cond ]
; CHECK: %storemerge{{[0-9]+}} = phi <4 x i32> [ %{{[0-9]+}}, %entry.ROSCC ], [ %inc{{[0-9]+}}, %for.cond ]
; CHECK: %storemerge{{[0-9]+}} = phi <4 x i32> [ %{{[0-9]+}}, %entry.ROSCC ], [ %inc{{[0-9]+}}, %for.cond ]
; CHECK: %inc{{[0-9]+}} = add <4 x i32> %storemerge{{[0-9]+}}, <i32 1, i32 1, i32 1, i32 1>
; CHECK: %inc{{[0-9]+}} = add <4 x i32> %storemerge{{[0-9]+}}, <i32 1, i32 1, i32 1, i32 1>
; CHECK: %inc{{[0-9]+}} = add <4 x i32> %storemerge{{[0-9]+}}, <i32 1, i32 1, i32 1, i32 1>
; CHECK: %inc{{[0-9]+}} = add <4 x i32> %storemerge{{[0-9]+}}, <i32 1, i32 1, i32 1, i32 1>
; CHECK: %inc{{[0-9]+}} = add <4 x i32> %storemerge{{[0-9]+}}, {{<(i32 1(, )?)+>|splat \(i32 1\)}}
; CHECK: %inc{{[0-9]+}} = add <4 x i32> %storemerge{{[0-9]+}}, {{<(i32 1(, )?)+>|splat \(i32 1\)}}
; CHECK: %inc{{[0-9]+}} = add <4 x i32> %storemerge{{[0-9]+}}, {{<(i32 1(, )?)+>|splat \(i32 1\)}}
; CHECK: %inc{{[0-9]+}} = add <4 x i32> %storemerge{{[0-9]+}}, {{<(i32 1(, )?)+>|splat \(i32 1\)}}
; CHECK: ret void
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ entry:
; CHECK_4F: [[WREM:%.*]] = sub nuw nsw i64 [[LSIZE]], [[LID]]
; CHECK_4F: [[T0:%.*]] = call i64 @llvm.umin.i64(i64 [[WREM]], i64 4)
; CHECK_4F: [[VL:%.*]] = trunc {{(nuw )?(nsw )?}}i64 [[T0]] to i32
; CHECK_4F: [[LHS:%.*]] = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr {{%.*}}, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 [[VL]])
; CHECK_4F: [[RHS:%.*]] = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr {{%.*}}, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 [[VL]])
; CHECK_4F: [[ADD:%.*]] = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> [[LHS]], <4 x i32> [[RHS]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 [[VL]])
; CHECK_4F: call void @llvm.vp.store.v4i32.p0(<4 x i32> [[ADD]], ptr {{%.*}}, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 [[VL]])
; CHECK_4F: [[LHS:%.*]] = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr {{%.*}}, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, i32 [[VL]])
; CHECK_4F: [[RHS:%.*]] = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr {{%.*}}, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, i32 [[VL]])
; CHECK_4F: [[ADD:%.*]] = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> [[LHS]], <4 x i32> [[RHS]], <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, i32 [[VL]])
; CHECK_4F: call void @llvm.vp.store.v4i32.p0(<4 x i32> [[ADD]], ptr {{%.*}}, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, i32 [[VL]])

; CHECK_1S: define spir_kernel void @__vecz_nxv4_vp_load_add_store_i32(
; CHECK_1S: [[LID:%.*]] = call i64 @__mux_get_local_id(i32 0)
Expand Down Expand Up @@ -84,10 +84,10 @@ entry:
; CHECK_V4_2F: [[VL:%.*]] = trunc {{(nuw )?(nsw )?}}i64 [[T0]] to i32
; Each WI performs 4 elements, so multiply the VL by 4
; CHECK_V4_2F: [[SVL:%.*]] = shl nuw nsw i32 [[VL]], 2
; CHECK_V4_2F: [[LHS:%.*]] = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr {{%.*}}, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 [[SVL]])
; CHECK_V4_2F: [[RHS:%.*]] = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr {{%.*}}, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 [[SVL]])
; CHECK_V4_2F: [[ADD:%.*]] = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> [[LHS]], <8 x i32> [[RHS]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 [[SVL]])
; CHECK_V4_2F: call void @llvm.vp.store.v8i32.p0(<8 x i32> [[ADD]], ptr {{%.*}}, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 [[SVL]])
; CHECK_V4_2F: [[LHS:%.*]] = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr {{%.*}}, <8 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, i32 [[SVL]])
; CHECK_V4_2F: [[RHS:%.*]] = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr {{%.*}}, <8 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, i32 [[SVL]])
; CHECK_V4_2F: [[ADD:%.*]] = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> [[LHS]], <8 x i32> [[RHS]], <8 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, i32 [[SVL]])
; CHECK_V4_2F: call void @llvm.vp.store.v8i32.p0(<8 x i32> [[ADD]], ptr {{%.*}}, <8 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, i32 [[SVL]])

; CHECK_V4_1S: define spir_kernel void @__vecz_nxv4_vp_load_add_store_v4i32(
; CHECK_V4_1S: [[LID:%.*]] = call i64 @__mux_get_local_id(i32 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ attributes #3 = { nobuiltin nounwind }

; And in between them there should be a barrier call
; CHECK: call void @__mux_work_group_barrier
; CHECK: call void @__vecz_b_interleaved_store8_4_Dv4_du3ptrU3AS1(<4 x double> <double 1.600000e+01, double 1.600000e+01, double 1.600000e+01, double 1.600000e+01>
; CHECK: call void @__vecz_b_interleaved_store8_4_Dv4_du3ptrU3AS1(<4 x double> {{<(double 1.600000e\+01(, )?)+>|splat \(double 1.600000e\+01\)}}
; CHECK: load <16 x double>
; CHECK: load <16 x double>
; CHECK: load <16 x double>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ declare i8 @llvm.fshl.i8(i8, i8, i8)
; It checks that the fshl intrinsic of i8 gets widened by a factor of 16
; CHECK: %[[LDA:.+]] = load <16 x i8>, ptr %{{.+}}
; CHECK: %[[LDB:.+]] = load <16 x i8>, ptr %{{.+}}
; CHECK: %[[RES:.+]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %[[LDA]], <16 x i8> %[[LDB]], <16 x i8> <{{(i8 4, )+i8 4}}>)
; CHECK: %[[RES:.+]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %[[LDA]], <16 x i8> %[[LDB]], <16 x i8> {{<(i8 4(, )?)+>|splat \(i8 4\)}})
; CHECK: store <16 x i8> %[[RES]], ptr %{{.+}}

; CHECK: ret void
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ declare i8 @llvm.fshr.i8(i8, i8, i8)
; It checks that the fshr intrinsic of i8 gets widened by a factor of 16
; CHECK: %[[LDA:.+]] = load <16 x i8>, ptr %{{.+}}
; CHECK: %[[LDB:.+]] = load <16 x i8>, ptr %{{.+}}
; CHECK: %[[RES:.+]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %[[LDA]], <16 x i8> %[[LDB]], <16 x i8> <{{(i8 2, )+i8 2}}>)
; CHECK: %[[RES:.+]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %[[LDA]], <16 x i8> %[[LDB]], <16 x i8> {{<(i8 2(, )?)+>|splat \(i8 2\)}})
; CHECK: store <16 x i8> %[[RES]], ptr %{{.+}}

; CHECK: ret void
6 changes: 3 additions & 3 deletions modules/compiler/vecz/test/lit/llvm/cmpxchg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ entry:

; Test that this cmpxchg is packetized by generating a call to an all-true masked version.
; CHECK: [[A0:%.*]] = call { <4 x i32>, <4 x i1> } @__vecz_b_v4_masked_cmpxchg_align4_acquire_monotonic_1_Dv4_u3ptrDv4_jDv4_jDv4_b(
; CHECK-SAME: <4 x ptr> [[SPLAT_PTR]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>,
; CHECK-SAME: <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
; CHECK-SAME: <4 x i1> <i1 true, i1 true, i1 true, i1 true>
; CHECK-SAME: <4 x ptr> [[SPLAT_PTR]], <4 x i32> {{<(i32 1(, )?)+>|splat \(i32 1\)}},
; CHECK-SAME: <4 x i32> {{<(i32 2(, )?)+>|splat \(i32 2\)}},
; CHECK-SAME: <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}
%old0 = cmpxchg ptr %p, i32 1, i32 2 acquire monotonic
; CHECK: [[EXT0:%.*]] = extractvalue { <4 x i32>, <4 x i1> } [[A0]], 0
%val0 = extractvalue { i32, i1 } %old0, 0
Expand Down
2 changes: 1 addition & 1 deletion modules/compiler/vecz/test/lit/llvm/define_gather_load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ declare i64 @__mux_get_global_id(i32)

; Test if the gather load is defined correctly
; CHECK: define <4 x i64> @__vecz_b_gather_load4_Dv4_mDv4_u3ptr(<4 x ptr>{{( %0)?}}) [[ATTRS:#[0-9]+]] {
; CHECK: %[[V1:[0-9]+]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %0, i32{{( immarg)?}} 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>,
; CHECK: %[[V1:[0-9]+]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %0, i32{{( immarg)?}} 4, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}},
; CHECK: ret <4 x i64> %[[V1]]

; CHECK: attributes [[ATTRS]] = { norecurse nounwind }
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ declare i64 @__mux_get_global_id(i32)

; Test if the gather load is defined correctly
; CHECK: define <4 x i64> @__vecz_b_gather_load4_Dv4_mDv4_u3ptr(<4 x ptr>{{( %0)?}}) [[ATTRS:#[0-9]+]] {
; CHECK: call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %0, i32{{( immarg)?}} 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> undef)
; CHECK: call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %0, i32{{( immarg)?}} 4, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, <4 x i64> undef)
; CHECK: ret <4 x i64>

; CHECK: attributes [[ATTRS]] = { norecurse nounwind }
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,5 @@ declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double
; CHECK: %BroadcastAddr.splatinsert = insertelement <4 x ptr addrspace(1)> {{poison|undef}}, ptr addrspace(1) %0, {{i32|i64}} 0
; CHECK: %BroadcastAddr.splat = shufflevector <4 x ptr addrspace(1)> %BroadcastAddr.splatinsert, <4 x ptr addrspace(1)> {{poison|undef}}, <4 x i32> zeroinitializer
; CHECK: %[[TMP1:.*]] = getelementptr double, <4 x ptr addrspace(1)> %BroadcastAddr.splat, <4 x i64> <i64 0, i64 4, i64 8, i64 12>
; CHECK: %[[TMP2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p1(<4 x ptr addrspace(1)> %[[TMP1]], i32{{( immarg)?}} 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
; CHECK: %[[TMP2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p1(<4 x ptr addrspace(1)> %[[TMP1]], i32{{( immarg)?}} 8, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, <4 x double> undef)
; CHECK: ret <4 x double> %[[TMP2]]
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,5 @@ attributes #3 = { nobuiltin nounwind }
; CHECK: %BroadcastAddr.splatinsert = insertelement <4 x ptr addrspace(1)> {{poison|undef}}, ptr addrspace(1) %0, {{i32|i64}} 0
; CHECK: %BroadcastAddr.splat = shufflevector <4 x ptr addrspace(1)> %BroadcastAddr.splatinsert, <4 x ptr addrspace(1)> {{poison|undef}}, <4 x i32> zeroinitializer
; CHECK: %1 = getelementptr double, <4 x ptr addrspace(1)> %BroadcastAddr.splat, <4 x i64> <i64 0, i64 4, i64 8, i64 12>
; CHECK: %2 = call <4 x double> @llvm.masked.gather.v4f64.v4p1(<4 x ptr addrspace(1)> %1, i32{{( immarg)?}} 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
; CHECK: %2 = call <4 x double> @llvm.masked.gather.v4f64.v4p1(<4 x ptr addrspace(1)> %1, i32{{( immarg)?}} 8, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}}, <4 x double> undef)
; CHECK: ret <4 x double> %2
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,5 @@ declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double
; CHECK: %BroadcastAddr.splatinsert = insertelement <4 x ptr addrspace(1)> {{poison|undef}}, ptr addrspace(1) %1, {{i32|i64}} 0
; CHECK: %BroadcastAddr.splat = shufflevector <4 x ptr addrspace(1)> %BroadcastAddr.splatinsert, <4 x ptr addrspace(1)> {{poison|undef}}, <4 x i32> zeroinitializer
; CHECK: %2 = getelementptr double, <4 x ptr addrspace(1)> %BroadcastAddr.splat, <4 x i64> <i64 0, i64 4, i64 8, i64 12>
; CHECK: call void @llvm.masked.scatter.v4f64.v4p1(<4 x double> %0, <4 x ptr addrspace(1)> %2, i32{{( immarg)?}} 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK: call void @llvm.masked.scatter.v4f64.v4p1(<4 x double> %0, <4 x ptr addrspace(1)> %2, i32{{( immarg)?}} 8, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}})
; CHECK: ret void
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,5 @@ attributes #3 = { nobuiltin nounwind }
; CHECK: %BroadcastAddr.splatinsert = insertelement <4 x ptr addrspace(1)> {{poison|undef}}, ptr addrspace(1) %1, {{i32|i64}} 0
; CHECK: %BroadcastAddr.splat = shufflevector <4 x ptr addrspace(1)> %BroadcastAddr.splatinsert, <4 x ptr addrspace(1)> {{poison|undef}}, <4 x i32> zeroinitializer
; CHECK: %2 = getelementptr double, <4 x ptr addrspace(1)> %BroadcastAddr.splat, <4 x i64> <i64 0, i64 4, i64 8, i64 12>
; CHECK: call void @llvm.masked.scatter.v4f64.v4p1(<4 x double> %0, <4 x ptr addrspace(1)> %2, i32{{( immarg)?}} 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK: call void @llvm.masked.scatter.v4f64.v4p1(<4 x double> %0, <4 x ptr addrspace(1)> %2, i32{{( immarg)?}} 8, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}})
; CHECK: ret void
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ declare i64 @__mux_get_global_id(i32)
; Test if the scatter store is defined correctly
; CHECK: define void @__vecz_b_scatter_store4_Dv4_mDv4_u3ptr(<4 x i64>{{( %0)?}}, <4 x ptr>{{( %1)?}}) [[ATTRS:#[0-9]+]] {
; CHECK: entry
; CHECK: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %0, <4 x ptr> %1, i32{{( immarg)?}} 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %0, <4 x ptr> %1, i32{{( immarg)?}} 4, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}})
; CHECK: ret void

; CHECK: attributes [[ATTRS]] = { norecurse nounwind }
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ declare i64 @__mux_get_global_id(i32)
; Test if the scatter store is defined correctly
; CHECK: define void @__vecz_b_scatter_store4_Dv4_mDv4_u3ptr(<4 x i64>{{( %0)?}}, <4 x ptr>{{( %1)?}}) [[ATTRS:#[0-9]+]] {
; CHECK: entry:
; CHECK: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %0, <4 x ptr> %1, i32{{( immarg)?}} 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %0, <4 x ptr> %1, i32{{( immarg)?}} 4, <4 x i1> {{<(i1 true(, )?)+>|splat \(i1 true\)}})
; CHECK: ret void

; CHECK: attributes [[ATTRS]] = { norecurse nounwind }
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ entry:

; Four icmps and selects
; CHECK: icmp eq <4 x i32> %{{.+}}, zeroinitializer
; CHECK: select <4 x i1> %{{.+}}, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
; CHECK: icmp eq <4 x i32> %{{.+}}, <i32 1, i32 1, i32 1, i32 1>
; CHECK: select <4 x i1> %{{.+}}, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
; CHECK: icmp eq <4 x i32> %{{.+}}, <i32 2, i32 2, i32 2, i32 2>
; CHECK: select <4 x i1> %{{.+}}, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
; CHECK: icmp eq <4 x i32> %{{.+}}, <i32 3, i32 3, i32 3, i32 3>
; CHECK: select <4 x i1> %{{.+}}, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
; CHECK: select <4 x i1> %{{.+}}, <4 x i32> {{<(i32 42(, )?)+>|splat \(i32 42\)}}
; CHECK: icmp eq <4 x i32> %{{.+}}, {{<(i32 1(, )?)+>|splat \(i32 1\)}}
; CHECK: select <4 x i1> %{{.+}}, <4 x i32> {{<(i32 42(, )?)+>|splat \(i32 42\)}}
; CHECK: icmp eq <4 x i32> %{{.+}}, {{<(i32 2(, )?)+>|splat \(i32 2\)}}
; CHECK: select <4 x i1> %{{.+}}, <4 x i32> {{<(i32 42(, )?)+>|splat \(i32 42\)}}
; CHECK: icmp eq <4 x i32> %{{.+}}, {{<(i32 3(, )?)+>|splat \(i32 3\)}}
; CHECK: select <4 x i1> %{{.+}}, <4 x i32> {{<(i32 42(, )?)+>|splat \(i32 42\)}}

; Four stores
; CHECK: store <4 x i32>
Expand Down
4 changes: 2 additions & 2 deletions modules/compiler/vecz/test/lit/llvm/interleaved_safety.ll
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,13 @@ attributes #3 = { nobuiltin nounwind }

; And in between them there should be a barrier call
; CHECK: call void @__mux_work_group_barrier
; CHECK: call void @__vecz_b_interleaved_store8_4_Dv4_du3ptrU3AS1(<4 x double> <double 1.600000e+01, double 1.600000e+01, double 1.600000e+01, double 1.600000e+01>
; CHECK: call void @__vecz_b_interleaved_store8_4_Dv4_du3ptrU3AS1(<4 x double> {{<(double 1.600000e\+01(, )?)+>|splat \(double 1.600000e\+01\)}}
; CHECK: call <4 x double> @__vecz_b_interleaved_load8_4_Dv4_du3ptrU3AS1
; CHECK: call <4 x double> @__vecz_b_interleaved_load8_4_Dv4_du3ptrU3AS1

; There shouldn't be any more interleaved loads or stores left
; CHECK-NOT: call <4 x double> @__vecz_b_interleaved_load4_Dv4_du3ptrU3AS1
; CHECK-NOT: call void @__vecz_b_interleaved_store8_4_Dv4_du3ptrU3AS1(<4 x double> <double 1.600000e+01, double 1.600000e+01, double 1.600000e+01, double 1.600000e+01>
; CHECK-NOT: call void @__vecz_b_interleaved_store8_4_Dv4_du3ptrU3AS1(<4 x double> {{<(double 1.600000e\+01(, )?)+>|splat \(double 1.600000e\+01\)}}

; There should be some shufflevector instructions after the simplification
; CHECK: shufflevector
Expand Down
10 changes: 5 additions & 5 deletions modules/compiler/vecz/test/lit/llvm/masked_atomics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ define spir_kernel void @test_fn(ptr %p) {
entry:
; CHECK: [[SPLAT_PTR_INS:%.*]] = insertelement <4 x ptr> poison, ptr %p, i64 0
; CHECK: [[SPLAT_PTR:%.*]] = shufflevector <4 x ptr> [[SPLAT_PTR_INS]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> <i64 3, i64 3, i64 3, i64 3>,
; CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> {{<(i64 3(, )?)+>|splat \(i64 3\)}},
%call = call i64 @__mux_get_global_id(i32 0)
%cmp = icmp sgt i64 3, %call
; CHECK: [[VEC_PTR:%.*]] = getelementptr i32, ptr %p, <4 x i64>
Expand All @@ -33,16 +33,16 @@ entry:

if.then: ; preds = %entry
; CHECK: = call <4 x i32> @__vecz_b_v4_masked_atomicrmw_add_align4_acquire_1_Dv4_u3ptrDv4_jDv4_b(
; CHECK-SAME: <4 x ptr> [[SPLAT_PTR]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i1> [[CMP]]
; CHECK-SAME: <4 x ptr> [[SPLAT_PTR]], <4 x i32> {{<(i32 1(, )?)+>|splat \(i32 1\)}}, <4 x i1> [[CMP]]
%old0 = atomicrmw add ptr %p, i32 1 acquire
; CHECK: = call <4 x i32> @__vecz_b_v4_masked_atomicrmw_add_align4_acquire_1_Dv4_u3ptrDv4_jDv4_b(
; CHECK-SAME: <4 x ptr> [[VEC_PTR]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i1> [[CMP]]
; CHECK-SAME: <4 x ptr> [[VEC_PTR]], <4 x i32> {{<(i32 1(, )?)+>|splat \(i32 1\)}}, <4 x i1> [[CMP]]
%old1 = atomicrmw add ptr %wi_p_i32, i32 1 acquire
; CHECK: = call <4 x i32> @__vecz_b_v4_masked_atomicrmw_umin_align2_monotonic_1_Dv4_u3ptrDv4_jDv4_b(
; CHECK-SAME: <4 x ptr> [[VEC_PTR]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i1> [[CMP]]
; CHECK-SAME: <4 x ptr> [[VEC_PTR]], <4 x i32> {{<(i32 1(, )?)+>|splat \(i32 1\)}}, <4 x i1> [[CMP]]
%old2 = atomicrmw umin ptr %wi_p_i32, i32 1 monotonic, align 2
; CHECK: = call <4 x float> @__vecz_b_v4_masked_atomicrmw_volatile_fmax_align4_seqcst_0_Dv4_u3ptrDv4_fDv4_b(
; CHECK-SAME: <4 x ptr> [[VEC_PTR]], <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x i1> [[CMP]]
; CHECK-SAME: <4 x ptr> [[VEC_PTR]], <4 x float> {{<(float 1.000000e\+00(, )?)+>|splat \(float 1.000000e\+00\)}}, <4 x i1> [[CMP]]
%old3 = atomicrmw volatile fmax ptr %wi_p_i32, float 1.0 syncscope("singlethread") seq_cst
br label %if.end

Expand Down
Loading

0 comments on commit 18ab777

Please sign in to comment.