@@ -7,138 +7,141 @@ define void @issue63986(i64 %0, i64 %idxprom, ptr inreg %ptr) {
7
7
; CHECK-LABEL: issue63986:
8
8
; CHECK: ; %bb.0: ; %entry
9
9
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10
- ; CHECK-NEXT: v_lshlrev_b64 v[4:5], 6, v[2:3]
11
- ; CHECK-NEXT: v_mov_b32_e32 v6, s17
12
- ; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s16, v4
13
- ; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v6, v5, vcc
10
+ ; CHECK-NEXT: v_lshlrev_b64 v[8:9], 6, v[2:3]
11
+ ; CHECK-NEXT: v_mov_b32_e32 v4, s17
12
+ ; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s16, v8
13
+ ; CHECK-NEXT: v_addc_co_u32_e32 v11, vcc, v4, v9, vcc
14
+ ; CHECK-NEXT: ; %bb.1: ; %entry.loop-memcpy-expansion_crit_edge
15
+ ; CHECK-NEXT: v_mov_b32_e32 v4, 0
16
+ ; CHECK-NEXT: v_mov_b32_e32 v5, 0
17
+ ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
14
18
; CHECK-NEXT: s_mov_b64 s[4:5], 0
15
- ; CHECK-NEXT: .LBB0_1: ; %loop-memcpy-expansion
19
+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
20
+ ; CHECK-NEXT: .LBB0_2: ; %loop-memcpy-expansion
16
21
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
17
- ; CHECK-NEXT: v_mov_b32_e32 v7, s5
18
- ; CHECK-NEXT: v_mov_b32_e32 v6, s4
19
- ; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7]
20
- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v8
22
+ ; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s4, v10
21
23
; CHECK-NEXT: s_add_u32 s4, s4, 16
24
+ ; CHECK-NEXT: v_mov_b32_e32 v13, s5
22
25
; CHECK-NEXT: s_addc_u32 s5, s5, 0
23
26
; CHECK-NEXT: v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
24
- ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
27
+ ; CHECK-NEXT: v_addc_co_u32_e32 v13 , vcc, v11, v13 , vcc
25
28
; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
26
- ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
27
- ; CHECK-NEXT: flat_store_dwordx4 v[6:7], v[10:13]
28
- ; CHECK-NEXT: s_cbranch_vccz .LBB0_1
29
- ; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header
30
- ; CHECK-NEXT: s_branch .LBB0_4
31
- ; CHECK-NEXT: ; %bb.3:
32
- ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
29
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
30
+ ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
31
+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_2
32
+ ; CHECK-NEXT: ; %bb.3: ; %loop-memcpy-residual-header
33
33
; CHECK-NEXT: s_branch .LBB0_5
34
- ; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
35
- ; CHECK-NEXT: v_lshlrev_b64 v[6:7], 6, v[2:3]
36
- ; CHECK-NEXT: s_cbranch_execnz .LBB0_8
37
- ; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader
38
- ; CHECK-NEXT: s_add_u32 s4, s16, 32
39
- ; CHECK-NEXT: s_addc_u32 s5, s17, 0
40
- ; CHECK-NEXT: v_mov_b32_e32 v3, s5
41
- ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s4, v4
42
- ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
34
+ ; CHECK-NEXT: ; %bb.4:
35
+ ; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
36
+ ; CHECK-NEXT: s_branch .LBB0_6
37
+ ; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
38
+ ; CHECK-NEXT: v_lshlrev_b64 v[2:3], 6, v[2:3]
39
+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_9
40
+ ; CHECK-NEXT: .LBB0_6: ; %loop-memcpy-residual-header.loop-memcpy-residual_crit_edge
41
+ ; CHECK-NEXT: v_mov_b32_e32 v2, 0
42
+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
43
+ ; CHECK-NEXT: flat_load_ubyte v2, v[2:3]
44
+ ; CHECK-NEXT: s_add_u32 s6, s16, 32
45
+ ; CHECK-NEXT: s_addc_u32 s4, s17, 0
46
+ ; CHECK-NEXT: v_mov_b32_e32 v4, s4
47
+ ; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s6, v8
43
48
; CHECK-NEXT: s_mov_b64 s[4:5], 0
44
- ; CHECK-NEXT: ; %bb.6: ; %loop-memcpy-residual
45
- ; CHECK-NEXT: s_add_u32 s6, 32, s4
46
- ; CHECK-NEXT: s_addc_u32 s7, 0, s5
47
- ; CHECK-NEXT: v_mov_b32_e32 v6, s6
48
- ; CHECK-NEXT: v_mov_b32_e32 v7, s7
49
- ; CHECK-NEXT: flat_load_ubyte v10, v[6:7]
50
- ; CHECK-NEXT: v_mov_b32_e32 v7, s5
51
- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v2
52
- ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v3, v7, vcc
49
+ ; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v9, vcc
50
+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
51
+ ; CHECK-NEXT: ; %bb.7: ; %loop-memcpy-residual
52
+ ; CHECK-NEXT: v_mov_b32_e32 v6, s5
53
+ ; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, s4, v3
53
54
; CHECK-NEXT: s_add_u32 s4, s4, 1
55
+ ; CHECK-NEXT: v_addc_co_u32_e32 v6, vcc, v4, v6, vcc
54
56
; CHECK-NEXT: s_addc_u32 s5, s5, 0
55
- ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
56
- ; CHECK-NEXT: flat_store_byte v[6:7 ], v10
57
- ; CHECK-NEXT: ; %bb.7 :
58
- ; CHECK-NEXT: v_mov_b32_e32 v7, v5
59
- ; CHECK-NEXT: v_mov_b32_e32 v6, v4
60
- ; CHECK-NEXT: .LBB0_8 : ; %post-loop-memcpy-expansion
61
- ; CHECK-NEXT: v_and_b32_e32 v2 , 15, v0
57
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
58
+ ; CHECK-NEXT: flat_store_byte v[5:6 ], v2
59
+ ; CHECK-NEXT: ; %bb.8 :
60
+ ; CHECK-NEXT: v_mov_b32_e32 v2, v8
61
+ ; CHECK-NEXT: v_mov_b32_e32 v3, v9
62
+ ; CHECK-NEXT: .LBB0_9 : ; %post-loop-memcpy-expansion
63
+ ; CHECK-NEXT: v_and_b32_e32 v6 , 15, v0
62
64
; CHECK-NEXT: v_and_b32_e32 v0, -16, v0
63
- ; CHECK-NEXT: v_add_co_u32_e32 v4 , vcc, v6 , v0
64
- ; CHECK-NEXT: v_mov_b32_e32 v3 , 0
65
- ; CHECK-NEXT: v_addc_co_u32_e32 v5 , vcc, v7 , v1, vcc
65
+ ; CHECK-NEXT: v_add_co_u32_e32 v2 , vcc, v2 , v0
66
+ ; CHECK-NEXT: v_mov_b32_e32 v7 , 0
67
+ ; CHECK-NEXT: v_addc_co_u32_e32 v3 , vcc, v3 , v1, vcc
66
68
; CHECK-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
67
- ; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
68
- ; CHECK-NEXT: v_mov_b32_e32 v6, s17
69
- ; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, s16, v4
70
- ; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc
71
- ; CHECK-NEXT: s_branch .LBB0_11
72
- ; CHECK-NEXT: .LBB0_9: ; %Flow14
73
- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
69
+ ; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[6:7]
70
+ ; CHECK-NEXT: v_mov_b32_e32 v4, s17
71
+ ; CHECK-NEXT: v_mov_b32_e32 v8, 0
72
+ ; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s16, v2
73
+ ; CHECK-NEXT: v_mov_b32_e32 v9, 0
74
+ ; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v4, v3, vcc
75
+ ; CHECK-NEXT: s_branch .LBB0_12
76
+ ; CHECK-NEXT: .LBB0_10: ; %Flow14
77
+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
74
78
; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
75
79
; CHECK-NEXT: s_mov_b64 s[8:9], 0
76
- ; CHECK-NEXT: .LBB0_10 : ; %Flow16
77
- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
80
+ ; CHECK-NEXT: .LBB0_11 : ; %Flow16
81
+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
78
82
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9]
79
- ; CHECK-NEXT: s_cbranch_vccz .LBB0_19
80
- ; CHECK-NEXT: .LBB0_11 : ; %while.cond
83
+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_20
84
+ ; CHECK-NEXT: .LBB0_12 : ; %while.cond
81
85
; CHECK-NEXT: ; =>This Loop Header: Depth=1
82
- ; CHECK-NEXT: ; Child Loop BB0_13 Depth 2
83
- ; CHECK-NEXT: ; Child Loop BB0_17 Depth 2
86
+ ; CHECK-NEXT: ; Child Loop BB0_14 Depth 2
87
+ ; CHECK-NEXT: ; Child Loop BB0_18 Depth 2
84
88
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
85
- ; CHECK-NEXT: s_cbranch_execz .LBB0_14
86
- ; CHECK-NEXT: ; %bb.12: ; %loop-memcpy-expansion2.preheader
87
- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
89
+ ; CHECK-NEXT: s_cbranch_execz .LBB0_15
90
+ ; CHECK-NEXT: ; %bb.13: ; %while.cond.loop-memcpy-expansion2_crit_edge
91
+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
92
+ ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[8:9]
88
93
; CHECK-NEXT: s_mov_b64 s[10:11], 0
89
94
; CHECK-NEXT: s_mov_b64 s[12:13], 0
90
- ; CHECK-NEXT: .LBB0_13: ; %loop-memcpy-expansion2
91
- ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
95
+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
96
+ ; CHECK-NEXT: .LBB0_14: ; %loop-memcpy-expansion2
97
+ ; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1
92
98
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
93
- ; CHECK-NEXT: v_mov_b32_e32 v6, s12
94
- ; CHECK-NEXT: v_mov_b32_e32 v7, s13
95
- ; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7]
96
- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s12, v8
99
+ ; CHECK-NEXT: v_mov_b32_e32 v15, s13
100
+ ; CHECK-NEXT: v_add_co_u32_e32 v14, vcc, s12, v10
97
101
; CHECK-NEXT: s_add_u32 s12, s12, 16
98
- ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
102
+ ; CHECK-NEXT: v_addc_co_u32_e32 v15 , vcc, v11, v15 , vcc
99
103
; CHECK-NEXT: s_addc_u32 s13, s13, 0
100
104
; CHECK-NEXT: v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
105
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
106
+ ; CHECK-NEXT: flat_store_dwordx4 v[14:15], v[2:5]
101
107
; CHECK-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
102
- ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
103
- ; CHECK-NEXT: flat_store_dwordx4 v[6:7], v[10:13]
104
108
; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
105
- ; CHECK-NEXT: s_cbranch_execnz .LBB0_13
106
- ; CHECK-NEXT: .LBB0_14 : ; %Flow15
107
- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
109
+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_14
110
+ ; CHECK-NEXT: .LBB0_15 : ; %Flow15
111
+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
108
112
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
109
113
; CHECK-NEXT: s_mov_b64 s[8:9], -1
110
- ; CHECK-NEXT: s_cbranch_execz .LBB0_10
111
- ; CHECK-NEXT: ; %bb.15 : ; %loop-memcpy-residual-header5
112
- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
114
+ ; CHECK-NEXT: s_cbranch_execz .LBB0_11
115
+ ; CHECK-NEXT: ; %bb.16 : ; %loop-memcpy-residual-header5
116
+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
113
117
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
114
118
; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9]
115
- ; CHECK-NEXT: s_cbranch_execz .LBB0_9
116
- ; CHECK-NEXT: ; %bb.16: ; %loop-memcpy-residual4.preheader
117
- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
119
+ ; CHECK-NEXT: s_cbranch_execz .LBB0_10
120
+ ; CHECK-NEXT: ; %bb.17: ; %loop-memcpy-residual-header5.loop-memcpy-residual4_crit_edge
121
+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
122
+ ; CHECK-NEXT: flat_load_ubyte v2, v[8:9]
118
123
; CHECK-NEXT: s_mov_b64 s[12:13], 0
119
124
; CHECK-NEXT: s_mov_b64 s[14:15], 0
120
- ; CHECK-NEXT: .LBB0_17: ; %loop-memcpy-residual4
121
- ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
125
+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
126
+ ; CHECK-NEXT: .LBB0_18: ; %loop-memcpy-residual4
127
+ ; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1
122
128
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
123
- ; CHECK-NEXT: v_mov_b32_e32 v10, s15
124
- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s14, v0
125
- ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v10, vcc
126
- ; CHECK-NEXT: flat_load_ubyte v11, v[6:7]
127
- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s14, v4
129
+ ; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s14, v12
128
130
; CHECK-NEXT: s_add_u32 s14, s14, 1
131
+ ; CHECK-NEXT: v_mov_b32_e32 v4, s15
129
132
; CHECK-NEXT: s_addc_u32 s15, s15, 0
130
- ; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3 ]
131
- ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v5, v10 , vcc
133
+ ; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[6:7 ]
134
+ ; CHECK-NEXT: v_addc_co_u32_e32 v4 , vcc, v13, v4 , vcc
132
135
; CHECK-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13]
133
- ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
134
- ; CHECK-NEXT: flat_store_byte v[6:7 ], v11
136
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
137
+ ; CHECK-NEXT: flat_store_byte v[3:4 ], v2
135
138
; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13]
136
- ; CHECK-NEXT: s_cbranch_execnz .LBB0_17
137
- ; CHECK-NEXT: ; %bb.18 : ; %Flow
138
- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
139
+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_18
140
+ ; CHECK-NEXT: ; %bb.19 : ; %Flow
141
+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
139
142
; CHECK-NEXT: s_or_b64 exec, exec, s[12:13]
140
- ; CHECK-NEXT: s_branch .LBB0_9
141
- ; CHECK-NEXT: .LBB0_19 : ; %DummyReturnBlock
143
+ ; CHECK-NEXT: s_branch .LBB0_10
144
+ ; CHECK-NEXT: .LBB0_20 : ; %DummyReturnBlock
142
145
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
143
146
; CHECK-NEXT: s_setpc_b64 s[30:31]
144
147
entry:
0 commit comments