Skip to content

Commit

Permalink
[MachineLICM] Use RegisterClassInfo::getRegPressureSetLimit (llvm#1…
Browse files Browse the repository at this point in the history
…19826)

`RegisterClassInfo::getRegPressureSetLimit` is a wrapper of
`TargetRegisterInfo::getRegPressureSetLimit` with some logic that
adjusts the limit by removing reserved registers.

It seems that we shouldn't use
`TargetRegisterInfo::getRegPressureSetLimit`
directly, as the comment "This limit must be adjusted
dynamically for reserved registers" says.

Separate from llvm#118787
  • Loading branch information
wangpc-pp authored Jan 9, 2025
1 parent e3e26dc commit b4e17d4
Show file tree
Hide file tree
Showing 50 changed files with 32,480 additions and 32,047 deletions.
4 changes: 3 additions & 1 deletion llvm/lib/CodeGen/MachineLICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ namespace {
const TargetRegisterInfo *TRI = nullptr;
const MachineFrameInfo *MFI = nullptr;
MachineRegisterInfo *MRI = nullptr;
RegisterClassInfo RegClassInfo;
TargetSchedModel SchedModel;
bool PreRegAlloc = false;
bool HasProfileData = false;
Expand Down Expand Up @@ -392,6 +393,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
MFI = &MF.getFrameInfo();
MRI = &MF.getRegInfo();
SchedModel.init(&ST);
RegClassInfo.runOnMachineFunction(MF);

HasProfileData = MF.getFunction().hasProfileData();

Expand All @@ -408,7 +410,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
std::fill(RegPressure.begin(), RegPressure.end(), 0);
RegLimit.resize(NumRPS);
for (unsigned i = 0, e = NumRPS; i != e; ++i)
RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
RegLimit[i] = RegClassInfo.getRegPressureSetLimit(i);
}

if (HoistConstLoads)
Expand Down
670 changes: 345 additions & 325 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll

Large diffs are not rendered by default.

670 changes: 345 additions & 325 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll

Large diffs are not rendered by default.

44 changes: 20 additions & 24 deletions llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -557,11 +557,11 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: s_mul_hi_u32 s9, s0, s7
; GFX908-NEXT: s_mul_i32 s0, s0, s7
; GFX908-NEXT: s_add_i32 s1, s9, s1
; GFX908-NEXT: s_lshl_b64 s[14:15], s[0:1], 5
; GFX908-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
; GFX908-NEXT: s_branch .LBB3_2
; GFX908-NEXT: .LBB3_1: ; %Flow20
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[0:1]
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[14:15]
; GFX908-NEXT: s_cbranch_vccz .LBB3_12
; GFX908-NEXT: .LBB3_2: ; %bb9
; GFX908-NEXT: ; =>This Loop Header: Depth=1
Expand All @@ -571,17 +571,15 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: ; %bb.3: ; %bb14
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
; GFX908-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
; GFX908-NEXT: s_mov_b32 s7, s6
; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1]
; GFX908-NEXT: v_mov_b32_e32 v4, s6
; GFX908-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v6
; GFX908-NEXT: v_mov_b32_e32 v6, s6
; GFX908-NEXT: v_mov_b32_e32 v9, s7
; GFX908-NEXT: v_mov_b32_e32 v5, s7
; GFX908-NEXT: v_mov_b32_e32 v7, s7
; GFX908-NEXT: v_mov_b32_e32 v8, s6
; GFX908-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
; GFX908-NEXT: v_cmp_lt_i64_e64 s[14:15], s[4:5], 0
; GFX908-NEXT: v_cmp_gt_i64_e64 s[16:17], s[4:5], -1
; GFX908-NEXT: v_mov_b32_e32 v11, v5
; GFX908-NEXT: s_mov_b64 s[18:19], s[10:11]
; GFX908-NEXT: v_mov_b32_e32 v10, v4
Expand All @@ -601,9 +599,9 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2
; GFX908-NEXT: v_add_co_u32_sdwa v2, vcc, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX908-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX908-NEXT: s_add_u32 s18, s18, s14
; GFX908-NEXT: s_add_u32 s18, s18, s0
; GFX908-NEXT: v_cmp_lt_i64_e64 s[22:23], -1, v[2:3]
; GFX908-NEXT: s_addc_u32 s19, s19, s15
; GFX908-NEXT: s_addc_u32 s19, s19, s1
; GFX908-NEXT: s_mov_b64 s[20:21], 0
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[22:23]
; GFX908-NEXT: s_cbranch_vccz .LBB3_9
Expand All @@ -622,7 +620,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: ds_read_b64 v[12:13], v19
; GFX908-NEXT: ds_read_b64 v[14:15], v0
; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1]
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[16:17]
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_cbranch_vccnz .LBB3_7
; GFX908-NEXT: ; %bb.6: ; %bb51
Expand Down Expand Up @@ -650,7 +648,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: s_mov_b64 s[20:21], -1
; GFX908-NEXT: s_branch .LBB3_4
; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
; GFX908-NEXT: s_mov_b64 s[20:21], s[16:17]
; GFX908-NEXT: s_mov_b64 s[20:21], s[14:15]
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[20:21]
; GFX908-NEXT: s_cbranch_vccz .LBB3_4
; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
Expand All @@ -661,7 +659,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: s_xor_b64 s[16:17], s[20:21], -1
; GFX908-NEXT: .LBB3_10: ; %Flow19
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX908-NEXT: s_mov_b64 s[0:1], -1
; GFX908-NEXT: s_mov_b64 s[14:15], -1
; GFX908-NEXT: s_and_b64 vcc, exec, s[16:17]
; GFX908-NEXT: s_cbranch_vccz .LBB3_1
; GFX908-NEXT: ; %bb.11: ; %bb12
Expand All @@ -670,7 +668,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: s_addc_u32 s5, s5, 0
; GFX908-NEXT: s_add_u32 s10, s10, s12
; GFX908-NEXT: s_addc_u32 s11, s11, s13
; GFX908-NEXT: s_mov_b64 s[0:1], 0
; GFX908-NEXT: s_mov_b64 s[14:15], 0
; GFX908-NEXT: s_branch .LBB3_1
; GFX908-NEXT: .LBB3_12: ; %DummyReturnBlock
; GFX908-NEXT: s_endpgm
Expand Down Expand Up @@ -720,11 +718,11 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: s_mul_hi_u32 s9, s0, s7
; GFX90A-NEXT: s_mul_i32 s0, s0, s7
; GFX90A-NEXT: s_add_i32 s1, s9, s1
; GFX90A-NEXT: s_lshl_b64 s[14:15], s[0:1], 5
; GFX90A-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
; GFX90A-NEXT: s_branch .LBB3_2
; GFX90A-NEXT: .LBB3_1: ; %Flow20
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[0:1]
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[14:15]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_12
; GFX90A-NEXT: .LBB3_2: ; %bb9
; GFX90A-NEXT: ; =>This Loop Header: Depth=1
Expand All @@ -734,14 +732,12 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: ; %bb.3: ; %bb14
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
; GFX90A-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
; GFX90A-NEXT: s_mov_b32 s7, s6
; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1]
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v8
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[14:15], s[4:5], 0
; GFX90A-NEXT: v_cmp_gt_i64_e64 s[16:17], s[4:5], -1
; GFX90A-NEXT: s_mov_b64 s[18:19], s[10:11]
; GFX90A-NEXT: v_pk_mov_b32 v[12:13], v[6:7], v[6:7] op_sel:[0,1]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
Expand All @@ -760,8 +756,8 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2
; GFX90A-NEXT: v_add_co_u32_sdwa v4, vcc, v4, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
; GFX90A-NEXT: s_add_u32 s18, s18, s14
; GFX90A-NEXT: s_addc_u32 s19, s19, s15
; GFX90A-NEXT: s_add_u32 s18, s18, s0
; GFX90A-NEXT: s_addc_u32 s19, s19, s1
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[22:23], -1, v[4:5]
; GFX90A-NEXT: s_mov_b64 s[20:21], 0
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[22:23]
Expand All @@ -781,7 +777,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ds_read_b64 v[14:15], v19
; GFX90A-NEXT: ds_read_b64 v[16:17], v0
; GFX90A-NEXT: s_and_b64 vcc, exec, s[0:1]
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[16:17]
; GFX90A-NEXT: ; kill: killed $sgpr20 killed $sgpr21
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_cbranch_vccnz .LBB3_7
Expand All @@ -802,7 +798,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: s_mov_b64 s[20:21], -1
; GFX90A-NEXT: s_branch .LBB3_4
; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
; GFX90A-NEXT: s_mov_b64 s[20:21], s[16:17]
; GFX90A-NEXT: s_mov_b64 s[20:21], s[14:15]
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[20:21]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_4
; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
Expand All @@ -813,7 +809,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: s_xor_b64 s[16:17], s[20:21], -1
; GFX90A-NEXT: .LBB3_10: ; %Flow19
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX90A-NEXT: s_mov_b64 s[0:1], -1
; GFX90A-NEXT: s_mov_b64 s[14:15], -1
; GFX90A-NEXT: s_and_b64 vcc, exec, s[16:17]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
; GFX90A-NEXT: ; %bb.11: ; %bb12
Expand All @@ -822,7 +818,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: s_addc_u32 s5, s5, 0
; GFX90A-NEXT: s_add_u32 s10, s10, s12
; GFX90A-NEXT: s_addc_u32 s11, s11, s13
; GFX90A-NEXT: s_mov_b64 s[0:1], 0
; GFX90A-NEXT: s_mov_b64 s[14:15], 0
; GFX90A-NEXT: s_branch .LBB3_1
; GFX90A-NEXT: .LBB3_12: ; %DummyReturnBlock
; GFX90A-NEXT: s_endpgm
Expand Down
Loading

0 comments on commit b4e17d4

Please sign in to comment.