Skip to content

Commit

Permalink
[AMDGPU][True16][CodeGen] true16 codegen pat for fptrunc_round (#124044)
Browse files Browse the repository at this point in the history
true16 codegen pattern for fptrunc_round f32 to f16.

For mir test, split to preGFX11 and postGFX11. and add a true16 and a
fake16 test accordingly
  • Loading branch information
broxigarchen authored Jan 30, 2025
1 parent 7ceef1b commit a51798e
Show file tree
Hide file tree
Showing 7 changed files with 672 additions and 236 deletions.
23 changes: 23 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -228,16 +228,39 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
// Pseudo instructions used for @llvm.fptrunc.round. The final codegen is done
// in the ModeRegister pass.
let Uses = [MODE, EXEC] in {
let True16Predicate = NotHasTrue16BitInsts in
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32:$src0, i32imm:$round)>;

let True16Predicate = UseFakeTrue16Insts in
def FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32:$src0, i32imm:$round)>;

let True16Predicate = UseRealTrue16Insts in
// The operands of these pseudos should match V_CVT_F16_F32_t16_e64
def FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 : VPseudoInstSI <(outs VOPDstOperand_t16:$vdst),
(ins FP32InputMods:$src0_modifiers, VSrc_f32:$src0, Clamp0:$clamp, omod0:$omod, op_sel0:$op_sel, i32imm:$round)> {
let FPClamp = 1;
let ClampLo = 1;
let UseNamedOperandTable = 1;
}

def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VReg_64:$src0, i32imm:$round)>;
} // End Uses = [MODE, EXEC]

let True16Predicate = NotHasTrue16BitInsts in
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;

let True16Predicate = UseFakeTrue16Insts in
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $src0, (as_hw_round_mode $round))>;

let True16Predicate = UseRealTrue16Insts in
def : GCNPat <(f16 (fptrunc_round (f32 (VOP3OpSelMods f32:$src0, i32:$src0_modifiers)), (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 $src0_modifiers, $src0, (as_hw_round_mode $round))>;

def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;

Expand Down
28 changes: 15 additions & 13 deletions llvm/lib/Target/AMDGPU/SIModeRegister.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
unsigned Opcode = MI.getOpcode();
if (TII->usesFPDPRounding(MI) ||
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 ||
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 ||
Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
switch (Opcode) {
case AMDGPU::V_INTERP_P1LL_F16:
Expand All @@ -177,19 +179,19 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {
unsigned Mode = MI.getOperand(2).getImm();
MI.removeOperand(2);
// Replacing the pseudo by a real instruction in place
if (TII->getSubtarget().hasTrue16BitInsts()) {
MachineBasicBlock &MBB = *MI.getParent();
MachineInstrBuilder B(*MBB.getParent(), MI);
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e64));
MachineOperand Src0 = MI.getOperand(1);
MI.removeOperand(1);
B.addImm(0); // src0_modifiers
B.add(Src0); // re-add src0 operand
B.addImm(0); // clamp
B.addImm(0); // omod
} else
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32: {
unsigned Mode = MI.getOperand(2).getImm();
MI.removeOperand(2);
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e32));
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64: {
unsigned Mode = MI.getOperand(6).getImm();
MI.removeOperand(6);
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11

---
name: ftrunc_tonearest

body: |
bb.0:
liveins: $sgpr0
; GFX11-LABEL: name: ftrunc_tonearest
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 0, implicit $mode, implicit $exec
S_ENDPGM 0
...

---
name: ftrunc_upward

body: |
bb.0:
liveins: $sgpr0
; GFX11-LABEL: name: ftrunc_upward
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 1, implicit $mode, implicit $exec
S_ENDPGM 0
...

---
name: ftrunc_downward

body: |
bb.0:
liveins: $sgpr0
; GFX11-LABEL: name: ftrunc_downward
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e32 $vgpr1, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr1, 2, implicit $mode, implicit $exec
S_ENDPGM 0
...

---
name: ftrunc_towardzero

body: |
bb.0:
liveins: $sgpr0
; GFX11-LABEL: name: ftrunc_towardzero
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 3, implicit $mode, implicit $exec
S_ENDPGM 0
...
73 changes: 73 additions & 0 deletions llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11

---
name: ftrunc_tonearest

body: |
bb.0:
liveins: $sgpr0
; GFX11-LABEL: name: ftrunc_tonearest
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0
...

---
name: ftrunc_upward

body: |
bb.0:
liveins: $sgpr0
; GFX11-LABEL: name: ftrunc_upward
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 1, implicit $mode, implicit $exec
S_ENDPGM 0
...

---
name: ftrunc_downward

body: |
bb.0:
liveins: $sgpr0
; GFX11-LABEL: name: ftrunc_downward
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr0_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr0_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr1, 0, 0, 0, 2, implicit $mode, implicit $exec
S_ENDPGM 0
...

---
name: ftrunc_towardzero

body: |
bb.0:
liveins: $sgpr0
; GFX11-LABEL: name: ftrunc_towardzero
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 3, implicit $mode, implicit $exec
S_ENDPGM 0
...
32 changes: 0 additions & 32 deletions llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11

---
name: ftrunc_tonearest
Expand All @@ -15,13 +14,6 @@ body: |
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: ftrunc_tonearest
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 0, implicit $mode, implicit $exec
S_ENDPGM 0
Expand All @@ -39,14 +31,6 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: ftrunc_upward
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 1, implicit $mode, implicit $exec
S_ENDPGM 0
Expand All @@ -64,14 +48,6 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr0 = V_CVT_F16_F32_e32 $vgpr1, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: ftrunc_downward
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e64 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr1, 2, implicit $mode, implicit $exec
S_ENDPGM 0
Expand All @@ -89,14 +65,6 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: ftrunc_towardzero
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 3, implicit $mode, implicit $exec
S_ENDPGM 0
Expand Down
Loading

0 comments on commit a51798e

Please sign in to comment.