Skip to content

Commit

Permalink
[AMDGPU][True16][CodeGen] true16 codegen pattern for v_med3_u/i16 (ll…
Browse files Browse the repository at this point in the history
…vm#121850)

True16 codegen pattern for v_med3_u/i16
  • Loading branch information
broxigarchen authored Jan 7, 2025
1 parent 478648e commit 49357b2
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 0 deletions.
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -3726,6 +3726,10 @@ let True16Predicate = NotHasTrue16BitInsts in {
defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, VSrc_b16>;
defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax, VSrc_b16>;
}
let True16Predicate = UseRealTrue16Insts in {
defm : Int16Med3Pat<V_MED3_I16_t16_e64, smin, smax, VSrcT_b16>;
defm : Int16Med3Pat<V_MED3_U16_t16_e64, umin, umax, VSrcT_b16>;
}
let True16Predicate = UseFakeTrue16Insts in {
defm : Int16Med3Pat<V_MED3_I16_fake16_e64, smin, smax, VSrc_b16>;
defm : Int16Med3Pat<V_MED3_U16_fake16_e64, umin, umax, VSrc_b16>;
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/AMDGPU/smed3.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s

declare i32 @llvm.amdgcn.workitem.id.x() #0

Expand Down Expand Up @@ -98,6 +100,8 @@ declare i64 @llvm.smin.i64(i64, i64)
; VI: v_max_i16_e32 [[MAX:v[0-9]]], 12, {{v[0-9]}}
; VI: v_min_i16_e32 {{v[0-9]}}, 17, [[MAX]]
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, 12, 17
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
define amdgpu_kernel void @v_test_smed3_r_i_i_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
Expand Down Expand Up @@ -686,6 +690,8 @@ bb:
; VI: v_max_i16

; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_test_smed3_i16_pat_0(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -707,6 +713,8 @@ bb:

; GCN-LABEL: {{^}}v_test_smed3_i16_pat_1:
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

define amdgpu_kernel void @v_test_smed3_i16_pat_1(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
bb:
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/AMDGPU/umed3.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-TRUE16 %s

declare i32 @llvm.amdgcn.workitem.id.x() #0

Expand Down Expand Up @@ -84,6 +86,8 @@ define amdgpu_kernel void @v_test_umed3_r_i_i_i64(ptr addrspace(1) %out, ptr add
; VI: v_max_u16_e32 [[MAX:v[0-9]]], 12, {{v[0-9]}}
; VI: v_min_u16_e32 {{v[0-9]}}, 17, [[MAX]]
; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
; GFX11-TRUE16: v_med3_u16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, 12, 17
; GFX11-FAKE16: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
define amdgpu_kernel void @v_test_umed3_r_i_i_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
Expand Down Expand Up @@ -707,6 +711,8 @@ bb:
; VI: v_max_u16

; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_med3_u16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
; GFX11-FAKE16: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_test_umed3_i16_pat_0(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -728,6 +734,8 @@ bb:

; GCN-LABEL: {{^}}v_test_umed3_i16_pat_1:
; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_med3_u16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
; GFX11-FAKE16: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_test_umed3_i16_pat_1(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down

0 comments on commit 49357b2

Please sign in to comment.