forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DAG: Fix assuming f16 is the only 16-bit fp type in concat vector combine (llvm#121637)
This would see if there are mixed integer and FP types and pick an equivalently sized FP type to use as the vector element type, and only cast if there were mixed integers. We need to insert a cast if the types are mixed, which may include different FP types. Fixes llvm#121601.
- Loading branch information
Showing
2 changed files
with
32 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
llvm/test/CodeGen/AMDGPU/issue121601-combine-concat-vectors-assumes-f16.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck %s | ||
|
||
; Regression test for issue 121601. A scalar bfloat argument is splatted into a | ||
; <2 x bfloat> vector, extended to <2 x float>, and then widened to <4 x float> | ||
; with zeros in the upper two lanes. The expected code shifts v0 left by 16, | ||
; copies it into v1, and writes 0 to v2 and v3. | ||
; NOTE(review): per the accompanying commit message, this exercises a concat | ||
; vector combine that used to assume f16 was the only 16-bit FP type. | ||
define <4 x float> @issue121601(bfloat %fptrunc) { | ||
; CHECK-LABEL: issue121601: | ||
; CHECK: ; %bb.0: ; %bb | ||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0 | ||
; CHECK-NEXT: v_mov_b32_e32 v1, v0 | ||
; CHECK-NEXT: v_mov_b32_e32 v2, 0 | ||
; CHECK-NEXT: v_mov_b32_e32 v3, 0 | ||
; CHECK-NEXT: s_setpc_b64 s[30:31] | ||
bb: | ||
; Reinterpret the scalar bfloat argument as a one-element vector. | ||
%bitcast = bitcast bfloat %fptrunc to <1 x bfloat> | ||
; The all-zero mask picks element 0 for both result lanes (a splat). | ||
%shufflevector = shufflevector <1 x bfloat> %bitcast, <1 x bfloat> zeroinitializer, <2 x i32> zeroinitializer | ||
; Extend both bfloat lanes to float. | ||
%fpext = fpext <2 x bfloat> %shufflevector to <2 x float> | ||
; Mask <0, 1, 2, 3>: lanes 0-1 come from %fpext, lanes 2-3 from the zero vector. | ||
%shufflevector1 = shufflevector <2 x float> %fpext, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | ||
ret <4 x float> %shufflevector1 | ||
} |