From d34f7ead886aaaca50f672c47e4f97e078d574db Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 6 Jan 2025 10:38:54 +0700 Subject: [PATCH] DAG: Fix assuming f16 is the only 16-bit fp type in concat vector combine (#121637) This would see if there are mixed integer and FP types and pick an equivalently sized FP type to use as the vector element type, and only cast if there were mixed integers. We need to insert a cast if the types are mixed, which may include different FP types. Fixes #121601 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 30 ++++++++----------- ...1601-combine-concat-vectors-assumes-f16.ll | 19 ++++++++++++ 2 files changed, 32 insertions(+), 17 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/issue121601-combine-concat-vectors-assumes-f16.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9ec3310b5219b7..e89e7efa98c1af 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24308,8 +24308,8 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); // Keep track of what we encounter. - bool AnyInteger = false; - bool AnyFP = false; + EVT AnyFPVT; + for (const SDValue &Op : N->ops()) { if (ISD::BITCAST == Op.getOpcode() && !Op.getOperand(0).getValueType().isVector()) @@ -24323,27 +24323,23 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { // If it's neither, bail out, it could be something weird like x86mmx. EVT LastOpVT = Ops.back().getValueType(); if (LastOpVT.isFloatingPoint()) - AnyFP = true; - else if (LastOpVT.isInteger()) - AnyInteger = true; - else + AnyFPVT = LastOpVT; + else if (!LastOpVT.isInteger()) return SDValue(); } // If any of the operands is a floating point scalar bitcast to a vector, // use floating point types throughout, and bitcast everything. 
// Replace UNDEFs by another scalar UNDEF node, of the final desired type. - if (AnyFP) { - SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); - if (AnyInteger) { - for (SDValue &Op : Ops) { - if (Op.getValueType() == SVT) - continue; - if (Op.isUndef()) - Op = DAG.getNode(ISD::UNDEF, DL, SVT); - else - Op = DAG.getBitcast(SVT, Op); - } + if (AnyFPVT != EVT()) { + SVT = AnyFPVT; + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.isUndef()) + Op = DAG.getNode(ISD::UNDEF, DL, SVT); + else + Op = DAG.getBitcast(SVT, Op); } } diff --git a/llvm/test/CodeGen/AMDGPU/issue121601-combine-concat-vectors-assumes-f16.ll b/llvm/test/CodeGen/AMDGPU/issue121601-combine-concat-vectors-assumes-f16.ll new file mode 100644 index 00000000000000..1a87887e28d72e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/issue121601-combine-concat-vectors-assumes-f16.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck %s + +define <4 x float> @issue121601(bfloat %fptrunc) { +; CHECK-LABEL: issue121601: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; CHECK-NEXT: v_mov_b32_e32 v1, v0 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: s_setpc_b64 s[30:31] +bb: + %bitcast = bitcast bfloat %fptrunc to <1 x bfloat> + %shufflevector = shufflevector <1 x bfloat> %bitcast, <1 x bfloat> zeroinitializer, <2 x i32> zeroinitializer + %fpext = fpext <2 x bfloat> %shufflevector to <2 x float> + %shufflevector1 = shufflevector <2 x float> %fpext, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x float> %shufflevector1 +}