Skip to content

Commit 588d97e

Browse files
committed
[X86] getTargetVShiftNode - peek through any zext node
If the shift amount has been zero-extended, peek through it, as this might help us further canonicalize the shift amount. Fixes a regression mentioned in rG147cfcbef1255ba2b4875b76708dab1a685085f5
1 parent bcc65fb commit 588d97e

File tree

2 files changed

+36
-30
lines changed

2 files changed

+36
-30
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25744,6 +25744,16 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
25744 25744
ShAmt = DAG.getVectorShuffle(AmtVT, dl, ShAmt, DAG.getUNDEF(AmtVT), Mask);
25745 25745
}
25746 25746

25747+
// Peek through any zext node if we can get back to a 128-bit source.
25748+
if (AmtVT.getScalarSizeInBits() == 64 &&
25749+
(ShAmt.getOpcode() == ISD::ZERO_EXTEND ||
25750+
ShAmt.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
25751+
ShAmt.getOperand(0).getValueType().isSimple() &&
25752+
ShAmt.getOperand(0).getValueType().is128BitVector()) {
25753+
ShAmt = ShAmt.getOperand(0);
25754+
AmtVT = ShAmt.getSimpleValueType();
25755+
}
25756+
25747 25757
// See if we can mask off the upper elements using the existing source node.
25748 25758
// The shift uses the entire lower 64-bits of the amount vector, so no need to
25749 25759
// do this for vXi64 types.
@@ -25784,10 +25794,13 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
25784 25794
// Zero-extend bottom element to v2i64 vector type, either by extension or
25785 25795
// shuffle masking.
25786 25796
if (!IsMasked && AmtVT.getScalarSizeInBits() < 64) {
25787-
if (Subtarget.hasSSE41())
25797+
if (AmtVT == MVT::v4i32 && (ShAmt.getOpcode() == X86ISD::VBROADCAST ||
25798+
ShAmt.getOpcode() == X86ISD::VBROADCAST_LOAD)) {
25799+
ShAmt = DAG.getNode(X86ISD::VZEXT_MOVL, SDLoc(ShAmt), MVT::v4i32, ShAmt);
25800+
} else if (Subtarget.hasSSE41()) {
25788 25801
ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
25789 25802
MVT::v2i64, ShAmt);
25790-
else {
25803+
} else {
25791 25804
SDValue ByteShift = DAG.getTargetConstant(
25792 25805
(128 - AmtVT.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
25793 25806
ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);

llvm/test/CodeGen/X86/vector-shift-ashr-256.ll

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2105,18 +2105,16 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
2105 2105
; AVX1-LABEL: PR52719:
2106 2106
; AVX1: # %bb.0:
2107 2107
; AVX1-NEXT: vmovd %edi, %xmm1
2108-
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
2109-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
2110-
; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm4
2111-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
2112-
; AVX1-NEXT: vpsrlq %xmm1, %xmm5, %xmm1
2113-
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
2114-
; AVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1
2115-
; AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm3
2116-
; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
2117-
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
2118-
; AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
2119-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2108+
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
2109+
; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
2110+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2111+
; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
2112+
; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
2113+
; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm3
2114+
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
2115+
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
2116+
; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
2117+
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2120 2118
; AVX1-NEXT: retq
2121 2119
;
2122 2120
; AVX2-LABEL: PR52719:
@@ -2170,28 +2168,23 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
2170 2168
;
2171 2169
; X86-AVX1-LABEL: PR52719:
2172 2170
; X86-AVX1: # %bb.0:
2173-
; X86-AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
2174-
; X86-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
2175-
; X86-AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
2176-
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
2177-
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
2178-
; X86-AVX1-NEXT: # xmm3 = mem[0,0]
2179-
; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4
2180-
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
2181-
; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm5, %xmm2
2182-
; X86-AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
2183-
; X86-AVX1-NEXT: vpsubq %xmm4, %xmm2, %xmm2
2171+
; X86-AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2172+
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
2173+
; X86-AVX1-NEXT: # xmm2 = mem[0,0]
2174+
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
2175+
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2184 2176
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
2177+
; X86-AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
2178+
; X86-AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm3
2185 2179
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
2186-
; X86-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
2187-
; X86-AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
2188-
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2180+
; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
2181+
; X86-AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
2182+
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2189 2183
; X86-AVX1-NEXT: retl
2190 2184
;
2191 2185
; X86-AVX2-LABEL: PR52719:
2192 2186
; X86-AVX2: # %bb.0:
2193-
; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
2194-
; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
2187+
; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2195 2188
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
2196 2189
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
2197 2190
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0

0 commit comments

Comments (0)