From 56f6a6f34e05290e5bb09c13ba23f4227a282bf3 Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Wed, 30 Apr 2014 21:07:24 +0000 Subject: [PATCH] [ARM64] Prevent bit extraction to be adjusted by following shift For pattern like ((x >> C1) & Mask) << C2, DAG combiner may convert it into (x >> (C1-C2)) & (Mask << C2), which makes pattern matching of ubfx more difficult. For example: Given %shr = lshr i64 %x, 4 %and = and i64 %shr, 15 %arrayidx = getelementptr inbounds [8 x [64 x i64]]* @arr, i64 0, %i64 2, i64 %and %0 = load i64* %arrayidx With current shift folding, it takes 3 instrs to compute base address: lsr x8, x0, #1 and x8, x8, #0x78 add x8, x9, x8 If using ubfx, it only needs 2 instrs: ubfx x8, x0, #4, #4 add x8, x9, x8, lsl #3 This fixes bug 19589 --- llvm/include/llvm/Target/TargetLowering.h | 9 +++++++++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 +++ llvm/lib/Target/ARM64/ARM64ISelLowering.cpp | 15 +++++++++++++++ llvm/lib/Target/ARM64/ARM64ISelLowering.h | 3 +++ llvm/test/CodeGen/ARM64/bitfield-extract.ll | 13 +++++++++++++ 5 files changed, 43 insertions(+) diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index aadfca964aaf..d332c2d366ba 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -2017,6 +2017,15 @@ class TargetLowering : public TargetLoweringBase { /// virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + /// Return true if it is profitable to move a following shift through this + // node, adjusting any immediate operands as necessary to preserve semantics. + // This transformation may not be desirable if it disrupts a particularly + // auspicious target-specific tree (e.g. bitfield extractionon in AArch64). + // By default, it returns true. + virtual bool isDesirableToCommuteWithShift(const SDNode *N /*Op*/) const { + return true; + } + /// Return true if the target has native support for the specified value type /// and it is 'desirable' to use the type for the given node type. e.g. On x86 /// i16 is legal, but undesirable since i16 instruction encodings are longer diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 014e97d7b2a9..290f2a1ea278 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3867,6 +3867,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { return SDValue(); } + if (!TLI.isDesirableToCommuteWithShift(LHS)) + return SDValue(); + // Fold the constants, shifting the binop RHS by the shift amount. SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), N->getValueType(0), diff --git a/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp b/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp index ccd80175faf1..6dd588c3705e 100644 --- a/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -5931,6 +5931,21 @@ ARM64TargetLowering::getScratchRegisters(CallingConv::ID) const { return ScratchRegs; } +bool ARM64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const { + EVT VT = N->getValueType(0); + // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine + // it with shift to let it be lowered to UBFX. + if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) && + isa(N->getOperand(1))) { + uint64_t TruncMask = N->getConstantOperandVal(1); + if (isMask_64(TruncMask) && + N->getOperand(0).getOpcode() == ISD::SRL && + isa(N->getOperand(0)->getOperand(1))) + return false; + } + return true; +} + bool ARM64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); diff --git a/llvm/lib/Target/ARM64/ARM64ISelLowering.h b/llvm/lib/Target/ARM64/ARM64ISelLowering.h index ddaf347bf1ab..96a32ea85c00 100644 --- a/llvm/lib/Target/ARM64/ARM64ISelLowering.h +++ b/llvm/lib/Target/ARM64/ARM64ISelLowering.h @@ -284,6 +284,9 @@ class ARM64TargetLowering : public TargetLowering { const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; + /// \brief Returns false if N is a bit extraction pattern of (X >> C) & Mask. + bool isDesirableToCommuteWithShift(const SDNode *N) const override; + /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool shouldConvertConstantLoadToIntImm(const APInt &Imm, diff --git a/llvm/test/CodeGen/ARM64/bitfield-extract.ll b/llvm/test/CodeGen/ARM64/bitfield-extract.ll index 6de563c1eb60..3ea6d938e9d8 100644 --- a/llvm/test/CodeGen/ARM64/bitfield-extract.ll +++ b/llvm/test/CodeGen/ARM64/bitfield-extract.ll @@ -501,6 +501,19 @@ end: ret i80 %conv3 } +; Check if we can still catch UBFX when "AND" is used by SHL. +; CHECK-LABEL: fct21: +; CHECK: ubfx +@arr = external global [8 x [64 x i64]] +define i64 @fct21(i64 %x) { +entry: + %shr = lshr i64 %x, 4 + %and = and i64 %shr, 15 + %arrayidx = getelementptr inbounds [8 x [64 x i64]]* @arr, i64 0, i64 0, i64 %and + %0 = load i64* %arrayidx, align 8 + ret i64 %0 +} + define i16 @test_ignored_rightbits(i32 %dst, i32 %in) { ; CHECK-LABEL: test_ignored_rightbits: