From abc02dffb79318ac48521480270aa5d87231e79f Mon Sep 17 00:00:00 2001 From: Spencer Comin Date: Tue, 25 Jun 2024 09:30:10 -0400 Subject: [PATCH] Z peephole: Remove unnecessary L(L)GFR A 32 bit load instruction followed by a zero/sign extend instruction can be replaced with an equivalent load and zero/sign extend instruction. Signed-off-by: Spencer Comin --- compiler/z/codegen/OMRPeephole.cpp | 167 +++++++++++++++++++++++++++-- compiler/z/codegen/OMRPeephole.hpp | 42 ++++++-- 2 files changed, 191 insertions(+), 18 deletions(-) diff --git a/compiler/z/codegen/OMRPeephole.cpp b/compiler/z/codegen/OMRPeephole.cpp index 6db7ba5c9d1..3f02f106e37 100644 --- a/compiler/z/codegen/OMRPeephole.cpp +++ b/compiler/z/codegen/OMRPeephole.cpp @@ -38,7 +38,7 @@ isBarrierToPeepHoleLookback(TR::Instruction* cursor) { if (cursor == NULL) return true; - + if (cursor->isLabel()) return true; @@ -198,6 +198,11 @@ OMR::Z::Peephole::performOnInstruction(TR::Instruction* cursor) performed |= performedCurrentPeephole; break; } + case TR::InstOpCode::LGFR: + { + performed |= self()->tryToRemoveRedundant32To64BitExtend(true); + break; + } case TR::InstOpCode::LHI: { performed |= self()->tryToReduceLHIToXR(); @@ -213,6 +218,11 @@ OMR::Z::Peephole::performOnInstruction(TR::Instruction* cursor) performed |= self()->tryToReduceLToLZRF(TR::InstOpCode::LLZRGF); break; } + case TR::InstOpCode::LLGFR: + { + performed |= self()->tryToRemoveRedundant32To64BitExtend(false); + break; + } case TR::InstOpCode::LR: { bool performedCurrentPeephole = false; @@ -254,7 +264,7 @@ OMR::Z::Peephole::performOnInstruction(TR::Instruction* cursor) if (!performedCurrentPeephole) performedCurrentPeephole |= self()->tryToRemoveDuplicateLoadRegister(); - + performed |= performedCurrentPeephole; break; } @@ -358,7 +368,7 @@ OMR::Z::Peephole::tryLoadStoreReduction(TR::InstOpCode::Mnemonic storeOpCode, ui return false; } - if (performTransformation(self()->comp(), "O^O S390 PEEPHOLE: Transforming load-store sequence at %p 
to MVC.", storeInst)) + if (performTransformation(self()->comp(), "O^O S390 PEEPHOLE: Transforming load-store sequence at %p to MVC.\n", storeInst)) { TR::DebugCounter::incStaticDebugCounter(self()->comp(), "z/peephole/load-store"); @@ -942,7 +952,7 @@ OMR::Z::Peephole::tryToReduceAGI() { if (performTransformation(self()->comp(), "O^O S390 PEEPHOLE: AGI LA reduction on [%p] from source load [%p].\n", current, cursor)) { - auto laInst = generateRXInstruction(self()->cg(), TR::InstOpCode::LA, cursor->getNode(), lgrTargetReg, + auto laInst = generateRXInstruction(self()->cg(), TR::InstOpCode::LA, cursor->getNode(), lgrTargetReg, generateS390MemoryReference(lgrSourceReg, 0, self()->cg()), cursor->getPrev()); self()->cg()->replaceInst(cursor, laInst); @@ -1328,7 +1338,7 @@ OMR::Z::Peephole::tryToReduceLLCToLLGC() memRef->resetMemRefUsedBefore(); auto llgcInst = generateRXInstruction(self()->cg(), TR::InstOpCode::LLGC, cursor->getNode(), llcTgtReg, memRef, cursor->getPrev()); self()->cg()->replaceInst(cursor, llgcInst); - + return true; } } @@ -1419,7 +1429,7 @@ OMR::Z::Peephole::tryToReduceLTRToCHI() TR::InstOpCode lgrOpCode = cursor->getOpCode(); if (lgrTargetReg == lgrSourceReg && - (lgrOpCode.getOpCodeValue() == TR::InstOpCode::LTR || + (lgrOpCode.getOpCodeValue() == TR::InstOpCode::LTR || lgrOpCode.getOpCodeValue() == TR::InstOpCode::LTGR)) { if (seekRegInFutureMemRef(cursor, 4, lgrTargetReg)) @@ -1528,7 +1538,7 @@ OMR::Z::Peephole::tryToRemoveDuplicateLoadRegister() windowSize = 0; setCC = setCC || current->getOpCode().setsCC(); useCC = useCC || current->getOpCode().readsCC(); - + rrInst->remove(); continue; @@ -1740,7 +1750,7 @@ OMR::Z::Peephole::tryToRemoveRedundantLA() if (performTransformation(self()->comp(), "O^O S390 PEEPHOLE: Removing redundant LA [%p].\n", cursor)) { cursor->remove(); - + return true; } } @@ -1828,7 +1838,7 @@ OMR::Z::Peephole::tryToRemoveRedundantLTR() TR::Register *lgrSourceReg = cursor->getRegisterOperand(2); TR::Register *lgrTargetReg = 
cursor->getRegisterOperand(1); - + if (lgrTargetReg == lgrSourceReg) { TR::Instruction *prevInst = cursor->getPrev(); @@ -1861,3 +1871,162 @@ OMR::Z::Peephole::tryToRemoveRedundantLTR() return false; }
+
+bool
+OMR::Z::Peephole::tryToRemoveRedundant32To64BitExtend(bool isSigned)
+   {
+   // Escape hatch: the peephole is skipped whenever feGetEnv finds TR_DisableRemoveRedundant32to64Extend
+   static const bool disableRemoveExtend = feGetEnv("TR_DisableRemoveRedundant32to64Extend") != NULL;
+   if (disableRemoveExtend)
+      {
+      return false;
+      }
+
+   int32_t windowSize = 0;
+   const int32_t maxWindowSize = 10;
+
+   const char *lgfrMnemonicName = isSigned ? "LGFR" : "LLGFR";
+   TR::Compilation *comp = self()->comp();
+   TR::Instruction *lgfr = cursor;
+   TR::Register *lgfrReg = lgfr->getRegisterOperand(1);
+
+   // Only an extend of a register onto itself can be folded into an earlier instruction
+   if (lgfrReg != lgfr->getRegisterOperand(2))
+      return false;
+
+   TR::Instruction *current = lgfr->getPrev();
+
+   // Walk backwards, at most maxWindowSize instructions and never past a lookback barrier, looking for an
+   // instruction whose first register operand is the register being extended
+   while ((current != NULL) &&
+      !isBarrierToPeepHoleLookback(current) &&
+      windowSize < maxWindowSize)
+      {
+      TR::InstOpCode::Mnemonic curOpMnemonic = current->getOpCode().getMnemonic();
+
+      if (current->getNumRegisterOperands() > 0 && lgfrReg == current->getRegisterOperand(1))
+         {
+         // NOTE(review): the replacement widens the register at the position of `current`, so an instruction
+         // between `current` and the extend that reads all 64 bits of the register would see the extended value
+         // earlier than before; presumably no such reader exists -- confirm (most relevant for the LGR case).
+         TR::MemoryReference *mr = NULL;
+         TR::Instruction *replacement = NULL;
+         switch (curOpMnemonic)
+            {
+            case TR::InstOpCode::L:
+               if (performTransformation(comp, "O^O S390 PEEPHOLE: Merging L [%p] and %s [%p] into %s.\n",
+                     current, lgfrMnemonicName, lgfr, isSigned ? "LGF" : "LLGF"))
+                  {
+                  mr = current->getMemoryReference();
+                  mr->resetMemRefUsedBefore();
+                  replacement = generateRXInstruction(self()->cg(), isSigned ? TR::InstOpCode::LGF : TR::InstOpCode::LLGF, current->getNode(), lgfrReg, mr, current->getPrev());
+                  }
+               break;
+            case TR::InstOpCode::LH:
+               // LH (and LB below) are only merged with the sign extending LGFR
+               if (isSigned && performTransformation(comp, "O^O S390 PEEPHOLE: Merging LH [%p] and LGFR [%p] into LGH.\n", current, lgfr))
+                  {
+                  mr = current->getMemoryReference();
+                  mr->resetMemRefUsedBefore();
+                  replacement = generateRXInstruction(self()->cg(), TR::InstOpCode::LGH, current->getNode(), lgfrReg, mr, current->getPrev());
+                  }
+               break;
+            case TR::InstOpCode::LLH:
+               if (performTransformation(comp, "O^O S390 PEEPHOLE: Merging LLH [%p] and %s [%p] into LLGH.\n", current, lgfrMnemonicName, lgfr))
+                  {
+                  mr = current->getMemoryReference();
+                  mr->resetMemRefUsedBefore();
+                  replacement = generateRXInstruction(self()->cg(), TR::InstOpCode::LLGH, current->getNode(), lgfrReg, mr, current->getPrev());
+                  }
+               break;
+            case TR::InstOpCode::LB:
+               if (isSigned && performTransformation(comp, "O^O S390 PEEPHOLE: Merging LB [%p] and LGFR [%p] into LGB.\n", current, lgfr))
+                  {
+                  mr = current->getMemoryReference();
+                  mr->resetMemRefUsedBefore();
+                  replacement = generateRXInstruction(self()->cg(), TR::InstOpCode::LGB, current->getNode(), lgfrReg, mr, current->getPrev());
+                  }
+               break;
+            case TR::InstOpCode::LLC:
+               if (performTransformation(comp, "O^O S390 PEEPHOLE: Merging LLC [%p] and %s [%p] into LLGC.\n", current, lgfrMnemonicName, lgfr))
+                  {
+                  mr = current->getMemoryReference();
+                  mr->resetMemRefUsedBefore();
+                  replacement = generateRXInstruction(self()->cg(), TR::InstOpCode::LLGC, current->getNode(), lgfrReg, mr, current->getPrev());
+                  }
+               break;
+
+            case TR::InstOpCode::XR:
+               // The following sequence of instructions
+               //   XR GPR1, GPR1          ; Zero out bottom 32 bits of GPR1
+               //   LGFR/LLGFR GPR1, GPR1  ; Extend those zeros to all 64 bits of GPR1
+               // Can be converted to
+               //   XGR GPR1, GPR1         ; Zero out all 64 bits of GPR1
+               if (lgfrReg == current->getRegisterOperand(2) &&
+                     performTransformation(comp, "O^O S390 PEEPHOLE: Merging XR [%p] and %s [%p] into XGR.\n", current, lgfrMnemonicName, lgfr))
+                  replacement = generateRRInstruction(self()->cg(), TR::InstOpCode::XGR, current->getNode(), lgfrReg, lgfrReg, current->getPrev());
+               break;
+            case TR::InstOpCode::IILF:
+               if (performTransformation(comp, "O^O S390 PEEPHOLE: Merging IILF [%p] and %s [%p] into %s.\n", current, lgfrMnemonicName, lgfr, isSigned ? "LGFI" : "LLILF"))
+                  replacement = generateRILInstruction(self()->cg(), isSigned ? TR::InstOpCode::LGFI : TR::InstOpCode::LLILF, current->getNode(), lgfrReg, toS390RILInstruction(current)->getSourceImmediate(), current->getPrev());
+               break;
+            case TR::InstOpCode::LHI:
+               if (isSigned)
+                  {
+                  // Keep the signed and unsigned rewrites in separate branches: the LLILF rewrite below implements
+                  // a zero extend, so it must never be reached for LGFR, even when this transformation is
+                  // declined by performTransformation.
+                  if (performTransformation(comp, "O^O S390 PEEPHOLE: Merging LHI [%p] and LGFR [%p] into LGHI.\n", current, lgfr))
+                     {
+                     replacement = generateRIInstruction(self()->cg(), TR::InstOpCode::LGHI, current->getNode(), lgfrReg, toS390RIInstruction(current)->getSourceImmediate(), current->getPrev());
+                     }
+                  }
+               else if (performTransformation(comp, "O^O S390 PEEPHOLE: Merging LHI [%p] and LLGFR [%p] into LLILF.\n", current, lgfr))
+                  {
+                  // The following sequence of instructions:
+                  //   LHI GPR1, IMM      ; sign extend IMM from 16 to 32 bits
+                  //   LLGFR GPR1, GPR1   ; zero extend from 32 to 64 bits
+                  // Can be converted to
+                  //   LLILF GPR1, IMM'   ; where IMM' is IMM sign extended from 16 to 32 bits
+                  int16_t imm = toS390RIInstruction(current)->getSourceImmediate();
+                  replacement = generateRILInstruction(self()->cg(), TR::InstOpCode::LLILF, current->getNode(), lgfrReg, static_cast<int32_t>(imm), current->getPrev());
+                  }
+               break;
+
+            case TR::InstOpCode::LR:
+            case TR::InstOpCode::LGR:
+               // A register copy followed by an extend of the copy becomes the extend applied to the copy's source
+               if (performTransformation(comp, "O^O S390 PEEPHOLE: Merging %s [%p] and %s [%p] into %s.\n",
+                     current->getOpCode().getMnemonicName(), current, lgfrMnemonicName, lgfr, lgfrMnemonicName))
+                  replacement = generateRRInstruction(self()->cg(), isSigned ? TR::InstOpCode::LGFR : TR::InstOpCode::LLGFR, current->getNode(), lgfrReg, current->getRegisterOperand(2), current->getPrev());
+               break;
+
+            default:
+               break;
+            }
+
+         if (replacement != NULL)
+            {
+            TR::DebugCounter::incStaticDebugCounter(comp,
+               TR::DebugCounter::debugCounterName(comp, "z/peephole/redundant32To64BitExtend/%s/%s/%s/(%s)",
+                  current->getOpCode().getMnemonicName(),
+                  lgfr->getOpCode().getMnemonicName(),
+                  replacement->getOpCode().getMnemonicName(),
+                  comp->signature()));
+            self()->cg()->replaceInst(current, replacement);
+            lgfr->remove();
+            return true;
+            }
+         }
+
+      // Ensure the extend acts on the correct register values
+      if (current->isDefRegister(lgfrReg))
+         break;
+
+      current = current->getPrev();
+
+      windowSize++;
+      }
+
+   return false;
+   }
diff --git a/compiler/z/codegen/OMRPeephole.hpp b/compiler/z/codegen/OMRPeephole.hpp index 0b41ad0f9e6..4e40d0be44e 100644 --- a/compiler/z/codegen/OMRPeephole.hpp +++ b/compiler/z/codegen/OMRPeephole.hpp @@ -70,7 +70,7 @@ class OMR_EXTENSIBLE Peephole : public OMR::Peephole * true if the reduction was successful; false otherwise. */ bool tryLoadStoreReduction(TR::InstOpCode::Mnemonic storeOpCode, uint16_t size); - + /** \brief * Tries to fold a load register instruction (\c LR or \c LGR) into a subsequent three-operand instruction if * possible. For example: @@ -92,7 +92,7 @@ class OMR_EXTENSIBLE Peephole : public OMR::Peephole * true if the reduction was successful; false otherwise. */ bool tryToFoldLoadRegisterIntoSubsequentInstruction(); - + /** \brief * Tries to forward a branch target if the branch instruction transfers control to another unconditional * branch instruction (i.e. a trampoline). For example: @@ -170,7 +170,7 @@ class OMR_EXTENSIBLE Peephole : public OMR::Peephole * true if the reduction was successful; false otherwise.
*/ bool tryToReduceAGI(); - + /** \brief * Tries to reduce a compare logical (\c CLR) insturction followed by a branch to a compare and branch * instruction (\c CLRJ) For example: @@ -190,7 +190,7 @@ class OMR_EXTENSIBLE Peephole : public OMR::Peephole * true if the reduction was successful; false otherwise. */ bool tryToReduceCLRToCLRJ(); - + /** \brief * Tries to reduce a simple branch conditional load of an immediate to a load immediate on condition branch- * less sequence. For example: @@ -218,7 +218,7 @@ class OMR_EXTENSIBLE Peephole : public OMR::Peephole * true if the reduction was successful; false otherwise. */ bool tryToReduceCRJLHIToLOCHI(TR::InstOpCode::Mnemonic compareMnemonic); - + /** \brief * Tries to reduce a load instruction (\c L) to an insert character under mask (\c ICM) instruction. This can * be done if following the load we have a load and test or a compare against certain immediates. For example: @@ -261,7 +261,7 @@ class OMR_EXTENSIBLE Peephole : public OMR::Peephole * true if the reduction was successful; false otherwise. */ bool tryToReduceLToLZRF(TR::InstOpCode::Mnemonic loadAndZeroRightMostByteMnemonic); - + /** \brief * Tries to reduce a load register instruction (\c LGR or \c LTGR) followed by a sign extension to \c LGFR. * For example: @@ -300,7 +300,7 @@ class OMR_EXTENSIBLE Peephole : public OMR::Peephole * true if the reduction was successful; false otherwise. */ bool tryToReduceLHIToXR(); - + /** \brief * Tries to reduce a load logical character instruction (\c LLC) followed by a zero extension to \c LLGC. * For example: @@ -320,7 +320,7 @@ class OMR_EXTENSIBLE Peephole : public OMR::Peephole * true if the reduction was successful; false otherwise. */ bool tryToReduceLLCToLLGC(); - + /** \brief * Tries to reduce a load register instruction (\c LR or \c LGR) and a future compare (\c CHI) against the * target register to \c LTR or \c LTGR. 
For example: @@ -347,7 +347,7 @@ class OMR_EXTENSIBLE Peephole : public OMR::Peephole * true if the reduction was successful; false otherwise. */ bool tryToReduceLRCHIToLTR(); - + /** \brief * Tries to reduce a load and test register instruction (\c LTR or \c LTGR) to a compare halfword immediate if * the target register of the load is used in a future memory reference. This is an attempt to reduce the AGI @@ -480,6 +480,30 @@ class OMR_EXTENSIBLE Peephole : public OMR::Peephole */ bool tryToRemoveRedundantLTR(); + + /** \brief + * Tries to remove a redundant 32 to 64 bit extension (\c LGFR or \c LLGFR of a register onto itself) by + * folding it into an earlier instruction that sets the register (a 32 bit load, a load immediate, an \c XR + * self-clear or a register copy), replacing that instruction with a 64 bit extending form. For example: + * + * <code> + * L R1,N(R2,R3) + * LGFR R1,R1 + * </code> + * + * can be reduced to: + * + * <code> + * LGF R1,N(R2,R3) + * </code> + * + * \param isSigned + * true if operating on an LGFR instruction; false if LLGFR + * + * \return + * true if the reduction was successful; false otherwise + */ + bool tryToRemoveRedundant32To64BitExtend(bool isSigned); + private: /// The instruction cursor currently being processed by the peephole optimization