
Commit c669827

mingmingl-llvm authored and memfrob committed
[Peephole-opt][X86] Enhance peephole opt to see through SUBREG_TO_REG (following AND) and eliminate the redundant TEST instruction.

Differential Revision: https://reviews.llvm.org/D124118
1 parent 495f8b2 commit c669827
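
The transformation targets a zero-test of a value whose defining AND already set EFLAGS. As a rough C++ reproducer (an illustration, not part of the commit; the function name is made up), the following mirrors the `test_erased` IR in the new MIR test and, compiled for x86-64 at -O2, should lower to the AND + SUBREG_TO_REG + TEST64rr sequence this patch targets:

    #include <cstdint>

    // Hypothetical reproducer: mirrors `test_erased` from the commit's MIR test.
    // `*p & 3` lowers to a 32-bit AND that already sets ZF, so the separate
    // TEST64rr feeding the CMOV is redundant and can now be erased.
    uint64_t select_masked(uint64_t *p, uint64_t fallback) {
      uint64_t masked = *p & 3;                // and i64, 3
      return masked == 0 ? fallback : masked;  // icmp eq + select -> CMOV64rr
    }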

File tree

2 files changed: +311 -0 lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 115 additions & 0 deletions
@@ -25,13 +25,16 @@
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -964,6 +967,101 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
   return ShAmt < 4 && ShAmt > 0;
 }
 
+static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
+                                   MachineInstr &CmpValDefInstr,
+                                   const MachineRegisterInfo *MRI,
+                                   MachineInstr **AndInstr,
+                                   const TargetRegisterInfo *TRI,
+                                   bool &NoSignFlag, bool &ClearsOverflowFlag) {
+  if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG)
+    return false;
+
+  if (CmpInstr.getOpcode() != X86::TEST64rr)
+    return false;
+
+  // CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare`
+  // guarantees that it's analyzable only if the two registers are identical.
+  assert(
+      (CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
+      "CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` "
+      "requires that the two register operands are the same.");
+
+  // The caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that
+  // `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this
+  // case, if `CmpValDefInstr` sets EFLAGS, it is likely that `CmpInstr` is
+  // redundant.
+  assert(
+      (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
+      "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG.");
+
+  // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is
+  // typically 0.
+  if (CmpValDefInstr.getOperand(1).getImm() != 0)
+    return false;
+
+  // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
+  // sub_32bit or sub_xmm.
+  if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
+    return false;
+
+  MachineInstr *VregDefInstr =
+      MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
+
+  assert(VregDefInstr && "Must have a definition (SSA)");
+
+  // Require that `CmpValDefInstr` and `VregDefInstr` are in the same MBB
+  // to simplify the subsequent analysis.
+  //
+  // FIXME: If `VregDefInstr->getParent()` is the only predecessor of
+  // `CmpValDefInstr.getParent()`, this could be handled.
+  if (VregDefInstr->getParent() != CmpValDefInstr.getParent())
+    return false;
+
+  if (X86::isAND(VregDefInstr->getOpcode())) {
+    // Get a sequence of instructions like
+    //   %reg = and* ...                    // Sets EFLAGS
+    //   ...                                // EFLAGS not changed
+    //   %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
+    //   test64rr %extended_reg, %extended_reg, implicit-def $eflags
+    //
+    // If subsequent readers use a subset of bits that don't change
+    // after the `and*` instruction, it's likely that the test64rr can
+    // be optimized away.
+    for (const MachineInstr &Instr :
+         make_range(std::next(MachineBasicBlock::iterator(VregDefInstr)),
+                    MachineBasicBlock::iterator(CmpValDefInstr))) {
+      // Bail out if any instruction between `VregDefInstr` and
+      // `CmpValDefInstr` modifies EFLAGS.
+      if (Instr.modifiesRegister(X86::EFLAGS, TRI))
+        return false;
+    }
+
+    *AndInstr = VregDefInstr;
+
+    // An AND instruction will essentially update SF and clear OF, so
+    // NoSignFlag should be false in the sense that SF is modified by `AND`.
+    //
+    // However, the implementation artificially sets `NoSignFlag` to true
+    // to poison the SF bit; that is to say, if SF is looked at later, the
+    // optimization (to erase TEST64rr) will be disabled.
+    //
+    // The reason to poison the SF bit is that its value could differ
+    // between the `AND` and `TEST` operations; the sign bit is not known
+    // for `AND`, and is known to be 0 as a result of `TEST64rr`.
+    //
+    // FIXME: As opposed to poisoning the SF bit directly, consider peeking
+    // into the AND instruction and using that static information to guide
+    // the peephole optimization if possible. For example, it's possible to
+    // fold a conditional move into a copy if the relevant EFLAGS bits could
+    // be deduced from an immediate operand of the AND operation.
+    NoSignFlag = true;
+    // ClearsOverflowFlag is true for an AND operation (no surprise).
+    ClearsOverflowFlag = true;
+    return true;
+  }
+  return false;
+}
+
 bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
                                   unsigned Opc, bool AllowSP, Register &NewSrc,
                                   bool &isKill, MachineOperand &ImplicitOp,
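
To see why the patch poisons SF rather than forwarding it, note that AND32 derives SF from bit 31 of the 32-bit result, while TEST64rr on the zero-extended register derives SF from bit 63, which SUBREG_TO_REG guarantees is 0. A minimal standalone C++ sketch of that disagreement (illustration only, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // A 32-bit AND whose result has bit 31 set: AND32 would compute SF = 1.
      uint32_t and32 = 0xF0000000u & 0x80000000u;
      // SUBREG_TO_REG zero-extends, so bit 63 is 0 and TEST64rr would
      // compute SF = 0: the two SF values disagree, hence the poisoning.
      uint64_t ext = static_cast<uint64_t>(and32);
      std::printf("SF after AND32: %u, SF after TEST64rr: %u\n",
                  unsigned(and32 >> 31), unsigned(ext >> 63));
      return 0;
    }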
@@ -4226,6 +4324,23 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       MI = &Inst;
       break;
     }
+
+    // Look back for the following pattern, in which case the test64rr
+    // instruction could be erased.
+    //
+    // Example:
+    //   %reg = and32ri %in_reg, 5
+    //   ...                        // EFLAGS not changed.
+    //   %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
+    //   test64rr %src_reg, %src_reg, implicit-def $eflags
+    MachineInstr *AndInstr = nullptr;
+    if (IsCmpZero &&
+        findRedundantFlagInstr(CmpInstr, Inst, MRI, &AndInstr, TRI,
+                               NoSignFlag, ClearsOverflowFlag)) {
+      assert(AndInstr != nullptr && X86::isAND(AndInstr->getOpcode()));
+      MI = AndInstr;
+      break;
+    }
 
     // Cannot find other candidates before definition of SrcReg.
     return false;
   }
New test file (MIR)

Lines changed: 196 additions & 0 deletions

@@ -0,0 +1,196 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -o - %s -mtriple=x86_64-unknown-linux-gnu --run-pass=peephole-opt | FileCheck %s

# Test that TEST64rr is erased in `test_erased`, and kept in `test_not_erased_when_sf_used`
# and `test_not_erased_when_eflags_change`.

--- |
  ; ModuleID = 'tmp.ll'
  source_filename = "tmp.ll"
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

  define i64 @test_erased(ptr %0, i64 %1, i64 %2) {
    %4 = load i64, ptr %0, align 8
    %5 = and i64 %4, 3
    %6 = icmp eq i64 %5, 0
    %7 = select i1 %6, i64 %1, i64 %5
    store i64 %7, ptr %0, align 8
    ret i64 %5
  }

  define i64 @test_not_erased_when_sf_used(ptr %0, i64 %1, i64 %2, i64 %3) {
    %5 = load i64, ptr %0, align 8
    %6 = and i64 %5, 3
    %7 = icmp slt i64 %6, 0
    %8 = select i1 %7, i64 %1, i64 %6
    store i64 %8, ptr %0, align 8
    ret i64 %5
  }

  define void @test_not_erased_when_eflags_change(ptr %0, i64 %1, i64 %2, i64 %3, ptr %4) {
    %6 = load i64, ptr %0, align 8
    %7 = and i64 %6, 3
    %8 = xor i64 %3, 5
    %9 = icmp eq i64 %7, 0
    %10 = select i1 %9, i64 %1, i64 %7
    store i64 %10, ptr %0, align 8
    store i64 %8, ptr %4, align 8
    ret void
  }

...
---
name:            test_erased
alignment:       16
tracksDebugUserValues: false
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: gr64, preferred-register: '' }
  - { id: 2, class: gr64, preferred-register: '' }
  - { id: 3, class: gr64, preferred-register: '' }
  - { id: 4, class: gr32, preferred-register: '' }
  - { id: 5, class: gr32, preferred-register: '' }
  - { id: 6, class: gr64, preferred-register: '' }
  - { id: 7, class: gr64, preferred-register: '' }
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
frameInfo:
  maxAlignment:    1
machineFunctionInfo: {}
body:             |
  bb.0 (%ir-block.3):
    liveins: $rdi, $rsi

    ; CHECK-LABEL: name: test_erased
    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
    ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY [[MOV64rm]].sub_32bit
    ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY2]], 3, implicit-def $eflags
    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[AND32ri8_]], %subreg.sub_32bit
    ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[SUBREG_TO_REG]], [[COPY]], 4, implicit $eflags
    ; CHECK-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[CMOV64rr]] :: (store (s64) into %ir.0)
    ; CHECK-NEXT: $rax = COPY [[SUBREG_TO_REG]]
    ; CHECK-NEXT: RET 0, $rax
    %1:gr64 = COPY $rsi
    %0:gr64 = COPY $rdi
    %3:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    %4:gr32 = COPY %3.sub_32bit
    %5:gr32 = AND32ri8 %4, 3, implicit-def dead $eflags
    %6:gr64 = SUBREG_TO_REG 0, killed %5, %subreg.sub_32bit
    TEST64rr %6, %6, implicit-def $eflags
    %7:gr64 = CMOV64rr %6, %1, 4, implicit $eflags
    MOV64mr %0, 1, $noreg, 0, $noreg, killed %7 :: (store (s64) into %ir.0)
    $rax = COPY %6
    RET 0, $rax

...
---
name:            test_not_erased_when_sf_used
alignment:       16
tracksDebugUserValues: false
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: gr64, preferred-register: '' }
  - { id: 2, class: gr64, preferred-register: '' }
  - { id: 3, class: gr64, preferred-register: '' }
  - { id: 4, class: gr64, preferred-register: '' }
  - { id: 5, class: gr32, preferred-register: '' }
  - { id: 6, class: gr32, preferred-register: '' }
  - { id: 7, class: gr64, preferred-register: '' }
  - { id: 8, class: gr64, preferred-register: '' }
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
frameInfo:
  maxAlignment:    1
machineFunctionInfo: {}
body:             |
  bb.0 (%ir-block.4):
    liveins: $rdi, $rsi

    ; CHECK-LABEL: name: test_not_erased_when_sf_used
    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
    ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY [[MOV64rm]].sub_32bit
    ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY2]], 3, implicit-def dead $eflags
    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[AND32ri8_]], %subreg.sub_32bit
    ; CHECK-NEXT: TEST64rr [[SUBREG_TO_REG]], [[SUBREG_TO_REG]], implicit-def $eflags
    ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[SUBREG_TO_REG]], [[COPY]], 8, implicit $eflags
    ; CHECK-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[CMOV64rr]] :: (store (s64) into %ir.0)
    ; CHECK-NEXT: $rax = COPY [[MOV64rm]]
    ; CHECK-NEXT: RET 0, $rax
    %1:gr64 = COPY $rsi
    %0:gr64 = COPY $rdi
    %4:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    %5:gr32 = COPY %4.sub_32bit
    %6:gr32 = AND32ri8 %5, 3, implicit-def dead $eflags
    %7:gr64 = SUBREG_TO_REG 0, killed %6, %subreg.sub_32bit
    TEST64rr %7, %7, implicit-def $eflags
    %8:gr64 = CMOV64rr %7, %1, 8, implicit $eflags
    MOV64mr %0, 1, $noreg, 0, $noreg, killed %8 :: (store (s64) into %ir.0)
    $rax = COPY %4
    RET 0, $rax

...
---
name:            test_not_erased_when_eflags_change
alignment:       16
tracksDebugUserValues: false
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: gr64, preferred-register: '' }
  - { id: 2, class: gr64, preferred-register: '' }
  - { id: 3, class: gr64, preferred-register: '' }
  - { id: 4, class: gr64, preferred-register: '' }
  - { id: 5, class: gr64, preferred-register: '' }
  - { id: 6, class: gr32, preferred-register: '' }
  - { id: 7, class: gr32, preferred-register: '' }
  - { id: 8, class: gr64, preferred-register: '' }
  - { id: 9, class: gr64, preferred-register: '' }
  - { id: 10, class: gr64, preferred-register: '' }
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
  - { reg: '$rcx', virtual-reg: '%3' }
  - { reg: '$r8', virtual-reg: '%4' }
frameInfo:
  maxAlignment:    1
machineFunctionInfo: {}
body:             |
  bb.0 (%ir-block.5):
    liveins: $rdi, $rsi, $rcx, $r8

    ; CHECK-LABEL: name: test_not_erased_when_eflags_change
    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $r8
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rcx
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rsi
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rdi
    ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY3]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[MOV64rm]].sub_32bit
    ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY4]], 3, implicit-def dead $eflags
    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[AND32ri8_]], %subreg.sub_32bit
    ; CHECK-NEXT: [[XOR64ri8_:%[0-9]+]]:gr64 = XOR64ri8 [[COPY1]], 5, implicit-def dead $eflags
    ; CHECK-NEXT: TEST64rr [[SUBREG_TO_REG]], [[SUBREG_TO_REG]], implicit-def $eflags
    ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[SUBREG_TO_REG]], [[COPY2]], 4, implicit $eflags
    ; CHECK-NEXT: MOV64mr [[COPY3]], 1, $noreg, 0, $noreg, killed [[CMOV64rr]] :: (store (s64) into %ir.0)
    ; CHECK-NEXT: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, killed [[XOR64ri8_]] :: (store (s64) into %ir.4)
    ; CHECK-NEXT: RET 0
    %4:gr64 = COPY $r8
    %3:gr64 = COPY $rcx
    %1:gr64 = COPY $rsi
    %0:gr64 = COPY $rdi
    %5:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    %6:gr32 = COPY %5.sub_32bit
    %7:gr32 = AND32ri8 %6, 3, implicit-def dead $eflags
    %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit
    %9:gr64 = XOR64ri8 %3, 5, implicit-def dead $eflags
    TEST64rr %8, %8, implicit-def $eflags
    %10:gr64 = CMOV64rr %8, %1, 4, implicit $eflags
    MOV64mr %0, 1, $noreg, 0, $noreg, killed %10 :: (store (s64) into %ir.0)
    MOV64mr %4, 1, $noreg, 0, $noreg, killed %9 :: (store (s64) into %ir.4)
    RET 0

...
