//===-- PeepholeOptimizer.cpp - Peephole Optimizations -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Perform peephole optimizations on the machine code:
//
// - Optimize Extensions
//
//     Optimization of sign / zero extension instructions. It may be extended
//     to handle other instructions with similar properties.
//
//     On some targets, some instructions, e.g. X86 sign / zero extension, may
//     leave the source value in the lower part of the result. This
//     optimization will replace some uses of the pre-extension value with
//     uses of the sub-register of the result.
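//
//     A hypothetical sketch (the virtual register numbers and sub-register
//     index are illustrative only, not taken from any particular target):
//
//       %reg1025 = <sext> %reg1024    ; %reg1024 is preserved in the low
//                                     ; bits of %reg1025
//       ...
//       use %reg1024                  ; can be rewritten to use a COPY of
//                                     ; the sub-register of %reg1025 instead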
// - Optimize Comparisons
//
//     Optimization of comparison instructions. For instance, in this code:
//
//       sub r1, 1
//       cmp r1, 0
//       bz  L1
//
//     If the "sub" instruction already sets (or could be modified to set) the
//     same flag that the "cmp" instruction sets and that "bz" uses, then we
//     can eliminate the "cmp" instruction.
// - Optimize Bitcast pairs:
//
//     A pair of back-to-back bitcasts that round-trips a value can be
//     collapsed so that uses of the second result read the original value:
//
//       v1 = bitcast v0
//       v2 = bitcast v1
//          = v2
//     =>
//       v1 = bitcast v0
//          = v0
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "peephole-opt"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
// Optimize Extensions
static cl::opt<bool>
Aggressive("aggressive-ext-opt", cl::Hidden,
           cl::desc("Aggressive extension optimization"));
static cl::opt<bool>
DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                cl::desc("Disable the peephole optimizer"));
STATISTIC(NumReuse,    "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps,     "Number of compares eliminated");
STATISTIC(NumImmFold,  "Number of move immediates folded");
namespace {
  class PeepholeOptimizer : public MachineFunctionPass {
    const TargetMachine   *TM;
    const TargetInstrInfo *TII;
    MachineRegisterInfo   *MRI;
    MachineDominatorTree  *DT;  // Machine dominator tree
  public:
    static char ID; // Pass identification
    PeepholeOptimizer() : MachineFunctionPass(ID) {
      initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
    }
    virtual bool runOnMachineFunction(MachineFunction &MF);
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
      if (Aggressive) {
        AU.addRequired<MachineDominatorTree>();
        AU.addPreserved<MachineDominatorTree>();
      }
    }

  private:
    bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
    bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
    bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                          SmallPtrSet<MachineInstr*, 8> &LocalMIs);
    bool isMoveImmediate(MachineInstr *MI,
                         SmallSet<unsigned, 4> &ImmDefRegs,
                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                       SmallSet<unsigned, 4> &ImmDefRegs,
                       DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
  };
}
char PeepholeOptimizer::ID = 0;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
                      "Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
                    "Peephole Optimizations", false, false)
FunctionPass *llvm::createPeepholeOptimizerPass() {
  return new PeepholeOptimizer();
}
/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of
/// the result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this
/// changes the code. Since this code does not currently share EXTRACTs, just
/// ignore all debug uses.
bool PeepholeOptimizer::
OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
  if (++UI == MRI->use_nodbg_end())
    // No other uses.
    return false;
  // The source has other uses. See if we can replace the other uses with use
  // of the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  UI = MRI->use_nodbg_begin(DstReg);
  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
       UI != UE; ++UI)
    ReachedBBs.insert(UI->getParent());

  // Uses that are in the same BB as a use of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  UI = MRI->use_nodbg_begin(SrcReg);
  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
       UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = &*UI;
    if (UseMI == MI)
      continue;
    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range
      // of the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));
  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result; we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions downstream.
    // A PHI use is expected to be the kill of its source values.
    UI = MRI->use_nodbg_begin(DstReg);
    for (MachineRegisterInfo::use_nodbg_iterator
           UE = MRI->use_nodbg_end(); UI != UE; ++UI)
      if (UI->isPHI())
        PHIBBs.insert(UI->getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      unsigned NewVR = MRI->createVirtualRegister(RC);
      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);

      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcasts
/// a value across register classes), and the source is defined by another
/// bitcast instruction B, and the register class of the source of B matches
/// the register class of the def of A, then it is legal to replace all uses
/// of the def of A with the source of B. e.g.
///
///   %vreg0<def> = VMOVSR %vreg1
///   %vreg3<def> = VMOVRS %vreg0
///
/// Replace all uses of vreg3 with vreg1.
bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
                                             MachineBasicBlock *MBB) {
  unsigned NumDefs = MI->getDesc().getNumDefs();
  unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
  if (NumDefs != 1)
    return false;

  // Find the single register def and the single register source of the
  // bitcast.
  unsigned Def = 0;
  unsigned Src = 0;
  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (MO.isDef())
      Def = Reg;
    else if (Src)
      // Multiple sources?
      return false;
    else
      Src = Reg;
  }

  assert(Def && Src && "Malformed bitcast instruction!");

  MachineInstr *DefMI = MRI->getVRegDef(Src);
  if (!DefMI || !DefMI->getDesc().isBitcast())
    return false;

  // Find the single register source of the defining bitcast.
  unsigned SrcSrc = 0;
  NumDefs = DefMI->getDesc().getNumDefs();
  NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
  if (NumDefs != 1)
    return false;
  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
    const MachineOperand &MO = DefMI->getOperand(i);
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (SrcSrc)
      // Multiple sources?
      return false;
    SrcSrc = Reg;
  }

  if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
    return false;

  MRI->replaceRegWith(Def, SrcSrc);
  MRI->clearKillFlags(SrcSrc);
  MI->eraseFromParent();
  ++NumBitcasts;
  return true;
}
/// OptimizeCmpInstr - If the instruction is a compare and the previous
/// instruction it's comparing against already sets (or could be modified to
/// set) the same flag as the compare, then we can remove the comparison and
/// use the flag from the previous instruction.
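///
/// For instance (a hypothetical machine-code sketch mirroring the example in
/// the file header; "subs" stands in for a sub that also sets the flags):
///
///   sub r1, 1          becomes        subs r1, 1
///   cmp r1, 0                         bz   L1
///   bz  L1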
bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
                                         MachineBasicBlock *MBB) {
  // If this instruction is a comparison against zero and isn't comparing a
  // physical register, we can try to optimize it.
  unsigned SrcReg;
  int CmpMask, CmpValue;
  if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  // Attempt to optimize the comparison instruction.
  if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
    ++NumCmps;
    return true;
  }

  return false;
}
bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
                                        SmallSet<unsigned, 4> &ImmDefRegs,
                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.isMoveImmediate())
    return false;
  if (TID.getNumDefs() != 1)
    return false;
  unsigned Reg = MI->getOperand(0).getReg();
  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    ImmDefMIs.insert(std::make_pair(Reg, MI));
    ImmDefRegs.insert(Reg);
    return true;
  }

  return false;
}
/// FoldImmediate - Try folding register operands that are defined by move
/// immediate instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
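///
/// A hypothetical illustration (the opcodes are made up; whether the fold
/// succeeds is up to the target's TII->FoldImmediate hook). Given a move
/// immediate recorded earlier by isMoveImmediate:
///
///   %reg1024 = MOVi 42
///   %reg1025 = ADDrr %reg1023, %reg1024
///
/// the use may be rewritten into the immediate form:
///
///   %reg1025 = ADDri %reg1023, 42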
bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                                      SmallSet<unsigned, 4> &ImmDefRegs,
                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    if (ImmDefRegs.count(Reg) == 0)
      continue;
    DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
    assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
    if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
      ++NumImmFold;
      return true;
    }
  }
  return false;
}
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();

    bool First = true;
    MachineBasicBlock::iterator PMII;
    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);
      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        ++MII;
        continue;
      }

      const TargetInstrDesc &TID = MI->getDesc();

      if (TID.isBitcast()) {
        if (OptimizeBitcastInstr(MI, MBB)) {
          // MI is deleted.
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      } else if (TID.isCompare()) {
        if (OptimizeCmpInstr(MI, MBB)) {
          // MI is deleted.
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
        if (SeenMoveImm)
          Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      First = false;
      PMII = MII;
      ++MII;
    }
  }

  return Changed;
}