//===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A pre-emit peephole for catching opportunities introduced by late passes such
// as MachineBlockPlacement.
//
//===----------------------------------------------------------------------===//
15 #include "PPCInstrInfo.h"
16 #include "PPCSubtarget.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/CodeGen/LivePhysRegs.h"
20 #include "llvm/CodeGen/MachineBasicBlock.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
30 #define DEBUG_TYPE "ppc-pre-emit-peephole"
32 STATISTIC(NumRRConvertedInPreEmit
,
33 "Number of r+r instructions converted to r+i in pre-emit peephole");
34 STATISTIC(NumRemovedInPreEmit
,
35 "Number of instructions deleted in pre-emit peephole");
36 STATISTIC(NumberOfSelfCopies
,
37 "Number of self copy instructions eliminated");
38 STATISTIC(NumFrameOffFoldInPreEmit
,
39 "Number of folding frame offset by using r+r in pre-emit peephole");
42 EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden
, cl::init(true),
43 cl::desc("enable PC Relative linker optimization"));
46 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden
, cl::init(true),
47 cl::desc("Run pre-emit peephole optimizations."));
// Opcode predicate: the result is decided by a switch over Use.getOpcode().
// The only caller, addLinkerOpt, treats a true result as permission to look
// at operand 2 of the instruction as the address-register operand.
// NOTE(review): the case labels and return statements of this switch are not
// present in this listing, so the exact opcode set cannot be documented here;
// restore the body from the upstream file before relying on this function.
51 static bool hasPCRelativeForm(MachineInstr
&Use
) {
52 switch (Use
.getOpcode()) {
90 class PPCPreEmitPeephole
: public MachineFunctionPass
{
93 PPCPreEmitPeephole() : MachineFunctionPass(ID
) {
94 initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
97 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
98 MachineFunctionPass::getAnalysisUsage(AU
);
101 MachineFunctionProperties
getRequiredProperties() const override
{
102 return MachineFunctionProperties().set(
103 MachineFunctionProperties::Property::NoVRegs
);
106 // This function removes any redundant load immediates. It has two level
107 // loops - The outer loop finds the load immediates BBI that could be used
108 // to replace following redundancy. The inner loop scans instructions that
109 // after BBI to find redundancy and update kill/dead flags accordingly. If
110 // AfterBBI is the same as BBI, it is redundant, otherwise any instructions
111 // that modify the def register of BBI would break the scanning.
112 // DeadOrKillToUnset is a pointer to the previous operand that had the
113 // kill/dead flag set. It keeps track of the def register of BBI, the use
114 // registers of AfterBBIs and the def registers of AfterBBIs.
115 bool removeRedundantLIs(MachineBasicBlock
&MBB
,
116 const TargetRegisterInfo
*TRI
) {
117 LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
118 MBB
.dump(); dbgs() << "\n");
120 DenseSet
<MachineInstr
*> InstrsToErase
;
121 for (auto BBI
= MBB
.instr_begin(); BBI
!= MBB
.instr_end(); ++BBI
) {
122 // Skip load immediate that is marked to be erased later because it
123 // cannot be used to replace any other instructions.
124 if (InstrsToErase
.contains(&*BBI
))
126 // Skip non-load immediate.
127 unsigned Opc
= BBI
->getOpcode();
128 if (Opc
!= PPC::LI
&& Opc
!= PPC::LI8
&& Opc
!= PPC::LIS
&&
131 // Skip load immediate, where the operand is a relocation (e.g., $r3 =
132 // LI target-flags(ppc-lo) %const.0).
133 if (!BBI
->getOperand(1).isImm())
135 assert(BBI
->getOperand(0).isReg() &&
136 "Expected a register for the first operand");
138 LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI
->dump(););
140 Register Reg
= BBI
->getOperand(0).getReg();
141 int64_t Imm
= BBI
->getOperand(1).getImm();
142 MachineOperand
*DeadOrKillToUnset
= nullptr;
143 if (BBI
->getOperand(0).isDead()) {
144 DeadOrKillToUnset
= &BBI
->getOperand(0);
145 LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
146 << " from load immediate " << *BBI
147 << " is a unsetting candidate\n");
149 // This loop scans instructions after BBI to see if there is any
150 // redundant load immediate.
151 for (auto AfterBBI
= std::next(BBI
); AfterBBI
!= MBB
.instr_end();
153 // Track the operand that kill Reg. We would unset the kill flag of
154 // the operand if there is a following redundant load immediate.
155 int KillIdx
= AfterBBI
->findRegisterUseOperandIdx(Reg
, true, TRI
);
157 // We can't just clear implicit kills, so if we encounter one, stop
159 if (KillIdx
!= -1 && AfterBBI
->getOperand(KillIdx
).isImplicit()) {
161 << "Encountered an implicit kill, cannot proceed: ");
162 LLVM_DEBUG(AfterBBI
->dump());
167 assert(!DeadOrKillToUnset
&& "Shouldn't kill same register twice");
168 DeadOrKillToUnset
= &AfterBBI
->getOperand(KillIdx
);
170 << " Kill flag of " << *DeadOrKillToUnset
<< " from "
171 << *AfterBBI
<< " is a unsetting candidate\n");
174 if (!AfterBBI
->modifiesRegister(Reg
, TRI
))
176 // Finish scanning because Reg is overwritten by a non-load
178 if (AfterBBI
->getOpcode() != Opc
)
180 assert(AfterBBI
->getOperand(0).isReg() &&
181 "Expected a register for the first operand");
182 // Finish scanning because Reg is overwritten by a relocation or a
184 if (!AfterBBI
->getOperand(1).isImm() ||
185 AfterBBI
->getOperand(1).getImm() != Imm
)
188 // It loads same immediate value to the same Reg, which is redundant.
189 // We would unset kill flag in previous Reg usage to extend live range
190 // of Reg first, then remove the redundancy.
191 if (DeadOrKillToUnset
) {
193 << " Unset dead/kill flag of " << *DeadOrKillToUnset
194 << " from " << *DeadOrKillToUnset
->getParent());
195 if (DeadOrKillToUnset
->isDef())
196 DeadOrKillToUnset
->setIsDead(false);
198 DeadOrKillToUnset
->setIsKill(false);
201 AfterBBI
->findRegisterDefOperand(Reg
, true, true, TRI
);
202 if (DeadOrKillToUnset
)
204 << " Dead flag of " << *DeadOrKillToUnset
<< " from "
205 << *AfterBBI
<< " is a unsetting candidate\n");
206 InstrsToErase
.insert(&*AfterBBI
);
207 LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
212 for (MachineInstr
*MI
: InstrsToErase
) {
213 MI
->eraseFromParent();
215 NumRemovedInPreEmit
+= InstrsToErase
.size();
216 return !InstrsToErase
.empty();
219 // Check if this instruction is a PLDpc that is part of a GOT indirect
221 bool isGOTPLDpc(MachineInstr
&Instr
) {
222 if (Instr
.getOpcode() != PPC::PLDpc
)
225 // The result must be a register.
226 const MachineOperand
&LoadedAddressReg
= Instr
.getOperand(0);
227 if (!LoadedAddressReg
.isReg())
230 // Make sure that this is a global symbol.
231 const MachineOperand
&SymbolOp
= Instr
.getOperand(1);
232 if (!SymbolOp
.isGlobal())
235 // Finally return true only if the GOT flag is present.
236 return (SymbolOp
.getTargetFlags() & PPCII::MO_GOT_FLAG
);
239 bool addLinkerOpt(MachineBasicBlock
&MBB
, const TargetRegisterInfo
*TRI
) {
240 MachineFunction
*MF
= MBB
.getParent();
241 // If the linker opt is disabled then just return.
242 if (!EnablePCRelLinkerOpt
)
245 // Add this linker opt only if we are using PC Relative memops.
246 if (!MF
->getSubtarget
<PPCSubtarget
>().isUsingPCRelativeCalls())
249 // Struct to keep track of one def/use pair for a GOT indirect access.
250 struct GOTDefUsePair
{
251 MachineBasicBlock::iterator DefInst
;
252 MachineBasicBlock::iterator UseInst
;
257 // Vector of def/ues pairs in this basic block.
258 SmallVector
<GOTDefUsePair
, 4> CandPairs
;
259 SmallVector
<GOTDefUsePair
, 4> ValidPairs
;
260 bool MadeChange
= false;
262 // Run through all of the instructions in the basic block and try to
263 // collect potential pairs of GOT indirect access instructions.
264 for (auto BBI
= MBB
.instr_begin(); BBI
!= MBB
.instr_end(); ++BBI
) {
265 // Look for the initial GOT indirect load.
266 if (isGOTPLDpc(*BBI
)) {
267 GOTDefUsePair CurrentPair
{BBI
, MachineBasicBlock::iterator(),
268 BBI
->getOperand(0).getReg(),
269 PPC::NoRegister
, true};
270 CandPairs
.push_back(CurrentPair
);
274 // We haven't encountered any new PLD instructions, nothing to check.
275 if (CandPairs
.empty())
278 // Run through the candidate pairs and see if any of the registers
279 // defined in the PLD instructions are used by this instruction.
280 // Note: the size of CandPairs can change in the loop.
281 for (unsigned Idx
= 0; Idx
< CandPairs
.size(); Idx
++) {
282 GOTDefUsePair
&Pair
= CandPairs
[Idx
];
283 // The instruction does not use or modify this PLD's def reg,
285 if (!BBI
->readsRegister(Pair
.DefReg
, TRI
) &&
286 !BBI
->modifiesRegister(Pair
.DefReg
, TRI
))
289 // The use needs to be used in the address compuation and not
290 // as the register being stored for a store.
291 const MachineOperand
*UseOp
=
292 hasPCRelativeForm(*BBI
) ? &BBI
->getOperand(2) : nullptr;
294 // Check for a valid use.
295 if (UseOp
&& UseOp
->isReg() && UseOp
->getReg() == Pair
.DefReg
&&
296 UseOp
->isUse() && UseOp
->isKill()) {
298 Pair
.UseReg
= BBI
->getOperand(0).getReg();
299 ValidPairs
.push_back(Pair
);
301 CandPairs
.erase(CandPairs
.begin() + Idx
);
305 // Go through all of the pairs and check for any more valid uses.
306 for (auto Pair
= ValidPairs
.begin(); Pair
!= ValidPairs
.end(); Pair
++) {
307 // We shouldn't be here if we don't have a valid pair.
308 assert(Pair
->UseInst
.isValid() && Pair
->StillValid
&&
309 "Kept an invalid def/use pair for GOT PCRel opt");
310 // We have found a potential pair. Search through the instructions
311 // between the def and the use to see if it is valid to mark this as a
313 MachineBasicBlock::iterator BBI
= Pair
->DefInst
;
315 for (; BBI
!= Pair
->UseInst
; ++BBI
) {
316 if (BBI
->readsRegister(Pair
->UseReg
, TRI
) ||
317 BBI
->modifiesRegister(Pair
->UseReg
, TRI
)) {
318 Pair
->StillValid
= false;
323 if (!Pair
->StillValid
)
326 // The load/store instruction that uses the address from the PLD will
327 // either use a register (for a store) or define a register (for the
328 // load). That register will be added as an implicit def to the PLD
329 // and as an implicit use on the second memory op. This is a precaution
330 // to prevent future passes from using that register between the two
332 MachineOperand ImplDef
=
333 MachineOperand::CreateReg(Pair
->UseReg
, true, true);
334 MachineOperand ImplUse
=
335 MachineOperand::CreateReg(Pair
->UseReg
, false, true);
336 Pair
->DefInst
->addOperand(ImplDef
);
337 Pair
->UseInst
->addOperand(ImplUse
);
339 // Create the symbol.
340 MCContext
&Context
= MF
->getContext();
341 MCSymbol
*Symbol
= Context
.createNamedTempSymbol("pcrel");
342 MachineOperand PCRelLabel
=
343 MachineOperand::CreateMCSymbol(Symbol
, PPCII::MO_PCREL_OPT_FLAG
);
344 Pair
->DefInst
->addOperand(*MF
, PCRelLabel
);
345 Pair
->UseInst
->addOperand(*MF
, PCRelLabel
);
351 // This function removes redundant pairs of accumulator prime/unprime
352 // instructions. In some situations, it's possible the compiler inserts an
353 // accumulator prime instruction followed by an unprime instruction (e.g.
354 // when we store an accumulator after restoring it from a spill). If the
355 // accumulator is not used between the two, they can be removed. This
356 // function removes these redundant pairs from basic blocks.
357 // The algorithm is quite straightforward - every time we encounter a prime
358 // instruction, the primed register is added to a candidate set. Any use
359 // other than a prime removes the candidate from the set and any de-prime
360 // of a current candidate marks both the prime and de-prime for removal.
361 // This way we ensure we only remove prime/de-prime *pairs* with no
363 bool removeAccPrimeUnprime(MachineBasicBlock
&MBB
) {
364 DenseSet
<MachineInstr
*> InstrsToErase
;
365 // Initially, none of the acc registers are candidates.
366 SmallVector
<MachineInstr
*, 8> Candidates(
367 PPC::UACCRCRegClass
.getNumRegs(), nullptr);
369 for (MachineInstr
&BBI
: MBB
.instrs()) {
370 unsigned Opc
= BBI
.getOpcode();
371 // If we are visiting a xxmtacc instruction, we add it and its operand
372 // register to the candidate set.
373 if (Opc
== PPC::XXMTACC
) {
374 Register Acc
= BBI
.getOperand(0).getReg();
375 assert(PPC::ACCRCRegClass
.contains(Acc
) &&
376 "Unexpected register for XXMTACC");
377 Candidates
[Acc
- PPC::ACC0
] = &BBI
;
379 // If we are visiting a xxmfacc instruction and its operand register is
380 // in the candidate set, we mark the two instructions for removal.
381 else if (Opc
== PPC::XXMFACC
) {
382 Register Acc
= BBI
.getOperand(0).getReg();
383 assert(PPC::ACCRCRegClass
.contains(Acc
) &&
384 "Unexpected register for XXMFACC");
385 if (!Candidates
[Acc
- PPC::ACC0
])
387 InstrsToErase
.insert(&BBI
);
388 InstrsToErase
.insert(Candidates
[Acc
- PPC::ACC0
]);
390 // If we are visiting an instruction using an accumulator register
391 // as operand, we remove it from the candidate set.
393 for (MachineOperand
&Operand
: BBI
.operands()) {
394 if (!Operand
.isReg())
396 Register Reg
= Operand
.getReg();
397 if (PPC::ACCRCRegClass
.contains(Reg
))
398 Candidates
[Reg
- PPC::ACC0
] = nullptr;
403 for (MachineInstr
*MI
: InstrsToErase
)
404 MI
->eraseFromParent();
405 NumRemovedInPreEmit
+= InstrsToErase
.size();
406 return !InstrsToErase
.empty();
409 bool runOnMachineFunction(MachineFunction
&MF
) override
{
410 if (skipFunction(MF
.getFunction()) || !RunPreEmitPeephole
) {
411 // Remove UNENCODED_NOP even when this pass is disabled.
412 // This needs to be done unconditionally so we don't emit zeros
413 // in the instruction stream.
414 SmallVector
<MachineInstr
*, 4> InstrsToErase
;
415 for (MachineBasicBlock
&MBB
: MF
)
416 for (MachineInstr
&MI
: MBB
)
417 if (MI
.getOpcode() == PPC::UNENCODED_NOP
)
418 InstrsToErase
.push_back(&MI
);
419 for (MachineInstr
*MI
: InstrsToErase
)
420 MI
->eraseFromParent();
423 bool Changed
= false;
424 const PPCInstrInfo
*TII
= MF
.getSubtarget
<PPCSubtarget
>().getInstrInfo();
425 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
426 SmallVector
<MachineInstr
*, 4> InstrsToErase
;
427 for (MachineBasicBlock
&MBB
: MF
) {
428 Changed
|= removeRedundantLIs(MBB
, TRI
);
429 Changed
|= addLinkerOpt(MBB
, TRI
);
430 Changed
|= removeAccPrimeUnprime(MBB
);
431 for (MachineInstr
&MI
: MBB
) {
432 unsigned Opc
= MI
.getOpcode();
433 if (Opc
== PPC::UNENCODED_NOP
) {
434 InstrsToErase
.push_back(&MI
);
437 // Detect self copies - these can result from running AADB.
438 if (PPCInstrInfo::isSameClassPhysRegCopy(Opc
)) {
439 const MCInstrDesc
&MCID
= TII
->get(Opc
);
440 if (MCID
.getNumOperands() == 3 &&
441 MI
.getOperand(0).getReg() == MI
.getOperand(1).getReg() &&
442 MI
.getOperand(0).getReg() == MI
.getOperand(2).getReg()) {
443 NumberOfSelfCopies
++;
444 LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
445 LLVM_DEBUG(MI
.dump());
446 InstrsToErase
.push_back(&MI
);
449 else if (MCID
.getNumOperands() == 2 &&
450 MI
.getOperand(0).getReg() == MI
.getOperand(1).getReg()) {
451 NumberOfSelfCopies
++;
452 LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
453 LLVM_DEBUG(MI
.dump());
454 InstrsToErase
.push_back(&MI
);
458 MachineInstr
*DefMIToErase
= nullptr;
459 if (TII
->convertToImmediateForm(MI
, &DefMIToErase
)) {
461 NumRRConvertedInPreEmit
++;
462 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
463 LLVM_DEBUG(MI
.dump());
465 InstrsToErase
.push_back(DefMIToErase
);
468 if (TII
->foldFrameOffset(MI
)) {
470 NumFrameOffFoldInPreEmit
++;
471 LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
472 LLVM_DEBUG(MI
.dump());
476 // Eliminate conditional branch based on a constant CR bit by
477 // CRSET or CRUNSET. We eliminate the conditional branch or
478 // convert it into an unconditional branch. Also, if the CR bit
479 // is not used by other instructions, we eliminate CRSET as well.
480 auto I
= MBB
.getFirstInstrTerminator();
481 if (I
== MBB
.instr_end())
483 MachineInstr
*Br
= &*I
;
484 if (Br
->getOpcode() != PPC::BC
&& Br
->getOpcode() != PPC::BCn
)
486 MachineInstr
*CRSetMI
= nullptr;
487 Register CRBit
= Br
->getOperand(0).getReg();
488 unsigned CRReg
= getCRFromCRBit(CRBit
);
489 bool SeenUse
= false;
490 MachineBasicBlock::reverse_iterator It
= Br
, Er
= MBB
.rend();
491 for (It
++; It
!= Er
; It
++) {
492 if (It
->modifiesRegister(CRBit
, TRI
)) {
493 if ((It
->getOpcode() == PPC::CRUNSET
||
494 It
->getOpcode() == PPC::CRSET
) &&
495 It
->getOperand(0).getReg() == CRBit
)
499 if (It
->readsRegister(CRBit
, TRI
))
502 if (!CRSetMI
) continue;
504 unsigned CRSetOp
= CRSetMI
->getOpcode();
505 if ((Br
->getOpcode() == PPC::BCn
&& CRSetOp
== PPC::CRSET
) ||
506 (Br
->getOpcode() == PPC::BC
&& CRSetOp
== PPC::CRUNSET
)) {
507 // Remove this branch since it cannot be taken.
508 InstrsToErase
.push_back(Br
);
509 MBB
.removeSuccessor(Br
->getOperand(1).getMBB());
512 // This conditional branch is always taken. So, remove all branches
513 // and insert an unconditional branch to the destination of this.
514 MachineBasicBlock::iterator It
= Br
, Er
= MBB
.end();
515 for (; It
!= Er
; It
++) {
516 if (It
->isDebugInstr()) continue;
517 assert(It
->isTerminator() && "Non-terminator after a terminator");
518 InstrsToErase
.push_back(&*It
);
520 if (!MBB
.isLayoutSuccessor(Br
->getOperand(1).getMBB())) {
521 ArrayRef
<MachineOperand
> NoCond
;
522 TII
->insertBranch(MBB
, Br
->getOperand(1).getMBB(), nullptr,
523 NoCond
, Br
->getDebugLoc());
525 for (auto &Succ
: MBB
.successors())
526 if (Succ
!= Br
->getOperand(1).getMBB()) {
527 MBB
.removeSuccessor(Succ
);
532 // If the CRBit is not used by another instruction, we can eliminate
533 // CRSET/CRUNSET instruction.
535 // We need to check use of the CRBit in successors.
536 for (auto &SuccMBB
: MBB
.successors())
537 if (SuccMBB
->isLiveIn(CRBit
) || SuccMBB
->isLiveIn(CRReg
)) {
542 InstrsToErase
.push_back(CRSetMI
);
545 for (MachineInstr
*MI
: InstrsToErase
) {
546 LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
547 LLVM_DEBUG(MI
->dump());
548 MI
->eraseFromParent();
549 NumRemovedInPreEmit
++;
556 INITIALIZE_PASS(PPCPreEmitPeephole
, DEBUG_TYPE
, "PowerPC Pre-Emit Peephole",
558 char PPCPreEmitPeephole::ID
= 0;
560 FunctionPass
*llvm::createPPCPreEmitPeepholePass() {
561 return new PPCPreEmitPeephole();