[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / Target / PowerPC / PPCPreEmitPeephole.cpp
bloba8853609a7c87185359c349b165c7e0fe0419ab9
1 //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // A pre-emit peephole for catching opportunities introduced by late passes such
10 // as MachineBlockPlacement.
12 //===----------------------------------------------------------------------===//
14 #include "PPC.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCSubtarget.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/CodeGen/LivePhysRegs.h"
20 #include "llvm/CodeGen/MachineBasicBlock.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
28 using namespace llvm;
30 #define DEBUG_TYPE "ppc-pre-emit-peephole"
32 STATISTIC(NumRRConvertedInPreEmit,
33 "Number of r+r instructions converted to r+i in pre-emit peephole");
34 STATISTIC(NumRemovedInPreEmit,
35 "Number of instructions deleted in pre-emit peephole");
36 STATISTIC(NumberOfSelfCopies,
37 "Number of self copy instructions eliminated");
38 STATISTIC(NumFrameOffFoldInPreEmit,
39 "Number of folding frame offset by using r+r in pre-emit peephole");
41 static cl::opt<bool>
42 EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
43 cl::desc("enable PC Relative linker optimization"));
45 static cl::opt<bool>
46 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
47 cl::desc("Run pre-emit peephole optimizations."));
49 namespace {
51 static bool hasPCRelativeForm(MachineInstr &Use) {
52 switch (Use.getOpcode()) {
53 default:
54 return false;
55 case PPC::LBZ:
56 case PPC::LBZ8:
57 case PPC::LHA:
58 case PPC::LHA8:
59 case PPC::LHZ:
60 case PPC::LHZ8:
61 case PPC::LWZ:
62 case PPC::LWZ8:
63 case PPC::STB:
64 case PPC::STB8:
65 case PPC::STH:
66 case PPC::STH8:
67 case PPC::STW:
68 case PPC::STW8:
69 case PPC::LD:
70 case PPC::STD:
71 case PPC::LWA:
72 case PPC::LXSD:
73 case PPC::LXSSP:
74 case PPC::LXV:
75 case PPC::STXSD:
76 case PPC::STXSSP:
77 case PPC::STXV:
78 case PPC::LFD:
79 case PPC::LFS:
80 case PPC::STFD:
81 case PPC::STFS:
82 case PPC::DFLOADf32:
83 case PPC::DFLOADf64:
84 case PPC::DFSTOREf32:
85 case PPC::DFSTOREf64:
86 return true;
90 class PPCPreEmitPeephole : public MachineFunctionPass {
91 public:
92 static char ID;
93 PPCPreEmitPeephole() : MachineFunctionPass(ID) {
94 initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
97 void getAnalysisUsage(AnalysisUsage &AU) const override {
98 MachineFunctionPass::getAnalysisUsage(AU);
101 MachineFunctionProperties getRequiredProperties() const override {
102 return MachineFunctionProperties().set(
103 MachineFunctionProperties::Property::NoVRegs);
// This function removes any redundant load immediates. It has two levels of
// loops: the outer loop finds a load immediate (BBI) that could make a later
// load immediate redundant, and the inner loop scans the instructions after
// BBI to find such redundancy and update kill/dead flags accordingly. If
// AfterBBI is the same as BBI, it is redundant; otherwise any instruction
// that modifies the def register of BBI stops the scan.
// DeadOrKillToUnset is a pointer to the previous operand that had the
// kill/dead flag set. It keeps track of the def register of BBI, the use
// registers of AfterBBIs and the def registers of AfterBBIs.
115 bool removeRedundantLIs(MachineBasicBlock &MBB,
116 const TargetRegisterInfo *TRI) {
117 LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
118 MBB.dump(); dbgs() << "\n");
120 DenseSet<MachineInstr *> InstrsToErase;
121 for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
122 // Skip load immediate that is marked to be erased later because it
123 // cannot be used to replace any other instructions.
124 if (InstrsToErase.contains(&*BBI))
125 continue;
126 // Skip non-load immediate.
127 unsigned Opc = BBI->getOpcode();
128 if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
129 Opc != PPC::LIS8)
130 continue;
131 // Skip load immediate, where the operand is a relocation (e.g., $r3 =
132 // LI target-flags(ppc-lo) %const.0).
133 if (!BBI->getOperand(1).isImm())
134 continue;
135 assert(BBI->getOperand(0).isReg() &&
136 "Expected a register for the first operand");
138 LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
140 Register Reg = BBI->getOperand(0).getReg();
141 int64_t Imm = BBI->getOperand(1).getImm();
142 MachineOperand *DeadOrKillToUnset = nullptr;
143 if (BBI->getOperand(0).isDead()) {
144 DeadOrKillToUnset = &BBI->getOperand(0);
145 LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
146 << " from load immediate " << *BBI
147 << " is a unsetting candidate\n");
149 // This loop scans instructions after BBI to see if there is any
150 // redundant load immediate.
151 for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
152 ++AfterBBI) {
153 // Track the operand that kill Reg. We would unset the kill flag of
154 // the operand if there is a following redundant load immediate.
155 int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
157 // We can't just clear implicit kills, so if we encounter one, stop
158 // looking further.
159 if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
160 LLVM_DEBUG(dbgs()
161 << "Encountered an implicit kill, cannot proceed: ");
162 LLVM_DEBUG(AfterBBI->dump());
163 break;
166 if (KillIdx != -1) {
167 assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
168 DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
169 LLVM_DEBUG(dbgs()
170 << " Kill flag of " << *DeadOrKillToUnset << " from "
171 << *AfterBBI << " is a unsetting candidate\n");
174 if (!AfterBBI->modifiesRegister(Reg, TRI))
175 continue;
176 // Finish scanning because Reg is overwritten by a non-load
177 // instruction.
178 if (AfterBBI->getOpcode() != Opc)
179 break;
180 assert(AfterBBI->getOperand(0).isReg() &&
181 "Expected a register for the first operand");
182 // Finish scanning because Reg is overwritten by a relocation or a
183 // different value.
184 if (!AfterBBI->getOperand(1).isImm() ||
185 AfterBBI->getOperand(1).getImm() != Imm)
186 break;
188 // It loads same immediate value to the same Reg, which is redundant.
189 // We would unset kill flag in previous Reg usage to extend live range
190 // of Reg first, then remove the redundancy.
191 if (DeadOrKillToUnset) {
192 LLVM_DEBUG(dbgs()
193 << " Unset dead/kill flag of " << *DeadOrKillToUnset
194 << " from " << *DeadOrKillToUnset->getParent());
195 if (DeadOrKillToUnset->isDef())
196 DeadOrKillToUnset->setIsDead(false);
197 else
198 DeadOrKillToUnset->setIsKill(false);
200 DeadOrKillToUnset =
201 AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
202 if (DeadOrKillToUnset)
203 LLVM_DEBUG(dbgs()
204 << " Dead flag of " << *DeadOrKillToUnset << " from "
205 << *AfterBBI << " is a unsetting candidate\n");
206 InstrsToErase.insert(&*AfterBBI);
207 LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
208 AfterBBI->dump());
212 for (MachineInstr *MI : InstrsToErase) {
213 MI->eraseFromParent();
215 NumRemovedInPreEmit += InstrsToErase.size();
216 return !InstrsToErase.empty();
219 // Check if this instruction is a PLDpc that is part of a GOT indirect
220 // access.
221 bool isGOTPLDpc(MachineInstr &Instr) {
222 if (Instr.getOpcode() != PPC::PLDpc)
223 return false;
225 // The result must be a register.
226 const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
227 if (!LoadedAddressReg.isReg())
228 return false;
230 // Make sure that this is a global symbol.
231 const MachineOperand &SymbolOp = Instr.getOperand(1);
232 if (!SymbolOp.isGlobal())
233 return false;
235 // Finally return true only if the GOT flag is present.
236 return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
239 bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
240 MachineFunction *MF = MBB.getParent();
241 // If the linker opt is disabled then just return.
242 if (!EnablePCRelLinkerOpt)
243 return false;
245 // Add this linker opt only if we are using PC Relative memops.
246 if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
247 return false;
249 // Struct to keep track of one def/use pair for a GOT indirect access.
250 struct GOTDefUsePair {
251 MachineBasicBlock::iterator DefInst;
252 MachineBasicBlock::iterator UseInst;
253 Register DefReg;
254 Register UseReg;
255 bool StillValid;
257 // Vector of def/ues pairs in this basic block.
258 SmallVector<GOTDefUsePair, 4> CandPairs;
259 SmallVector<GOTDefUsePair, 4> ValidPairs;
260 bool MadeChange = false;
262 // Run through all of the instructions in the basic block and try to
263 // collect potential pairs of GOT indirect access instructions.
264 for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
265 // Look for the initial GOT indirect load.
266 if (isGOTPLDpc(*BBI)) {
267 GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
268 BBI->getOperand(0).getReg(),
269 PPC::NoRegister, true};
270 CandPairs.push_back(CurrentPair);
271 continue;
274 // We haven't encountered any new PLD instructions, nothing to check.
275 if (CandPairs.empty())
276 continue;
278 // Run through the candidate pairs and see if any of the registers
279 // defined in the PLD instructions are used by this instruction.
280 // Note: the size of CandPairs can change in the loop.
281 for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
282 GOTDefUsePair &Pair = CandPairs[Idx];
283 // The instruction does not use or modify this PLD's def reg,
284 // ignore it.
285 if (!BBI->readsRegister(Pair.DefReg, TRI) &&
286 !BBI->modifiesRegister(Pair.DefReg, TRI))
287 continue;
289 // The use needs to be used in the address compuation and not
290 // as the register being stored for a store.
291 const MachineOperand *UseOp =
292 hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
294 // Check for a valid use.
295 if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
296 UseOp->isUse() && UseOp->isKill()) {
297 Pair.UseInst = BBI;
298 Pair.UseReg = BBI->getOperand(0).getReg();
299 ValidPairs.push_back(Pair);
301 CandPairs.erase(CandPairs.begin() + Idx);
305 // Go through all of the pairs and check for any more valid uses.
306 for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
307 // We shouldn't be here if we don't have a valid pair.
308 assert(Pair->UseInst.isValid() && Pair->StillValid &&
309 "Kept an invalid def/use pair for GOT PCRel opt");
310 // We have found a potential pair. Search through the instructions
311 // between the def and the use to see if it is valid to mark this as a
312 // linker opt.
313 MachineBasicBlock::iterator BBI = Pair->DefInst;
314 ++BBI;
315 for (; BBI != Pair->UseInst; ++BBI) {
316 if (BBI->readsRegister(Pair->UseReg, TRI) ||
317 BBI->modifiesRegister(Pair->UseReg, TRI)) {
318 Pair->StillValid = false;
319 break;
323 if (!Pair->StillValid)
324 continue;
326 // The load/store instruction that uses the address from the PLD will
327 // either use a register (for a store) or define a register (for the
328 // load). That register will be added as an implicit def to the PLD
329 // and as an implicit use on the second memory op. This is a precaution
330 // to prevent future passes from using that register between the two
331 // instructions.
332 MachineOperand ImplDef =
333 MachineOperand::CreateReg(Pair->UseReg, true, true);
334 MachineOperand ImplUse =
335 MachineOperand::CreateReg(Pair->UseReg, false, true);
336 Pair->DefInst->addOperand(ImplDef);
337 Pair->UseInst->addOperand(ImplUse);
339 // Create the symbol.
340 MCContext &Context = MF->getContext();
341 MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel");
342 MachineOperand PCRelLabel =
343 MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
344 Pair->DefInst->addOperand(*MF, PCRelLabel);
345 Pair->UseInst->addOperand(*MF, PCRelLabel);
346 MadeChange |= true;
348 return MadeChange;
351 // This function removes redundant pairs of accumulator prime/unprime
352 // instructions. In some situations, it's possible the compiler inserts an
353 // accumulator prime instruction followed by an unprime instruction (e.g.
354 // when we store an accumulator after restoring it from a spill). If the
355 // accumulator is not used between the two, they can be removed. This
356 // function removes these redundant pairs from basic blocks.
357 // The algorithm is quite straightforward - every time we encounter a prime
358 // instruction, the primed register is added to a candidate set. Any use
359 // other than a prime removes the candidate from the set and any de-prime
360 // of a current candidate marks both the prime and de-prime for removal.
361 // This way we ensure we only remove prime/de-prime *pairs* with no
362 // intervening uses.
363 bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
364 DenseSet<MachineInstr *> InstrsToErase;
365 // Initially, none of the acc registers are candidates.
366 SmallVector<MachineInstr *, 8> Candidates(
367 PPC::UACCRCRegClass.getNumRegs(), nullptr);
369 for (MachineInstr &BBI : MBB.instrs()) {
370 unsigned Opc = BBI.getOpcode();
371 // If we are visiting a xxmtacc instruction, we add it and its operand
372 // register to the candidate set.
373 if (Opc == PPC::XXMTACC) {
374 Register Acc = BBI.getOperand(0).getReg();
375 assert(PPC::ACCRCRegClass.contains(Acc) &&
376 "Unexpected register for XXMTACC");
377 Candidates[Acc - PPC::ACC0] = &BBI;
379 // If we are visiting a xxmfacc instruction and its operand register is
380 // in the candidate set, we mark the two instructions for removal.
381 else if (Opc == PPC::XXMFACC) {
382 Register Acc = BBI.getOperand(0).getReg();
383 assert(PPC::ACCRCRegClass.contains(Acc) &&
384 "Unexpected register for XXMFACC");
385 if (!Candidates[Acc - PPC::ACC0])
386 continue;
387 InstrsToErase.insert(&BBI);
388 InstrsToErase.insert(Candidates[Acc - PPC::ACC0]);
390 // If we are visiting an instruction using an accumulator register
391 // as operand, we remove it from the candidate set.
392 else {
393 for (MachineOperand &Operand : BBI.operands()) {
394 if (!Operand.isReg())
395 continue;
396 Register Reg = Operand.getReg();
397 if (PPC::ACCRCRegClass.contains(Reg))
398 Candidates[Reg - PPC::ACC0] = nullptr;
403 for (MachineInstr *MI : InstrsToErase)
404 MI->eraseFromParent();
405 NumRemovedInPreEmit += InstrsToErase.size();
406 return !InstrsToErase.empty();
409 bool runOnMachineFunction(MachineFunction &MF) override {
410 if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
411 // Remove UNENCODED_NOP even when this pass is disabled.
412 // This needs to be done unconditionally so we don't emit zeros
413 // in the instruction stream.
414 SmallVector<MachineInstr *, 4> InstrsToErase;
415 for (MachineBasicBlock &MBB : MF)
416 for (MachineInstr &MI : MBB)
417 if (MI.getOpcode() == PPC::UNENCODED_NOP)
418 InstrsToErase.push_back(&MI);
419 for (MachineInstr *MI : InstrsToErase)
420 MI->eraseFromParent();
421 return false;
423 bool Changed = false;
424 const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
425 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
426 SmallVector<MachineInstr *, 4> InstrsToErase;
427 for (MachineBasicBlock &MBB : MF) {
428 Changed |= removeRedundantLIs(MBB, TRI);
429 Changed |= addLinkerOpt(MBB, TRI);
430 Changed |= removeAccPrimeUnprime(MBB);
431 for (MachineInstr &MI : MBB) {
432 unsigned Opc = MI.getOpcode();
433 if (Opc == PPC::UNENCODED_NOP) {
434 InstrsToErase.push_back(&MI);
435 continue;
437 // Detect self copies - these can result from running AADB.
438 if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
439 const MCInstrDesc &MCID = TII->get(Opc);
440 if (MCID.getNumOperands() == 3 &&
441 MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
442 MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
443 NumberOfSelfCopies++;
444 LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
445 LLVM_DEBUG(MI.dump());
446 InstrsToErase.push_back(&MI);
447 continue;
449 else if (MCID.getNumOperands() == 2 &&
450 MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
451 NumberOfSelfCopies++;
452 LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
453 LLVM_DEBUG(MI.dump());
454 InstrsToErase.push_back(&MI);
455 continue;
458 MachineInstr *DefMIToErase = nullptr;
459 if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
460 Changed = true;
461 NumRRConvertedInPreEmit++;
462 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
463 LLVM_DEBUG(MI.dump());
464 if (DefMIToErase) {
465 InstrsToErase.push_back(DefMIToErase);
468 if (TII->foldFrameOffset(MI)) {
469 Changed = true;
470 NumFrameOffFoldInPreEmit++;
471 LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
472 LLVM_DEBUG(MI.dump());
476 // Eliminate conditional branch based on a constant CR bit by
477 // CRSET or CRUNSET. We eliminate the conditional branch or
478 // convert it into an unconditional branch. Also, if the CR bit
479 // is not used by other instructions, we eliminate CRSET as well.
480 auto I = MBB.getFirstInstrTerminator();
481 if (I == MBB.instr_end())
482 continue;
483 MachineInstr *Br = &*I;
484 if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
485 continue;
486 MachineInstr *CRSetMI = nullptr;
487 Register CRBit = Br->getOperand(0).getReg();
488 unsigned CRReg = getCRFromCRBit(CRBit);
489 bool SeenUse = false;
490 MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
491 for (It++; It != Er; It++) {
492 if (It->modifiesRegister(CRBit, TRI)) {
493 if ((It->getOpcode() == PPC::CRUNSET ||
494 It->getOpcode() == PPC::CRSET) &&
495 It->getOperand(0).getReg() == CRBit)
496 CRSetMI = &*It;
497 break;
499 if (It->readsRegister(CRBit, TRI))
500 SeenUse = true;
502 if (!CRSetMI) continue;
504 unsigned CRSetOp = CRSetMI->getOpcode();
505 if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
506 (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
507 // Remove this branch since it cannot be taken.
508 InstrsToErase.push_back(Br);
509 MBB.removeSuccessor(Br->getOperand(1).getMBB());
511 else {
512 // This conditional branch is always taken. So, remove all branches
513 // and insert an unconditional branch to the destination of this.
514 MachineBasicBlock::iterator It = Br, Er = MBB.end();
515 for (; It != Er; It++) {
516 if (It->isDebugInstr()) continue;
517 assert(It->isTerminator() && "Non-terminator after a terminator");
518 InstrsToErase.push_back(&*It);
520 if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
521 ArrayRef<MachineOperand> NoCond;
522 TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
523 NoCond, Br->getDebugLoc());
525 for (auto &Succ : MBB.successors())
526 if (Succ != Br->getOperand(1).getMBB()) {
527 MBB.removeSuccessor(Succ);
528 break;
532 // If the CRBit is not used by another instruction, we can eliminate
533 // CRSET/CRUNSET instruction.
534 if (!SeenUse) {
535 // We need to check use of the CRBit in successors.
536 for (auto &SuccMBB : MBB.successors())
537 if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
538 SeenUse = true;
539 break;
541 if (!SeenUse)
542 InstrsToErase.push_back(CRSetMI);
545 for (MachineInstr *MI : InstrsToErase) {
546 LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
547 LLVM_DEBUG(MI->dump());
548 MI->eraseFromParent();
549 NumRemovedInPreEmit++;
551 return Changed;
556 INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
557 false, false)
558 char PPCPreEmitPeephole::ID = 0;
560 FunctionPass *llvm::createPPCPreEmitPeepholePass() {
561 return new PPCPreEmitPeephole();