//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to clean up ugly code sequences at
// the MachineInstruction layer.
//
// Currently, there are two optimizations implemented:
//  - One pre-RA MachineSSA pass to eliminate type promotion sequences that
//    zero extend 32-bit subregisters to 64-bit registers, if the compiler
//    could prove the subregister is defined by 32-bit operations, in which
//    case the upper half of the underlying 64-bit register was zeroed
//    implicitly.
//
//  - One post-RA PreEmit pass to do final cleanup on some redundant
//    instructions generated due to bad RA on subregisters.
//===----------------------------------------------------------------------===//
#include "BPF.h"
#include "BPFInstrInfo.h"
#include "BPFTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include <set>

using namespace llvm;

#define DEBUG_TYPE "bpf-mi-zext-elim"

static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,
  cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));
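// Illustrative note, not from the original source: since this is a standard
// (hidden) cl::opt, the bound can be overridden on the llc command line,
// e.g. something like
//   llc -march=bpf -mcpu=v4 -gotol-abs-low-bound=8192 foo.ll
// where foo.ll is a placeholder input; smaller values make the pre-emit
// branch adjustment below more conservative about treating a branch offset
// as fitting in 16 bits.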
STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");

namespace {

struct BPFMIPeephole : public MachineFunctionPass {

  static char ID;
  const BPFInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;

  BPFMIPeephole() : MachineFunctionPass(ID) {
    initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  bool isCopyFrom32Def(MachineInstr *CopyMI);
  bool isInsnFrom32Def(MachineInstr *DefInsn);
  bool isPhiFrom32Def(MachineInstr *MovMI);
  bool isMovFrom32Def(MachineInstr *MovMI);
  bool eliminateZExtSeq();
  bool eliminateZExt();

  std::set<MachineInstr *> PhiInsns;

public:

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()))
      return false;

    initialize(MF);

    // First try to eliminate (zext, lshift, rshift) and then
    // try to eliminate zext.
    bool ZExtSeqExist, ZExtExist;
    ZExtSeqExist = eliminateZExtSeq();
    ZExtExist = eliminateZExt();
    return ZExtSeqExist || ZExtExist;
  }
};

// Initialize class variables.
void BPFMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
  LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
}

bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
{
  MachineOperand &opnd = CopyMI->getOperand(1);

  if (!opnd.isReg())
    return false;

  // Return false if the value comes from a 32bit physical register.
  // Most likely, this physical register is aliased to a function call
  // return value or a current function parameter.
  Register Reg = opnd.getReg();
  if (!Reg.isVirtual())
    return false;

  if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
    return false;

  MachineInstr *DefInsn = MRI->getVRegDef(Reg);
  if (!isInsnFrom32Def(DefInsn))
    return false;

  return true;
}

bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
{
  for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
    MachineOperand &opnd = PhiMI->getOperand(i);

    if (!opnd.isReg())
      return false;

    MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
    if (!PhiDef)
      return false;
    if (PhiDef->isPHI()) {
      if (!PhiInsns.insert(PhiDef).second)
        return false;
      if (!isPhiFrom32Def(PhiDef))
        return false;
    }
    if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
      return false;
  }

  return true;
}

// The \p DefInsn instruction defines a virtual register.
bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
{
  if (!DefInsn)
    return false;

  if (DefInsn->isPHI()) {
    if (!PhiInsns.insert(DefInsn).second)
      return false;
    if (!isPhiFrom32Def(DefInsn))
      return false;
  } else if (DefInsn->getOpcode() == BPF::COPY) {
    if (!isCopyFrom32Def(DefInsn))
      return false;
  }

  return true;
}

bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
{
  MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());

  LLVM_DEBUG(dbgs() << " Def of Mov Src:");
  LLVM_DEBUG(DefInsn->dump());

  PhiInsns.clear();
  if (!isInsnFrom32Def(DefInsn))
    return false;

  LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");

  return true;
}

bool BPFMIPeephole::eliminateZExtSeq() {
  MachineInstr* ToErase = nullptr;
  bool Eliminated = false;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {
      // If the previous instruction was marked for elimination, remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
      //
      //   MOV_32_64 rB, wA
      //   SLL_ri    rB, rB, 32
      //   SRL_ri    rB, rB, 32
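      //
      // Illustrative sketch, not from the original source: when the def of wA
      // qualifies (see isMovFrom32Def), the three instructions above are
      // replaced by the single pseudo built below,
      //   rB = SUBREG_TO_REG 0, wA, sub_32
      // i.e. wA is simply reinterpreted as the low 32-bit subregister of rB.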
      if (MI.getOpcode() == BPF::SRL_ri &&
          MI.getOperand(2).getImm() == 32) {
        Register DstReg = MI.getOperand(0).getReg();
        Register ShfReg = MI.getOperand(1).getReg();
        MachineInstr *SllMI = MRI->getVRegDef(ShfReg);

        LLVM_DEBUG(dbgs() << "Starting SRL found:");
        LLVM_DEBUG(MI.dump());

        if (!SllMI ||
            SllMI->isPHI() ||
            SllMI->getOpcode() != BPF::SLL_ri ||
            SllMI->getOperand(2).getImm() != 32)
          continue;

        LLVM_DEBUG(dbgs() << " SLL found:");
        LLVM_DEBUG(SllMI->dump());

        MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
        if (!MovMI ||
            MovMI->isPHI() ||
            MovMI->getOpcode() != BPF::MOV_32_64)
          continue;

        LLVM_DEBUG(dbgs() << " Type cast Mov found:");
        LLVM_DEBUG(MovMI->dump());

        Register SubReg = MovMI->getOperand(1).getReg();
        if (!isMovFrom32Def(MovMI)) {
          LLVM_DEBUG(dbgs()
                     << " One ZExt elim sequence failed qualifying elim.\n");
          continue;
        }

        BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
          .addImm(0).addReg(SubReg).addImm(BPF::sub_32);

        SllMI->eraseFromParent();
        MovMI->eraseFromParent();
        // MI is the right shift; we can't erase it in its own iteration.
        // Mark it in ToErase, and erase it in the next iteration.
        ToErase = &MI;
        ZExtElemNum++;
        Eliminated = true;
      }
    }
  }

  return Eliminated;
}

bool BPFMIPeephole::eliminateZExt() {
  MachineInstr* ToErase = nullptr;
  bool Eliminated = false;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {
      // If the previous instruction was marked for elimination, remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      if (MI.getOpcode() != BPF::MOV_32_64)
        continue;

      // Eliminate MOV_32_64 if possible.
      //   MOV_32_64 rA, wB
      //
      // If wB has been zero extended, replace it with a SUBREG_TO_REG.
      // This is to work around BPF programs where pkt->{data, data_end}
      // is encoded as u32, but the verifier actually populates them
      // as 64bit pointers. The MOV_32_64 will zero out the top 32 bits.
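      //
      // Illustrative sketch, not from the original source: if wB is already
      // known to come from a 32-bit def, then
      //   MOV_32_64 rA, wB
      // is rewritten as the pseudo built below,
      //   rA = SUBREG_TO_REG 0, wB, sub_32
      // and the explicit zero-extending move is dropped.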
      LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
      LLVM_DEBUG(MI.dump());

      if (!isMovFrom32Def(&MI))
        continue;

      LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");

      Register dst = MI.getOperand(0).getReg();
      Register src = MI.getOperand(1).getReg();

      // Build a SUBREG_TO_REG instruction.
      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
        .addImm(0).addReg(src).addImm(BPF::sub_32);

      ToErase = &MI;
      Eliminated = true;
    }
  }

  return Eliminated;
}

} // end anonymous namespace

INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
                "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
                false, false)

char BPFMIPeephole::ID = 0;
FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }

STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");

namespace {

struct BPFMIPreEmitPeephole : public MachineFunctionPass {

  static char ID;
  MachineFunction *MF;
  const TargetRegisterInfo *TRI;
  const BPFInstrInfo *TII;
  bool SupportGotol;

  BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
    initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  bool in16BitRange(int Num);
  bool eliminateRedundantMov();
  bool adjustBranch();

public:

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()))
      return false;

    initialize(MF);

    bool Changed;
    Changed = eliminateRedundantMov();
    if (SupportGotol)
      Changed = adjustBranch() || Changed;
    return Changed;
  }
};

// Initialize class variables.
void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
  TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
  SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();
  LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
}

bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
  MachineInstr* ToErase = nullptr;
  bool Eliminated = false;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {
      // If the previous instruction was marked for elimination, remove it now.
      if (ToErase) {
        LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:");
        LLVM_DEBUG(ToErase->dump());
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Eliminate identical move:
      //
      //   MOV rA, rA
      //
      // Note that we cannot remove
      //   MOV_32_64 rA, wA
      //   MOV_rr_32 wA, wA
      // as these two instructions have a side effect: they zero out
      // the top 32 bits of rA.
      unsigned Opcode = MI.getOpcode();
      if (Opcode == BPF::MOV_rr) {
        Register dst = MI.getOperand(0).getReg();
        Register src = MI.getOperand(1).getReg();

        if (dst != src)
          continue;

        ToErase = &MI;
        RedundantMovElemNum++;
        Eliminated = true;
      }
    }
  }

  return Eliminated;
}

bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
  // Well, the cut-off is not precisely at the 16bit range since
  // new insns are added during the transformation. So let us
  // be a little bit conservative.
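  // Worked example (illustrative, not from the original source): with the
  // default GotolAbsLowBound of INT16_MAX >> 1 = 32767 >> 1 = 16383, only
  // estimated distances in [-16383, 16383] are accepted as 16-bit, leaving
  // roughly half of the true [-32768, 32767] range as slack for insns that
  // the transformation itself inserts.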
  return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;
}
// Before cpu=v4, only a 16bit branch target offset (-0x8000 to 0x7fff)
// is supported for both unconditional (JMP) and conditional (JEQ, JSGT,
// etc.) branches. In certain cases, e.g., full unrolling, the branch
// target offset might exceed the 16bit range. If this happens, LLVM
// will generate incorrect code as the offset is truncated to 16 bits.
//
// To fix this rare case, a new insn JMPL is introduced. This new
// insn supports a 32bit branch target offset. The compiler does not
// use this insn during instruction selection. Rather, the BPF backend
// will estimate the branch target offset and do JMP -> JMPL and
// JEQ -> JEQ + JMPL conversion if the estimated branch target offset
// is beyond the 16bit range.
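//
// A minimal illustration, not from the original source, in pseudo assembly:
//   JMP far_bb              becomes   JMPL far_bb
//   JEQ r1, r2, far_bb      becomes   JEQ r1, r2, new_bb
//                                     ...
//                           new_bb:   JMPL far_bb
// Because there is no 32-bit conditional jump insn, the conditional case
// branches to a nearby trampoline block that then performs the long jump.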
bool BPFMIPreEmitPeephole::adjustBranch() {
  bool Changed = false;
  int CurrNumInsns = 0;
  DenseMap<MachineBasicBlock *, int> SoFarNumInsns;
  DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
  std::vector<MachineBasicBlock *> MBBs;

  MachineBasicBlock *PrevBB = nullptr;
  for (MachineBasicBlock &MBB : *MF) {
    // MBB.size() is the number of insns in this basic block, including some
    // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
    // Typically we have way more normal insns than DEBUG_VALUE insns.
    // Also, if we indeed need to convert a conditional branch like JEQ to
    // JEQ + JMPL, we actually introduce some new insns as described below.
    CurrNumInsns += (int)MBB.size();
    SoFarNumInsns[&MBB] = CurrNumInsns;
    if (PrevBB != nullptr)
      FollowThroughBB[PrevBB] = &MBB;
    PrevBB = &MBB;
    // A list of original BBs to make later traversal easier.
    MBBs.push_back(&MBB);
  }
  FollowThroughBB[PrevBB] = nullptr;

  for (unsigned i = 0; i < MBBs.size(); i++) {
    // We have four cases here:
    //  (1). no terminator, simple follow through.
    //  (2). jmp to another bb.
    //  (3). conditional jmp to another bb or follow through.
    //  (4). conditional jmp followed by an unconditional jmp.
    MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;

    MachineBasicBlock *MBB = MBBs[i];
    for (MachineInstr &Term : MBB->terminators()) {
      if (Term.isConditionalBranch()) {
        assert(CondJmp == nullptr);
        CondJmp = &Term;
      } else if (Term.isUnconditionalBranch()) {
        assert(UncondJmp == nullptr);
        UncondJmp = &Term;
      }
    }

    // (1). no terminator, simple follow through.
    if (!CondJmp && !UncondJmp)
      continue;

    MachineBasicBlock *CondTargetBB, *JmpBB;
    CurrNumInsns = SoFarNumInsns[MBB];

    // (2). jmp to another bb.
    if (!CondJmp && UncondJmp) {
      JmpBB = UncondJmp->getOperand(0).getMBB();
      if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
        continue;

      // Replace this insn with a JMPL.
      BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
      UncondJmp->eraseFromParent();
      Changed = true;
      continue;
    }
    const BasicBlock *TermBB = MBB->getBasicBlock();
    int Dist;

    // (3). conditional jmp to another bb or follow through.
    if (!UncondJmp) {
      CondTargetBB = CondJmp->getOperand(2).getMBB();
      MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
      Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
      if (in16BitRange(Dist))
        continue;

      // We have
      //   B2: ...
      //       if (cond) goto B5
      //   B3: ...
      // where B2 -> B5 is beyond the 16bit range.
      //
      // We do not have a 32bit cond jmp insn, so we try to do
      // the following.
      //   B2:     ...
      //           if (cond) goto New_B1
      //   New_B0: goto B3
      //   New_B1: gotol B5
      //   B3:     ...
      // Basically two new basic blocks are created.
      MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);
      MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB);

      // Insert New_B0 and New_B1 into the function block list.
      MachineFunction::iterator MBB_I = ++MBB->getIterator();
      MF->insert(MBB_I, New_B0);
      MF->insert(MBB_I, New_B1);

      // Replace the B2 cond jump.
      if (CondJmp->getOperand(1).isReg())
        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
            .addReg(CondJmp->getOperand(0).getReg())
            .addReg(CondJmp->getOperand(1).getReg())
            .addMBB(New_B1);
      else
        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
            .addReg(CondJmp->getOperand(0).getReg())
            .addImm(CondJmp->getOperand(1).getImm())
            .addMBB(New_B1);

      // It is possible that CondTargetBB and FollowBB are the same, but the
      // above Dist check should have already filtered out this case.
      MBB->removeSuccessor(CondTargetBB);
      MBB->removeSuccessor(FollowBB);
      MBB->addSuccessor(New_B0);
      MBB->addSuccessor(New_B1);

      // Populate insns in New_B0 and New_B1.
      BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB);
      BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))
          .addMBB(CondTargetBB);

      New_B0->addSuccessor(FollowBB);
      New_B1->addSuccessor(CondTargetBB);
      CondJmp->eraseFromParent();
      Changed = true;
      continue;
    }

    // (4). conditional jmp followed by an unconditional jmp.
    CondTargetBB = CondJmp->getOperand(2).getMBB();
    JmpBB = UncondJmp->getOperand(0).getMBB();

    // We have
    //   B2: ...
    //       if (cond) goto B5
    //       JMP B7
    //   B3: ...
    //
    // If only B2 -> B5 is out of the 16bit range, we can do
    //   B2: ...
    //       if (cond) goto New_B
    //       JMP B7
    //   New_B: gotol B5
    //   B3: ...
    //
    // If only 'JMP B7' is out of the 16bit range, we can replace
    // 'JMP B7' with 'JMPL B7'.
    //
    // If both B2 -> B5 and 'JMP B7' are out of range, just do
    // both of the above transformations.
    Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
    if (!in16BitRange(Dist)) {
      MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);

      // Insert New_B into the function block list.
      MF->insert(++MBB->getIterator(), New_B);

      // Replace the B2 cond jump.
      if (CondJmp->getOperand(1).isReg())
        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
            .addReg(CondJmp->getOperand(0).getReg())
            .addReg(CondJmp->getOperand(1).getReg())
            .addMBB(New_B);
      else
        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
            .addReg(CondJmp->getOperand(0).getReg())
            .addImm(CondJmp->getOperand(1).getImm())
            .addMBB(New_B);

      if (CondTargetBB != JmpBB)
        MBB->removeSuccessor(CondTargetBB);
      MBB->addSuccessor(New_B);

      // Populate the insn in New_B.
      BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB);

      New_B->addSuccessor(CondTargetBB);
      CondJmp->eraseFromParent();
      Changed = true;
    }

    if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) {
      BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
      UncondJmp->eraseFromParent();
      Changed = true;
    }
  }

  return Changed;
}

} // end anonymous namespace

INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
                "BPF PreEmit Peephole Optimization", false, false)

char BPFMIPreEmitPeephole::ID = 0;
FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
{
  return new BPFMIPreEmitPeephole();
}