1 //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass performs peephole optimizations to cleanup ugly code sequences at
10 // MachineInstruction layer.
12 // Currently, there are two optimizations implemented:
13 // - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
14 // zero extend 32-bit subregisters to 64-bit registers, if the compiler
15 // could prove the subregisters is defined by 32-bit operations in which
// case the upper half of the underlying 64-bit registers were zeroed
// implicitly.
19 // - One post-RA PreEmit pass to do final cleanup on some redundant
20 // instructions generated due to bad RA on subregister.
21 //===----------------------------------------------------------------------===//
#include "BPFInstrInfo.h"
#include "BPFTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include <set>

using namespace llvm;
35 #define DEBUG_TYPE "bpf-mi-zext-elim"
37 static cl::opt
<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden
,
38 cl::init(INT16_MAX
>> 1), cl::desc("Specify gotol lower bound"));
40 STATISTIC(ZExtElemNum
, "Number of zero extension shifts eliminated");
44 struct BPFMIPeephole
: public MachineFunctionPass
{
47 const BPFInstrInfo
*TII
;
49 MachineRegisterInfo
*MRI
;
51 BPFMIPeephole() : MachineFunctionPass(ID
) {
52 initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
56 // Initialize class variables.
57 void initialize(MachineFunction
&MFParm
);
59 bool isCopyFrom32Def(MachineInstr
*CopyMI
);
60 bool isInsnFrom32Def(MachineInstr
*DefInsn
);
61 bool isPhiFrom32Def(MachineInstr
*MovMI
);
62 bool isMovFrom32Def(MachineInstr
*MovMI
);
63 bool eliminateZExtSeq();
66 std::set
<MachineInstr
*> PhiInsns
;
70 // Main entry point for this pass.
71 bool runOnMachineFunction(MachineFunction
&MF
) override
{
72 if (skipFunction(MF
.getFunction()))
77 // First try to eliminate (zext, lshift, rshift) and then
78 // try to eliminate zext.
79 bool ZExtSeqExist
, ZExtExist
;
80 ZExtSeqExist
= eliminateZExtSeq();
81 ZExtExist
= eliminateZExt();
82 return ZExtSeqExist
|| ZExtExist
;
86 // Initialize class variables.
87 void BPFMIPeephole::initialize(MachineFunction
&MFParm
) {
89 MRI
= &MF
->getRegInfo();
90 TII
= MF
->getSubtarget
<BPFSubtarget
>().getInstrInfo();
91 LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
94 bool BPFMIPeephole::isCopyFrom32Def(MachineInstr
*CopyMI
)
96 MachineOperand
&opnd
= CopyMI
->getOperand(1);
101 // Return false if getting value from a 32bit physical register.
102 // Most likely, this physical register is aliased to
103 // function call return value or current function parameters.
104 Register Reg
= opnd
.getReg();
105 if (!Reg
.isVirtual())
108 if (MRI
->getRegClass(Reg
) == &BPF::GPRRegClass
)
111 MachineInstr
*DefInsn
= MRI
->getVRegDef(Reg
);
112 if (!isInsnFrom32Def(DefInsn
))
118 bool BPFMIPeephole::isPhiFrom32Def(MachineInstr
*PhiMI
)
120 for (unsigned i
= 1, e
= PhiMI
->getNumOperands(); i
< e
; i
+= 2) {
121 MachineOperand
&opnd
= PhiMI
->getOperand(i
);
126 MachineInstr
*PhiDef
= MRI
->getVRegDef(opnd
.getReg());
129 if (PhiDef
->isPHI()) {
130 if (!PhiInsns
.insert(PhiDef
).second
)
132 if (!isPhiFrom32Def(PhiDef
))
135 if (PhiDef
->getOpcode() == BPF::COPY
&& !isCopyFrom32Def(PhiDef
))
142 // The \p DefInsn instruction defines a virtual register.
143 bool BPFMIPeephole::isInsnFrom32Def(MachineInstr
*DefInsn
)
148 if (DefInsn
->isPHI()) {
149 if (!PhiInsns
.insert(DefInsn
).second
)
151 if (!isPhiFrom32Def(DefInsn
))
153 } else if (DefInsn
->getOpcode() == BPF::COPY
) {
154 if (!isCopyFrom32Def(DefInsn
))
161 bool BPFMIPeephole::isMovFrom32Def(MachineInstr
*MovMI
)
163 MachineInstr
*DefInsn
= MRI
->getVRegDef(MovMI
->getOperand(1).getReg());
165 LLVM_DEBUG(dbgs() << " Def of Mov Src:");
166 LLVM_DEBUG(DefInsn
->dump());
169 if (!isInsnFrom32Def(DefInsn
))
172 LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");
177 bool BPFMIPeephole::eliminateZExtSeq() {
178 MachineInstr
* ToErase
= nullptr;
179 bool Eliminated
= false;
181 for (MachineBasicBlock
&MBB
: *MF
) {
182 for (MachineInstr
&MI
: MBB
) {
183 // If the previous instruction was marked for elimination, remove it now.
185 ToErase
->eraseFromParent();
189 // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
194 if (MI
.getOpcode() == BPF::SRL_ri
&&
195 MI
.getOperand(2).getImm() == 32) {
196 Register DstReg
= MI
.getOperand(0).getReg();
197 Register ShfReg
= MI
.getOperand(1).getReg();
198 MachineInstr
*SllMI
= MRI
->getVRegDef(ShfReg
);
200 LLVM_DEBUG(dbgs() << "Starting SRL found:");
201 LLVM_DEBUG(MI
.dump());
205 SllMI
->getOpcode() != BPF::SLL_ri
||
206 SllMI
->getOperand(2).getImm() != 32)
209 LLVM_DEBUG(dbgs() << " SLL found:");
210 LLVM_DEBUG(SllMI
->dump());
212 MachineInstr
*MovMI
= MRI
->getVRegDef(SllMI
->getOperand(1).getReg());
215 MovMI
->getOpcode() != BPF::MOV_32_64
)
218 LLVM_DEBUG(dbgs() << " Type cast Mov found:");
219 LLVM_DEBUG(MovMI
->dump());
221 Register SubReg
= MovMI
->getOperand(1).getReg();
222 if (!isMovFrom32Def(MovMI
)) {
224 << " One ZExt elim sequence failed qualifying elim.\n");
228 BuildMI(MBB
, MI
, MI
.getDebugLoc(), TII
->get(BPF::SUBREG_TO_REG
), DstReg
)
229 .addImm(0).addReg(SubReg
).addImm(BPF::sub_32
);
231 SllMI
->eraseFromParent();
232 MovMI
->eraseFromParent();
233 // MI is the right shift, we can't erase it in it's own iteration.
234 // Mark it to ToErase, and erase in the next iteration.
245 bool BPFMIPeephole::eliminateZExt() {
246 MachineInstr
* ToErase
= nullptr;
247 bool Eliminated
= false;
249 for (MachineBasicBlock
&MBB
: *MF
) {
250 for (MachineInstr
&MI
: MBB
) {
251 // If the previous instruction was marked for elimination, remove it now.
253 ToErase
->eraseFromParent();
257 if (MI
.getOpcode() != BPF::MOV_32_64
)
260 // Eliminate MOV_32_64 if possible.
263 // If wB has been zero extended, replace it with a SUBREG_TO_REG.
264 // This is to workaround BPF programs where pkt->{data, data_end}
265 // is encoded as u32, but actually the verifier populates them
266 // as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
267 LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
268 LLVM_DEBUG(MI
.dump());
270 if (!isMovFrom32Def(&MI
))
273 LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");
275 Register dst
= MI
.getOperand(0).getReg();
276 Register src
= MI
.getOperand(1).getReg();
278 // Build a SUBREG_TO_REG instruction.
279 BuildMI(MBB
, MI
, MI
.getDebugLoc(), TII
->get(BPF::SUBREG_TO_REG
), dst
)
280 .addImm(0).addReg(src
).addImm(BPF::sub_32
);
290 } // end default namespace
292 INITIALIZE_PASS(BPFMIPeephole
, DEBUG_TYPE
,
293 "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
296 char BPFMIPeephole::ID
= 0;
297 FunctionPass
* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
299 STATISTIC(RedundantMovElemNum
, "Number of redundant moves eliminated");
303 struct BPFMIPreEmitPeephole
: public MachineFunctionPass
{
307 const TargetRegisterInfo
*TRI
;
308 const BPFInstrInfo
*TII
;
311 BPFMIPreEmitPeephole() : MachineFunctionPass(ID
) {
312 initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
316 // Initialize class variables.
317 void initialize(MachineFunction
&MFParm
);
319 bool in16BitRange(int Num
);
320 bool eliminateRedundantMov();
325 // Main entry point for this pass.
326 bool runOnMachineFunction(MachineFunction
&MF
) override
{
327 if (skipFunction(MF
.getFunction()))
333 Changed
= eliminateRedundantMov();
335 Changed
= adjustBranch() || Changed
;
340 // Initialize class variables.
341 void BPFMIPreEmitPeephole::initialize(MachineFunction
&MFParm
) {
343 TII
= MF
->getSubtarget
<BPFSubtarget
>().getInstrInfo();
344 TRI
= MF
->getSubtarget
<BPFSubtarget
>().getRegisterInfo();
345 SupportGotol
= MF
->getSubtarget
<BPFSubtarget
>().hasGotol();
346 LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
349 bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
350 MachineInstr
* ToErase
= nullptr;
351 bool Eliminated
= false;
353 for (MachineBasicBlock
&MBB
: *MF
) {
354 for (MachineInstr
&MI
: MBB
) {
355 // If the previous instruction was marked for elimination, remove it now.
357 LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:");
358 LLVM_DEBUG(ToErase
->dump());
359 ToErase
->eraseFromParent();
363 // Eliminate identical move:
367 // Note that we cannot remove
370 // as these two instructions having side effects, zeroing out
371 // top 32 bits of rA.
372 unsigned Opcode
= MI
.getOpcode();
373 if (Opcode
== BPF::MOV_rr
) {
374 Register dst
= MI
.getOperand(0).getReg();
375 Register src
= MI
.getOperand(1).getReg();
381 RedundantMovElemNum
++;
390 bool BPFMIPreEmitPeephole::in16BitRange(int Num
) {
391 // Well, the cut-off is not precisely at 16bit range since
392 // new codes are added during the transformation. So let us
393 // a little bit conservative.
394 return Num
>= -GotolAbsLowBound
&& Num
<= GotolAbsLowBound
;
// Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff)
// is supported for both unconditional (JMP) and conditional (JEQ, JSGT,
// etc.) branches. In certain cases, e.g., full unrolling, the branch
// target offset might exceed 16bit range. If this happens, the llvm
// will generate incorrect code as the offset is truncated to 16bit.
//
// To fix this rare case, a new insn JMPL is introduced. This new
// insn supports 32bit branch target offset. The compiler does not
// use this insn during insn selection. Rather, BPF backend will
// estimate the branch target offset and do JMP -> JMPL and
// JEQ -> JEQ + JMPL conversion if the estimated branch target
// offset is beyond the 16bit range.
409 bool BPFMIPreEmitPeephole::adjustBranch() {
410 bool Changed
= false;
411 int CurrNumInsns
= 0;
412 DenseMap
<MachineBasicBlock
*, int> SoFarNumInsns
;
413 DenseMap
<MachineBasicBlock
*, MachineBasicBlock
*> FollowThroughBB
;
414 std::vector
<MachineBasicBlock
*> MBBs
;
416 MachineBasicBlock
*PrevBB
= nullptr;
417 for (MachineBasicBlock
&MBB
: *MF
) {
418 // MBB.size() is the number of insns in this basic block, including some
419 // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
420 // Typically we have way more normal insns than DEBUG_VALUE insns.
421 // Also, if we indeed need to convert conditional branch like JEQ to
422 // JEQ + JMPL, we actually introduced some new insns like below.
423 CurrNumInsns
+= (int)MBB
.size();
424 SoFarNumInsns
[&MBB
] = CurrNumInsns
;
425 if (PrevBB
!= nullptr)
426 FollowThroughBB
[PrevBB
] = &MBB
;
428 // A list of original BBs to make later traveral easier.
429 MBBs
.push_back(&MBB
);
431 FollowThroughBB
[PrevBB
] = nullptr;
433 for (unsigned i
= 0; i
< MBBs
.size(); i
++) {
434 // We have four cases here:
435 // (1). no terminator, simple follow through.
436 // (2). jmp to another bb.
437 // (3). conditional jmp to another bb or follow through.
438 // (4). conditional jmp followed by an unconditional jmp.
439 MachineInstr
*CondJmp
= nullptr, *UncondJmp
= nullptr;
441 MachineBasicBlock
*MBB
= MBBs
[i
];
442 for (MachineInstr
&Term
: MBB
->terminators()) {
443 if (Term
.isConditionalBranch()) {
444 assert(CondJmp
== nullptr);
446 } else if (Term
.isUnconditionalBranch()) {
447 assert(UncondJmp
== nullptr);
452 // (1). no terminator, simple follow through.
453 if (!CondJmp
&& !UncondJmp
)
456 MachineBasicBlock
*CondTargetBB
, *JmpBB
;
457 CurrNumInsns
= SoFarNumInsns
[MBB
];
459 // (2). jmp to another bb.
460 if (!CondJmp
&& UncondJmp
) {
461 JmpBB
= UncondJmp
->getOperand(0).getMBB();
462 if (in16BitRange(SoFarNumInsns
[JmpBB
] - JmpBB
->size() - CurrNumInsns
))
465 // replace this insn as a JMPL.
466 BuildMI(MBB
, UncondJmp
->getDebugLoc(), TII
->get(BPF::JMPL
)).addMBB(JmpBB
);
467 UncondJmp
->eraseFromParent();
472 const BasicBlock
*TermBB
= MBB
->getBasicBlock();
475 // (3). conditional jmp to another bb or follow through.
477 CondTargetBB
= CondJmp
->getOperand(2).getMBB();
478 MachineBasicBlock
*FollowBB
= FollowThroughBB
[MBB
];
479 Dist
= SoFarNumInsns
[CondTargetBB
] - CondTargetBB
->size() - CurrNumInsns
;
480 if (in16BitRange(Dist
))
487 // where B2 -> B5 is beyond 16bit range.
489 // We do not have 32bit cond jmp insn. So we try to do
492 // if (cond) goto New_B1
496 // Basically two new basic blocks are created.
497 MachineBasicBlock
*New_B0
= MF
->CreateMachineBasicBlock(TermBB
);
498 MachineBasicBlock
*New_B1
= MF
->CreateMachineBasicBlock(TermBB
);
500 // Insert New_B0 and New_B1 into function block list.
501 MachineFunction::iterator MBB_I
= ++MBB
->getIterator();
502 MF
->insert(MBB_I
, New_B0
);
503 MF
->insert(MBB_I
, New_B1
);
505 // replace B2 cond jump
506 if (CondJmp
->getOperand(1).isReg())
507 BuildMI(*MBB
, MachineBasicBlock::iterator(*CondJmp
), CondJmp
->getDebugLoc(), TII
->get(CondJmp
->getOpcode()))
508 .addReg(CondJmp
->getOperand(0).getReg())
509 .addReg(CondJmp
->getOperand(1).getReg())
512 BuildMI(*MBB
, MachineBasicBlock::iterator(*CondJmp
), CondJmp
->getDebugLoc(), TII
->get(CondJmp
->getOpcode()))
513 .addReg(CondJmp
->getOperand(0).getReg())
514 .addImm(CondJmp
->getOperand(1).getImm())
517 // it is possible that CondTargetBB and FollowBB are the same. But the
518 // above Dist checking should already filtered this case.
519 MBB
->removeSuccessor(CondTargetBB
);
520 MBB
->removeSuccessor(FollowBB
);
521 MBB
->addSuccessor(New_B0
);
522 MBB
->addSuccessor(New_B1
);
524 // Populate insns in New_B0 and New_B1.
525 BuildMI(New_B0
, CondJmp
->getDebugLoc(), TII
->get(BPF::JMP
)).addMBB(FollowBB
);
526 BuildMI(New_B1
, CondJmp
->getDebugLoc(), TII
->get(BPF::JMPL
))
527 .addMBB(CondTargetBB
);
529 New_B0
->addSuccessor(FollowBB
);
530 New_B1
->addSuccessor(CondTargetBB
);
531 CondJmp
->eraseFromParent();
536 // (4). conditional jmp followed by an unconditional jmp.
537 CondTargetBB
= CondJmp
->getOperand(2).getMBB();
538 JmpBB
= UncondJmp
->getOperand(0).getMBB();
546 // If only B2->B5 is out of 16bit range, we can do
548 // if (cond) goto new_B
553 // If only 'JMP B7' is out of 16bit range, we can replace
554 // 'JMP B7' with 'JMPL B7'.
556 // If both B2->B5 and 'JMP B7' is out of range, just do
557 // both the above transformations.
558 Dist
= SoFarNumInsns
[CondTargetBB
] - CondTargetBB
->size() - CurrNumInsns
;
559 if (!in16BitRange(Dist
)) {
560 MachineBasicBlock
*New_B
= MF
->CreateMachineBasicBlock(TermBB
);
562 // Insert New_B0 into function block list.
563 MF
->insert(++MBB
->getIterator(), New_B
);
565 // replace B2 cond jump
566 if (CondJmp
->getOperand(1).isReg())
567 BuildMI(*MBB
, MachineBasicBlock::iterator(*CondJmp
), CondJmp
->getDebugLoc(), TII
->get(CondJmp
->getOpcode()))
568 .addReg(CondJmp
->getOperand(0).getReg())
569 .addReg(CondJmp
->getOperand(1).getReg())
572 BuildMI(*MBB
, MachineBasicBlock::iterator(*CondJmp
), CondJmp
->getDebugLoc(), TII
->get(CondJmp
->getOpcode()))
573 .addReg(CondJmp
->getOperand(0).getReg())
574 .addImm(CondJmp
->getOperand(1).getImm())
577 if (CondTargetBB
!= JmpBB
)
578 MBB
->removeSuccessor(CondTargetBB
);
579 MBB
->addSuccessor(New_B
);
581 // Populate insn in New_B.
582 BuildMI(New_B
, CondJmp
->getDebugLoc(), TII
->get(BPF::JMPL
)).addMBB(CondTargetBB
);
584 New_B
->addSuccessor(CondTargetBB
);
585 CondJmp
->eraseFromParent();
589 if (!in16BitRange(SoFarNumInsns
[JmpBB
] - CurrNumInsns
)) {
590 BuildMI(MBB
, UncondJmp
->getDebugLoc(), TII
->get(BPF::JMPL
)).addMBB(JmpBB
);
591 UncondJmp
->eraseFromParent();
599 } // end default namespace
601 INITIALIZE_PASS(BPFMIPreEmitPeephole
, "bpf-mi-pemit-peephole",
602 "BPF PreEmit Peephole Optimization", false, false)
604 char BPFMIPreEmitPeephole::ID
= 0;
605 FunctionPass
* llvm::createBPFMIPreEmitPeepholePass()
607 return new BPFMIPreEmitPeephole();