1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // The purpose of this pass is to employ a canonical code transformation so
11 // that code compiled with slightly different IR passes can be diffed more
12 // effectively than otherwise. This is done by renaming vregs in a given
13 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
14 // move defs closer to their use in order to reduce diffs caused by slightly
15 // different schedules.
19 // llc -o - -run-pass mir-canonicalizer example.mir
21 // Reorders instructions canonically.
22 // Renames virtual register operands canonically.
23 // Strips certain MIR artifacts (optionally).
25 //===----------------------------------------------------------------------===//
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/Support/raw_ostream.h"
40 extern char &MIRCanonicalizerID
;
43 #define DEBUG_TYPE "mir-canonicalizer"
// Hidden command-line option "canon-nth-function" (unsigned, default ~0u).
// When it holds a value other than ~0u, runOnMachineFunction compares it with
// a running function counter. What happens on a mismatch is not visible in
// this excerpt (presumably that function is left untouched -- confirm).
45 static cl::opt
<unsigned>
46 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden
, cl::init(~0u),
48 cl::desc("Function number to canonicalize."));
// Hidden command-line option "canon-nth-basicblock" (unsigned, default ~0u).
// When it holds a value other than ~0u, runOnBasicBlock compares it with a
// running basic-block counter. What happens on a mismatch is not visible in
// this excerpt (presumably that block is left untouched -- confirm).
50 static cl::opt
<unsigned>
51 CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden
, cl::init(~0u),
53 cl::desc("BasicBlock number to canonicalize."));
// Machine function pass that drives the canonicalization. The per-function
// work lives in runOnMachineFunction, which is only declared here and is
// defined out of line later in this file.
// NOTE(review): some members (e.g. the ID used below) are not visible in this
// excerpt.
57 class MIRCanonicalizer
: public MachineFunctionPass
{
// Hands the pass ID to the MachineFunctionPass base constructor.
60 MIRCanonicalizer() : MachineFunctionPass(ID
) {}
// Human-readable name of the pass.
62 StringRef
getPassName() const override
{
63 return "Rename register operands in a canonical ordering.";
// Forwards to the base class implementation. Any additional usage
// declarations are not visible in this excerpt.
66 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
68 MachineFunctionPass::getAnalysisUsage(AU
);
// Pass entry point; declared here, defined out of line.
71 bool runOnMachineFunction(MachineFunction
&MF
) override
;
74 } // end anonymous namespace
// Tag describing what a TypedVReg entry stands for, as used in this file:
//   RSE_Reg          - the entry carries a register number,
//   RSE_FrameIndex   - placeholder pushed for frame-index operands,
//   RSE_NewCandidate - marker pushed once per candidate instruction.
76 enum VRType
{ RSE_Reg
= 0, RSE_FrameIndex
, RSE_NewCandidate
};
// Members of TypedVReg (the class header and data members are not visible in
// this excerpt): a tagged value that is either a register number or one of
// the non-register markers from VRType.

// Wraps a register number; the entry is tagged RSE_Reg.
82 TypedVReg(unsigned reg
) : type(RSE_Reg
), reg(reg
) {}
// Builds a non-register marker. The register field is set to ~0U and the
// assert rejects RSE_Reg, which must go through the constructor above.
83 TypedVReg(VRType type
) : type(type
), reg(~0U) {
84 assert(type
!= RSE_Reg
&& "Expected a non-register type.");
// Tag predicates.
87 bool isReg() const { return type
== RSE_Reg
; }
88 bool isFrameIndex() const { return type
== RSE_FrameIndex
; }
89 bool isCandidate() const { return type
== RSE_NewCandidate
; }
// Raw tag accessor.
91 VRType
getType() const { return type
; }
// Register accessor; asserts that the entry is tagged RSE_Reg. The return
// statement is not visible in this excerpt.
92 unsigned getReg() const {
93 assert(this->isReg() && "Expected a virtual or physical register.");
// Storage for the pass ID that the constructor hands to MachineFunctionPass.
98 char MIRCanonicalizer::ID
;
// Binds the llvm::MIRCanonicalizerID reference to this pass's ID.
100 char &llvm::MIRCanonicalizerID
= MIRCanonicalizer::ID
;
// Pass registration under the command-line name "mir-canonicalizer".
102 INITIALIZE_PASS_BEGIN(MIRCanonicalizer
, "mir-canonicalizer",
103 "Rename Register Operands Canonically", false, false)
105 INITIALIZE_PASS_END(MIRCanonicalizer
, "mir-canonicalizer",
106 "Rename Register Operands Canonically", false, false)
// Walks the blocks of MF with a ReversePostOrderTraversal rooted at the
// function's first block and appends each visited block to RPOList, in
// traversal order.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably RPOList is returned -- confirm.
108 static std::vector
<MachineBasicBlock
*> GetRPOList(MachineFunction
&MF
) {
109 ReversePostOrderTraversal
<MachineBasicBlock
*> RPOT(&*MF
.begin());
110 std::vector
<MachineBasicBlock
*> RPOList
;
111 for (auto MBB
: RPOT
) {
112 RPOList
.push_back(MBB
);
118 // Set a dummy vreg. We use this vreg's register class to generate throw-away
119 // vregs that are used to skip vreg numbers so that vreg numbers line up.
//
// Scans every operand of every instruction in every block of MF and tests
// whether it is a virtual-register operand.
// NOTE(review): the statements that act on that test and the return are not
// visible in this excerpt. Presumably the first virtual register found is
// returned; runOnBasicBlock treats a result of ~0U as "none found" -- confirm.
120 static unsigned GetDummyVReg(const MachineFunction
&MF
) {
121 for (auto &MBB
: MF
) {
122 for (auto &MI
: MBB
) {
123 for (auto &MO
: MI
.operands()) {
124 if (!MO
.isReg() || !TargetRegisterInfo::isVirtualRegister(MO
.getReg()))
// Pseudo-scheduling step: moves a definition so that it sits immediately
// before one of its uses inside MBB.
//
// For each instruction in the block, operand 0 is examined; only virtual
// register operands are of interest. The non-debug uses of that register are
// scanned, the positions of the def and of each use are computed with
// getInstrIdx, and a use whose Delta is below the current Distance is
// remembered in UseToBringDefCloserTo. The block is then walked to obtain
// iterators for the def and the chosen use, and the def is spliced in front
// of the use.
//
// NOTE(review): several statements are not visible in this excerpt: the
// actions following the filtering `if`s (presumably `continue`), the update
// of Distance, the body of getInstrIdx and the final return. `Changed` is
// presumably set on a splice and returned -- confirm against the full file.
134 static bool rescheduleCanonically(MachineBasicBlock
*MBB
) {
136 bool Changed
= false;
138 // Calculates the distance of MI from the beginning of its parent BB.
139 auto getInstrIdx
= [](const MachineInstr
&MI
) {
141 for (auto &CurMI
: *MI
.getParent()) {
149 // Pre-Populate vector of instructions to reschedule so that we don't
150 // clobber the iterator.
151 std::vector
<MachineInstr
*> Instructions
;
152 for (auto &MI
: *MBB
) {
153 Instructions
.push_back(&MI
);
156 for (auto *II
: Instructions
) {
// Instructions without operands have nothing to reschedule.
157 if (II
->getNumOperands() == 0)
// Only operand 0 is considered, and only when it is a virtual register.
160 MachineOperand
&MO
= II
->getOperand(0);
161 if (!MO
.isReg() || !TargetRegisterInfo::isVirtualRegister(MO
.getReg()))
164 DEBUG(dbgs() << "Operand " << 0 << " of "; II
->dump(); MO
.dump(););
166 MachineInstr
*Def
= II
;
// Distance starts at the maximum unsigned value so any real Delta is smaller.
167 unsigned Distance
= ~0U;
168 MachineInstr
*UseToBringDefCloserTo
= nullptr;
169 MachineRegisterInfo
*MRI
= &MBB
->getParent()->getRegInfo();
170 for (auto &UO
: MRI
->use_nodbg_operands(MO
.getReg())) {
171 MachineInstr
*UseInst
= UO
.getParent();
// Delta is unsigned; it is only meaningful when the use follows the def,
// which is what the DefLoc >= UseLoc test below checks for.
173 const unsigned DefLoc
= getInstrIdx(*Def
);
174 const unsigned UseLoc
= getInstrIdx(*UseInst
);
175 const unsigned Delta
= (UseLoc
- DefLoc
);
// Test for a use that lives in a different block than the def.
177 if (UseInst
->getParent() != Def
->getParent())
// Test for a use that does not come after the def.
179 if (DefLoc
>= UseLoc
)
182 if (Delta
< Distance
) {
184 UseToBringDefCloserTo
= UseInst
;
// Locate iterators for the def and the chosen use; BBE doubles as the
// "not found yet" marker.
188 const auto BBE
= MBB
->instr_end();
189 MachineBasicBlock::iterator DefI
= BBE
;
190 MachineBasicBlock::iterator UseI
= BBE
;
192 for (auto BBI
= MBB
->instr_begin(); BBI
!= BBE
; ++BBI
) {
194 if (DefI
!= BBE
&& UseI
!= BBE
)
197 if ((&*BBI
!= Def
) && (&*BBI
!= UseToBringDefCloserTo
))
205 if (&*BBI
== UseToBringDefCloserTo
) {
// Test for the case where either iterator was not found.
211 if (DefI
== BBE
|| UseI
== BBE
)
215 dbgs() << "Splicing ";
217 dbgs() << " right before: ";
// Move the def so it sits directly in front of the use.
222 MBB
->splice(UseI
, MBB
, DefI
);
228 /// Here we find our candidates. What makes an interesting candidate?
229 /// A candidate for a canonicalization tree root is normally any kind of
230 /// instruction that causes side effects such as a store to memory or a copy to
231 /// a physical register or a return instruction. We use these as an expression
232 /// tree root that we walk in order to build a canonical walk which should result
233 /// in canonical vreg renaming.
///
/// In the visible code an instruction is tested against three properties:
/// it may store, it is a branch, or DoesMISideEffect is set. The flag is set
/// when operand 0 is a register that is not virtual, or when a use of that
/// register sits in a different basic block than the instruction.
/// Instructions that reach the end of the loop body are appended to
/// Candidates.
/// NOTE(review): the statement following the final filter (presumably
/// `continue`) and the return are not visible in this excerpt -- confirm.
234 static std::vector
<MachineInstr
*> populateCandidates(MachineBasicBlock
*MBB
) {
235 std::vector
<MachineInstr
*> Candidates
;
236 MachineRegisterInfo
&MRI
= MBB
->getParent()->getRegInfo();
238 for (auto II
= MBB
->begin(), IE
= MBB
->end(); II
!= IE
; ++II
) {
239 MachineInstr
*MI
= &*II
;
241 bool DoesMISideEffect
= false;
243 if (MI
->getNumOperands() > 0 && MI
->getOperand(0).isReg()) {
244 const unsigned Dst
= MI
->getOperand(0).getReg();
// A non-virtual destination register counts as a side effect.
245 DoesMISideEffect
|= !TargetRegisterInfo::isVirtualRegister(Dst
);
// So does any use of the destination outside this instruction's block; the
// scan stops as soon as the flag is set.
247 for (auto UI
= MRI
.use_begin(Dst
); UI
!= MRI
.use_end(); ++UI
) {
248 if (DoesMISideEffect
) break;
249 DoesMISideEffect
|= (UI
->getParent()->getParent() != MI
->getParent());
253 if (!MI
->mayStore() && !MI
->isBranch() && !DoesMISideEffect
)
256 DEBUG(dbgs() << "Found Candidate: "; MI
->dump(););
257 Candidates
.push_back(MI
);
// Breadth-first walk over the registers queued in RegQueue.
//
// Each entry taken from the front of the queue is recorded in VRegs: frame
// index markers as RSE_FrameIndex, registers as TypedVReg(Reg). For virtual
// registers the push is guarded by a search of VRegs for an existing entry
// with the same register. The defining instructions of the register are then
// visited; each one is appended to VisitedMIs and its operands from index 1
// on are pushed onto RegQueue, either as a frame-index marker or as a
// register.
//
// Parameters:
//   VRegs      - output list of walked entries, appended to in walk order.
//   RegQueue   - work list; consumed and refilled by this function.
//   VisitedMIs - instructions already expanded; appended to here.
//   MBB        - block the walk is compared against when looking at defs.
//
// NOTE(review): the pop of the queue, the `continue` statements after the
// filtering `if`s and the operand-kind tests before the two RegQueue.push
// calls are not visible in this excerpt -- confirm against the full file.
263 static void doCandidateWalk(std::vector
<TypedVReg
> &VRegs
,
264 std::queue
<TypedVReg
> &RegQueue
,
265 std::vector
<MachineInstr
*> &VisitedMIs
,
266 const MachineBasicBlock
*MBB
) {
268 const MachineFunction
&MF
= *MBB
->getParent();
269 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
271 while (!RegQueue
.empty()) {
273 auto TReg
= RegQueue
.front();
276 if (TReg
.isFrameIndex()) {
277 DEBUG(dbgs() << "Popping frame index.\n";);
278 VRegs
.push_back(TypedVReg(RSE_FrameIndex
));
282 assert(TReg
.isReg() && "Expected vreg or physreg.");
283 unsigned Reg
= TReg
.getReg();
285 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
287 dbgs() << "Popping vreg ";
288 MRI
.def_begin(Reg
)->dump();
// Record the vreg only if VRegs has no register entry for it yet.
292 if (!llvm::any_of(VRegs
, [&](const TypedVReg
&TR
) {
293 return TR
.isReg() && TR
.getReg() == Reg
;
295 VRegs
.push_back(TypedVReg(Reg
));
298 DEBUG(dbgs() << "Popping physreg.\n";);
299 VRegs
.push_back(TypedVReg(Reg
));
// Expand the walk through the instructions that define Reg.
303 for (auto RI
= MRI
.def_begin(Reg
), RE
= MRI
.def_end(); RI
!= RE
; ++RI
) {
304 MachineInstr
*Def
= RI
->getParent();
// Test for a def that lives in another block.
306 if (Def
->getParent() != MBB
)
// Test for a def that has already been expanded.
309 if (llvm::any_of(VisitedMIs
,
310 [&](const MachineInstr
*VMI
) { return Def
== VMI
; })) {
315 dbgs() << "\n========================\n";
316 dbgs() << "Visited MI: ";
318 dbgs() << "BB Name: " << Def
->getParent()->getName() << "\n";
319 dbgs() << "\n========================\n";
321 VisitedMIs
.push_back(Def
);
// Operand 0 is skipped; the remaining operands feed the queue.
322 for (unsigned I
= 1, E
= Def
->getNumOperands(); I
!= E
; ++I
) {
324 MachineOperand
&MO
= Def
->getOperand(I
);
326 DEBUG(dbgs() << "Pushing frame index.\n";);
327 RegQueue
.push(TypedVReg(RSE_FrameIndex
));
332 RegQueue
.push(TypedVReg(MO
.getReg()));
338 // TODO: Work to remove this in the future. One day when we have named vregs
339 // we should be able to form the canonical name based on some characteristic
340 // we see in that point of the expression tree (like if we were to name based
341 // on some sort of value numbering scheme).
//
// Burns virtual register numbers by creating throw-away vregs of class RC.
// VRegGapIndex is incremented on every call and scaled by 1000 to form
// VR_GAP; the target E is computed from the first freshly created register I
// by rounding (I + VR_GAP) down to a multiple of VR_GAP and adding one more
// VR_GAP.
// NOTE(review): the loop that repeats createVirtualRegister until E is
// reached is not visible in this excerpt -- confirm against the full file.
342 static void SkipVRegs(unsigned &VRegGapIndex
, MachineRegisterInfo
&MRI
,
343 const TargetRegisterClass
*RC
) {
344 const unsigned VR_GAP
= (++VRegGapIndex
* 1000);
347 dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex
<< " to "
351 unsigned I
= MRI
.createVirtualRegister(RC
);
352 const unsigned E
= (((I
+ VR_GAP
) / VR_GAP
) + 1) * VR_GAP
;
354 I
= MRI
.createVirtualRegister(RC
);
// Builds the old-vreg -> new-vreg map for one basic block.
//
// VRegs is processed in order. Frame-index markers, candidate markers and
// non-virtual registers do not get a mapping; they only create throw-away
// vregs of class RC so that numbering advances. A virtual register is first
// looked up in renamedInOtherBB; otherwise a fresh vreg with the same register
// class as the original is created and, if the original is not yet in the
// map, the pair is inserted.
//
// Parameters:
//   VRegs            - walk result produced for this block.
//   renamedInOtherBB - registers looked up before a vreg is renamed.
//   MRI              - register info used to create the new vregs.
//   RC               - register class used for the throw-away vregs.
// Returns the rename map.
//
// NOTE(review): the `continue` statements that end each special-case branch
// are not visible in this excerpt -- confirm against the full file.
358 static std::map
<unsigned, unsigned>
359 GetVRegRenameMap(const std::vector
<TypedVReg
> &VRegs
,
360 const std::vector
<unsigned> &renamedInOtherBB
,
361 MachineRegisterInfo
&MRI
,
362 const TargetRegisterClass
*RC
) {
363 std::map
<unsigned, unsigned> VRegRenameMap
;
364 unsigned LastRenameReg
= MRI
.createVirtualRegister(RC
);
365 bool FirstCandidate
= true;
367 for (auto &vreg
: VRegs
) {
368 if (vreg
.isFrameIndex()) {
369 // We skip one vreg for any frame index because there is a good chance
370 // (especially when comparing SelectionDAG to GlobalISel generated MIR)
371 // that in the other file we are just getting an incoming vreg that comes
372 // from a copy from a frame index. So it's safe to skip by one.
373 LastRenameReg
= MRI
.createVirtualRegister(RC
);
374 DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg
<< "\n";);
376 } else if (vreg
.isCandidate()) {
378 // After the first candidate, for every subsequent candidate, we skip mod
379 // 10 registers so that the candidates are more likely to start at the
380 // same vreg number making it more likely that the canonical walk from the
381 // candidate instruction. We don't need to skip from the first candidate of
382 // the BasicBlock because we already skip ahead several vregs for each BB.
// NOTE(review): as written, the loop below breaks out immediately unless
// FirstCandidate is still true, i.e. the skipping happens only for the first
// candidate. That looks like the opposite of what the comment above
// describes -- confirm the intended condition.
383 while (LastRenameReg
% 10) {
384 if (!FirstCandidate
) break;
385 LastRenameReg
= MRI
.createVirtualRegister(RC
);
388 dbgs() << "Skipping rename for new candidate " << LastRenameReg
392 FirstCandidate
= false;
394 } else if (!TargetRegisterInfo::isVirtualRegister(vreg
.getReg())) {
395 LastRenameReg
= MRI
.createVirtualRegister(RC
);
397 dbgs() << "Skipping rename for Phys Reg " << LastRenameReg
<< "\n";
402 auto Reg
= vreg
.getReg();
403 if (llvm::find(renamedInOtherBB
, Reg
) != renamedInOtherBB
.end()) {
404 DEBUG(dbgs() << "Vreg " << Reg
<< " already renamed in other BB.\n";);
// The replacement vreg keeps the register class of the original.
408 auto Rename
= MRI
.createVirtualRegister(MRI
.getRegClass(Reg
));
409 LastRenameReg
= Rename
;
411 if (VRegRenameMap
.find(Reg
) == VRegRenameMap
.end()) {
412 DEBUG(dbgs() << "Mapping vreg ";);
413 if (MRI
.reg_begin(Reg
) != MRI
.reg_end()) {
414 DEBUG(auto foo
= &*MRI
.reg_begin(Reg
); foo
->dump(););
416 DEBUG(dbgs() << Reg
;);
418 DEBUG(dbgs() << " to ";);
419 if (MRI
.reg_begin(Rename
) != MRI
.reg_end()) {
420 DEBUG(auto foo
= &*MRI
.reg_begin(Rename
); foo
->dump(););
422 DEBUG(dbgs() << Rename
;);
424 DEBUG(dbgs() << "\n";);
426 VRegRenameMap
.insert(std::pair
<unsigned, unsigned>(Reg
, Rename
));
430 return VRegRenameMap
;
// Applies VRegRenameMap to the function's operands.
//
// For every (VReg, Rename) pair, Rename is appended to RenamedInOtherBB, the
// operands that reference VReg are first collected into RenameMOs, and the
// collected operands are then processed; the kill flag is cleared on each.
// Collecting first presumably avoids editing the operand list while it is
// being iterated -- confirm.
//
// NOTE(review): the statement that rewrites the operand's register, the
// update of `Changed` and the return are not visible in this excerpt.
433 static bool doVRegRenaming(std::vector
<unsigned> &RenamedInOtherBB
,
434 const std::map
<unsigned, unsigned> &VRegRenameMap
,
435 MachineRegisterInfo
&MRI
) {
436 bool Changed
= false;
437 for (auto I
= VRegRenameMap
.begin(), E
= VRegRenameMap
.end(); I
!= E
; ++I
) {
439 auto VReg
= I
->first
;
440 auto Rename
= I
->second
;
442 RenamedInOtherBB
.push_back(Rename
);
444 std::vector
<MachineOperand
*> RenameMOs
;
445 for (auto &MO
: MRI
.reg_operands(VReg
)) {
446 RenameMOs
.push_back(&MO
);
449 for (auto *MO
: RenameMOs
) {
454 MO
->setIsKill(false);
// Visits every operand of every instruction in MBB and singles out two cases:
// non-def operands flagged kill, and def operands flagged dead.
// NOTE(review): the statements executed in those two cases (presumably
// clearing the flag and setting `Changed`), any earlier operand filter and
// the return are not visible in this excerpt -- confirm against the full
// file.
461 static bool doDefKillClear(MachineBasicBlock
*MBB
) {
462 bool Changed
= false;
464 for (auto &MI
: *MBB
) {
465 for (auto &MO
: MI
.operands()) {
468 if (!MO
.isDef() && MO
.isKill()) {
473 if (MO
.isDef() && MO
.isDead()) {
// Canonicalizes a single basic block.
//
// Visible steps, in order:
//   1. Optional filtering through CanonicalizeBasicBlockNumber, compared with
//      the running counter basicBlockNum (post-incremented on each compare).
//   2. A lookup of the block name in bbNames, with a diagnostic about
//      potentially duplicate blocks.
//   3. Selection of a dummy vreg via GetDummyVReg; if none exists (~0U) the
//      function returns false without changes.
//   4. rescheduleCanonically on the block.
//   5. populateCandidates; every candidate contributes an RSE_NewCandidate
//      marker to VRegs, has operands queued in RegQueue, and is walked with
//      doCandidateWalk.
//   6. SkipVRegs, GetVRegRenameMap, doVRegRenaming and doDefKillClear.
//
// Parameters:
//   MBB              - block to canonicalize.
//   bbNames          - names of blocks seen so far; this block's name is added.
//   renamedInOtherBB - registers already handled by earlier blocks; extended
//                      by doVRegRenaming.
//   basicBlockNum    - running block counter used by the filter in step 1.
//   VRegGapIndex     - running index forwarded to SkipVRegs.
//
// NOTE(review): the statements following several `if`s (early returns,
// `continue`/`break` inside the operand loops) and the final return are not
// visible in this excerpt -- confirm against the full file.
483 static bool runOnBasicBlock(MachineBasicBlock
*MBB
,
484 std::vector
<StringRef
> &bbNames
,
485 std::vector
<unsigned> &renamedInOtherBB
,
486 unsigned &basicBlockNum
, unsigned &VRegGapIndex
) {
488 if (CanonicalizeBasicBlockNumber
!= ~0U) {
489 if (CanonicalizeBasicBlockNumber
!= basicBlockNum
++)
491 DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB
->getName() << "\n";);
494 if (llvm::find(bbNames
, MBB
->getName()) != bbNames
.end()) {
496 dbgs() << "Found potentially duplicate BasicBlocks: " << MBB
->getName()
503 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB
->getName() << " \n\n";
504 dbgs() << "\n\n================================================\n\n";
507 bool Changed
= false;
508 MachineFunction
&MF
= *MBB
->getParent();
509 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
// The dummy vreg only supplies a register class for throw-away vregs; without
// one there is nothing to do.
511 const unsigned DummyVReg
= GetDummyVReg(MF
);
512 const TargetRegisterClass
*DummyRC
=
513 (DummyVReg
== ~0U) ? nullptr : MRI
.getRegClass(DummyVReg
);
514 if (!DummyRC
) return false;
516 bbNames
.push_back(MBB
->getName());
517 DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB
->getName() << "\n\n";);
519 DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB
->dump(););
520 Changed
|= rescheduleCanonically(MBB
);
521 DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB
->dump(););
// Candidates are pre-seeded into VisitedMIs so the walk does not expand them
// again as ordinary defs.
523 std::vector
<MachineInstr
*> Candidates
= populateCandidates(MBB
);
524 std::vector
<MachineInstr
*> VisitedMIs
;
525 std::copy(Candidates
.begin(), Candidates
.end(),
526 std::back_inserter(VisitedMIs
));
528 std::vector
<TypedVReg
> VRegs
;
529 for (auto candidate
: Candidates
) {
530 VRegs
.push_back(TypedVReg(RSE_NewCandidate
));
532 std::queue
<TypedVReg
> RegQueue
;
534 // Here we walk the vreg operands of a non-root node along our walk.
535 // The root nodes are the original candidates (stores normally).
536 // These are normally not the root nodes (except for the case of copies to
537 // physical registers).
538 for (unsigned i
= 1; i
< candidate
->getNumOperands(); i
++) {
539 if (candidate
->mayStore() || candidate
->isBranch())
542 MachineOperand
&MO
= candidate
->getOperand(i
);
543 if (!(MO
.isReg() && TargetRegisterInfo::isVirtualRegister(MO
.getReg())))
546 DEBUG(dbgs() << "Enqueue register"; MO
.dump(); dbgs() << "\n";);
547 RegQueue
.push(TypedVReg(MO
.getReg()));
550 // Here we walk the root candidates. We start from the 0th operand because
551 // the root is normally a store to a vreg.
552 for (unsigned i
= 0; i
< candidate
->getNumOperands(); i
++) {
554 if (!candidate
->mayStore() && !candidate
->isBranch())
557 MachineOperand
&MO
= candidate
->getOperand(i
);
559 // TODO: Do we want to only add vregs here?
560 if (!MO
.isReg() && !MO
.isFI())
563 DEBUG(dbgs() << "Enqueue Reg/FI"; MO
.dump(); dbgs() << "\n";);
565 RegQueue
.push(MO
.isReg() ? TypedVReg(MO
.getReg()) :
566 TypedVReg(RSE_FrameIndex
));
569 doCandidateWalk(VRegs
, RegQueue
, VisitedMIs
, MBB
);
572 // If we have populated no vregs to rename then bail.
573 // The rest of this function does the vreg remapping.
574 if (VRegs
.size() == 0)
577 // Skip some vregs, so we can reckon where we'll land next.
578 SkipVRegs(VRegGapIndex
, MRI
, DummyRC
);
580 auto VRegRenameMap
= GetVRegRenameMap(VRegs
, renamedInOtherBB
, MRI
, DummyRC
);
581 Changed
|= doVRegRenaming(renamedInOtherBB
, VRegRenameMap
, MRI
);
582 Changed
|= doDefKillClear(MBB
);
584 DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB
->dump(); dbgs() << "\n";);
585 DEBUG(dbgs() << "\n\n================================================\n\n");
// Pass entry point: canonicalizes every basic block of MF in reverse
// post-order.
//
// functionNum is a function-local static, so the counter persists across
// invocations of the pass; it is only advanced when CanonicalizeFunctionNumber
// has been set to something other than ~0U. The blocks are obtained from
// GetRPOList and each one is handed to runOnBasicBlock together with the
// shared bookkeeping (BBNames, RenamedInOtherBB, BBNum, GapIdx); the
// per-block results are OR-ed into Changed.
//
// NOTE(review): the declarations of BBNum and GapIdx, the statement after the
// function-number comparison (presumably an early return) and the final
// return are not visible in this excerpt -- confirm against the full file.
589 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction
&MF
) {
591 static unsigned functionNum
= 0;
592 if (CanonicalizeFunctionNumber
!= ~0U) {
593 if (CanonicalizeFunctionNumber
!= functionNum
++)
595 DEBUG(dbgs() << "\n Canonicalizing Function " << MF
.getName() << "\n";);
598 // we need a valid vreg to create a vreg type for skipping all those
599 // stray vreg numbers so we reach alignment/canonical vreg values.
600 std::vector
<MachineBasicBlock
*> RPOList
= GetRPOList(MF
);
603 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF
.getName() << " \n\n";
604 dbgs() << "\n\n================================================\n\n";
605 dbgs() << "Total Basic Blocks: " << RPOList
.size() << "\n";
606 for (auto MBB
: RPOList
) {
607 dbgs() << MBB
->getName() << "\n";
609 dbgs() << "\n\n================================================\n\n";
612 std::vector
<StringRef
> BBNames
;
613 std::vector
<unsigned> RenamedInOtherBB
;
618 bool Changed
= false;
620 for (auto MBB
: RPOList
)
621 Changed
|= runOnBasicBlock(MBB
, BBNames
, RenamedInOtherBB
, BBNum
, GapIdx
);