1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use inorder to reduce diffs caused by slightly
14 // different schedules.
18 // llc -o - -run-pass mir-canonicalizer example.mir
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
24 //===----------------------------------------------------------------------===//
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/Support/raw_ostream.h"
40 extern char &MIRCanonicalizerID
;
43 #define DEBUG_TYPE "mir-canonicalizer"
45 static cl::opt
<unsigned>
46 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden
, cl::init(~0u),
48 cl::desc("Function number to canonicalize."));
50 static cl::opt
<unsigned> CanonicalizeBasicBlockNumber(
51 "canon-nth-basicblock", cl::Hidden
, cl::init(~0u), cl::value_desc("N"),
52 cl::desc("BasicBlock number to canonicalize."));
56 class MIRCanonicalizer
: public MachineFunctionPass
{
59 MIRCanonicalizer() : MachineFunctionPass(ID
) {}
61 StringRef
getPassName() const override
{
62 return "Rename register operands in a canonical ordering.";
65 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
67 MachineFunctionPass::getAnalysisUsage(AU
);
70 bool runOnMachineFunction(MachineFunction
&MF
) override
;
73 } // end anonymous namespace
75 char MIRCanonicalizer::ID
;
77 char &llvm::MIRCanonicalizerID
= MIRCanonicalizer::ID
;
79 INITIALIZE_PASS_BEGIN(MIRCanonicalizer
, "mir-canonicalizer",
80 "Rename Register Operands Canonically", false, false)
82 INITIALIZE_PASS_END(MIRCanonicalizer
, "mir-canonicalizer",
83 "Rename Register Operands Canonically", false, false)
85 static std::vector
<MachineBasicBlock
*> GetRPOList(MachineFunction
&MF
) {
88 ReversePostOrderTraversal
<MachineBasicBlock
*> RPOT(&*MF
.begin());
89 std::vector
<MachineBasicBlock
*> RPOList
;
90 for (auto MBB
: RPOT
) {
91 RPOList
.push_back(MBB
);
98 rescheduleLexographically(std::vector
<MachineInstr
*> instructions
,
99 MachineBasicBlock
*MBB
,
100 std::function
<MachineBasicBlock::iterator()> getPos
) {
102 bool Changed
= false;
103 using StringInstrPair
= std::pair
<std::string
, MachineInstr
*>;
104 std::vector
<StringInstrPair
> StringInstrMap
;
106 for (auto *II
: instructions
) {
108 raw_string_ostream
OS(S
);
112 // Trim the assignment, or start from the begining in the case of a store.
113 const size_t i
= S
.find("=");
114 StringInstrMap
.push_back({(i
== std::string::npos
) ? S
: S
.substr(i
), II
});
117 llvm::sort(StringInstrMap
,
118 [](const StringInstrPair
&a
, const StringInstrPair
&b
) -> bool {
119 return (a
.first
< b
.first
);
122 for (auto &II
: StringInstrMap
) {
125 dbgs() << "Splicing ";
127 dbgs() << " right before: ";
132 MBB
->splice(getPos(), MBB
, II
.second
);
138 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount
,
139 MachineBasicBlock
*MBB
) {
141 bool Changed
= false;
143 // Calculates the distance of MI from the begining of its parent BB.
144 auto getInstrIdx
= [](const MachineInstr
&MI
) {
146 for (auto &CurMI
: *MI
.getParent()) {
154 // Pre-Populate vector of instructions to reschedule so that we don't
155 // clobber the iterator.
156 std::vector
<MachineInstr
*> Instructions
;
157 for (auto &MI
: *MBB
) {
158 Instructions
.push_back(&MI
);
161 std::map
<MachineInstr
*, std::vector
<MachineInstr
*>> MultiUsers
;
162 std::map
<unsigned, MachineInstr
*> MultiUserLookup
;
163 unsigned UseToBringDefCloserToCount
= 0;
164 std::vector
<MachineInstr
*> PseudoIdempotentInstructions
;
165 std::vector
<unsigned> PhysRegDefs
;
166 for (auto *II
: Instructions
) {
167 for (unsigned i
= 1; i
< II
->getNumOperands(); i
++) {
168 MachineOperand
&MO
= II
->getOperand(i
);
172 if (Register::isVirtualRegister(MO
.getReg()))
178 PhysRegDefs
.push_back(MO
.getReg());
182 for (auto *II
: Instructions
) {
183 if (II
->getNumOperands() == 0)
185 if (II
->mayLoadOrStore())
188 MachineOperand
&MO
= II
->getOperand(0);
189 if (!MO
.isReg() || !Register::isVirtualRegister(MO
.getReg()))
194 bool IsPseudoIdempotent
= true;
195 for (unsigned i
= 1; i
< II
->getNumOperands(); i
++) {
197 if (II
->getOperand(i
).isImm()) {
201 if (II
->getOperand(i
).isReg()) {
202 if (!Register::isVirtualRegister(II
->getOperand(i
).getReg()))
203 if (llvm::find(PhysRegDefs
, II
->getOperand(i
).getReg()) ==
209 IsPseudoIdempotent
= false;
213 if (IsPseudoIdempotent
) {
214 PseudoIdempotentInstructions
.push_back(II
);
218 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II
->dump(); MO
.dump(););
220 MachineInstr
*Def
= II
;
221 unsigned Distance
= ~0U;
222 MachineInstr
*UseToBringDefCloserTo
= nullptr;
223 MachineRegisterInfo
*MRI
= &MBB
->getParent()->getRegInfo();
224 for (auto &UO
: MRI
->use_nodbg_operands(MO
.getReg())) {
225 MachineInstr
*UseInst
= UO
.getParent();
227 const unsigned DefLoc
= getInstrIdx(*Def
);
228 const unsigned UseLoc
= getInstrIdx(*UseInst
);
229 const unsigned Delta
= (UseLoc
- DefLoc
);
231 if (UseInst
->getParent() != Def
->getParent())
233 if (DefLoc
>= UseLoc
)
236 if (Delta
< Distance
) {
238 UseToBringDefCloserTo
= UseInst
;
239 MultiUserLookup
[UseToBringDefCloserToCount
++] = UseToBringDefCloserTo
;
243 const auto BBE
= MBB
->instr_end();
244 MachineBasicBlock::iterator DefI
= BBE
;
245 MachineBasicBlock::iterator UseI
= BBE
;
247 for (auto BBI
= MBB
->instr_begin(); BBI
!= BBE
; ++BBI
) {
249 if (DefI
!= BBE
&& UseI
!= BBE
)
257 if (&*BBI
== UseToBringDefCloserTo
) {
263 if (DefI
== BBE
|| UseI
== BBE
)
267 dbgs() << "Splicing ";
269 dbgs() << " right before: ";
273 MultiUsers
[UseToBringDefCloserTo
].push_back(Def
);
275 MBB
->splice(UseI
, MBB
, DefI
);
278 // Sort the defs for users of multiple defs lexographically.
279 for (const auto &E
: MultiUserLookup
) {
282 std::find_if(MBB
->instr_begin(), MBB
->instr_end(),
283 [&](MachineInstr
&MI
) -> bool { return &MI
== E
.second
; });
285 if (UseI
== MBB
->instr_end())
289 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
290 Changed
|= rescheduleLexographically(
291 MultiUsers
[E
.second
], MBB
,
292 [&]() -> MachineBasicBlock::iterator
{ return UseI
; });
295 PseudoIdempotentInstCount
= PseudoIdempotentInstructions
.size();
297 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
298 Changed
|= rescheduleLexographically(
299 PseudoIdempotentInstructions
, MBB
,
300 [&]() -> MachineBasicBlock::iterator
{ return MBB
->begin(); });
305 static bool propagateLocalCopies(MachineBasicBlock
*MBB
) {
306 bool Changed
= false;
307 MachineRegisterInfo
&MRI
= MBB
->getParent()->getRegInfo();
309 std::vector
<MachineInstr
*> Copies
;
310 for (MachineInstr
&MI
: MBB
->instrs()) {
312 Copies
.push_back(&MI
);
315 for (MachineInstr
*MI
: Copies
) {
317 if (!MI
->getOperand(0).isReg())
319 if (!MI
->getOperand(1).isReg())
322 const Register Dst
= MI
->getOperand(0).getReg();
323 const Register Src
= MI
->getOperand(1).getReg();
325 if (!Register::isVirtualRegister(Dst
))
327 if (!Register::isVirtualRegister(Src
))
329 // Not folding COPY instructions if regbankselect has not set the RCs.
330 // Why are we only considering Register Classes? Because the verifier
331 // sometimes gets upset if the register classes don't match even if the
332 // types do. A future patch might add COPY folding for matching types in
333 // pre-registerbankselect code.
334 if (!MRI
.getRegClassOrNull(Dst
))
336 if (MRI
.getRegClass(Dst
) != MRI
.getRegClass(Src
))
339 std::vector
<MachineOperand
*> Uses
;
340 for (auto UI
= MRI
.use_begin(Dst
); UI
!= MRI
.use_end(); ++UI
)
341 Uses
.push_back(&*UI
);
342 for (auto *MO
: Uses
)
346 MI
->eraseFromParent();
352 static bool doDefKillClear(MachineBasicBlock
*MBB
) {
353 bool Changed
= false;
355 for (auto &MI
: *MBB
) {
356 for (auto &MO
: MI
.operands()) {
359 if (!MO
.isDef() && MO
.isKill()) {
364 if (MO
.isDef() && MO
.isDead()) {
374 static bool runOnBasicBlock(MachineBasicBlock
*MBB
,
375 std::vector
<StringRef
> &bbNames
,
376 unsigned &basicBlockNum
, NamedVRegCursor
&NVC
) {
378 if (CanonicalizeBasicBlockNumber
!= ~0U) {
379 if (CanonicalizeBasicBlockNumber
!= basicBlockNum
++)
381 LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB
->getName()
385 if (llvm::find(bbNames
, MBB
->getName()) != bbNames
.end()) {
387 dbgs() << "Found potentially duplicate BasicBlocks: " << MBB
->getName()
394 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB
->getName() << " \n\n";
395 dbgs() << "\n\n================================================\n\n";
398 bool Changed
= false;
399 MachineFunction
&MF
= *MBB
->getParent();
400 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
402 bbNames
.push_back(MBB
->getName());
403 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB
->getName() << "\n\n";);
405 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
407 Changed
|= propagateLocalCopies(MBB
);
408 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB
->dump(););
410 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB
->dump(););
411 unsigned IdempotentInstCount
= 0;
412 Changed
|= rescheduleCanonically(IdempotentInstCount
, MBB
);
413 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB
->dump(););
415 Changed
|= NVC
.renameVRegs(MBB
);
417 // Here we renumber the def vregs for the idempotent instructions from the top
418 // of the MachineBasicBlock so that they are named in the order that we sorted
419 // them alphabetically. Eventually we wont need SkipVRegs because we will use
420 // named vregs instead.
421 if (IdempotentInstCount
)
424 auto MII
= MBB
->begin();
425 for (unsigned i
= 0; i
< IdempotentInstCount
&& MII
!= MBB
->end(); ++i
) {
426 MachineInstr
&MI
= *MII
++;
428 Register vRegToRename
= MI
.getOperand(0).getReg();
429 auto Rename
= NVC
.createVirtualRegister(vRegToRename
);
431 std::vector
<MachineOperand
*> RenameMOs
;
432 for (auto &MO
: MRI
.reg_operands(vRegToRename
)) {
433 RenameMOs
.push_back(&MO
);
436 for (auto *MO
: RenameMOs
) {
441 Changed
|= doDefKillClear(MBB
);
443 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB
->dump();
446 dbgs() << "\n\n================================================\n\n");
450 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction
&MF
) {
452 static unsigned functionNum
= 0;
453 if (CanonicalizeFunctionNumber
!= ~0U) {
454 if (CanonicalizeFunctionNumber
!= functionNum
++)
456 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF
.getName()
460 // we need a valid vreg to create a vreg type for skipping all those
461 // stray vreg numbers so reach alignment/canonical vreg values.
462 std::vector
<MachineBasicBlock
*> RPOList
= GetRPOList(MF
);
465 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF
.getName() << " \n\n";
466 dbgs() << "\n\n================================================\n\n";
467 dbgs() << "Total Basic Blocks: " << RPOList
.size() << "\n";
469 : RPOList
) { dbgs() << MBB
->getName() << "\n"; } dbgs()
470 << "\n\n================================================\n\n";);
472 std::vector
<StringRef
> BBNames
;
476 bool Changed
= false;
478 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
479 NamedVRegCursor
NVC(MRI
);
480 for (auto MBB
: RPOList
)
481 Changed
|= runOnBasicBlock(MBB
, BBNames
, BBNum
, NVC
);