1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use inorder to reduce diffs caused by slightly
14 // different schedules.
18 // llc -o - -run-pass mir-canonicalizer example.mir
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
24 //===----------------------------------------------------------------------===//
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/raw_ostream.h"
41 extern char &MIRCanonicalizerID
;
44 #define DEBUG_TYPE "mir-canonicalizer"
46 static cl::opt
<unsigned>
47 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden
, cl::init(~0u),
49 cl::desc("Function number to canonicalize."));
51 static cl::opt
<unsigned> CanonicalizeBasicBlockNumber(
52 "canon-nth-basicblock", cl::Hidden
, cl::init(~0u), cl::value_desc("N"),
53 cl::desc("BasicBlock number to canonicalize."));
57 class MIRCanonicalizer
: public MachineFunctionPass
{
60 MIRCanonicalizer() : MachineFunctionPass(ID
) {}
62 StringRef
getPassName() const override
{
63 return "Rename register operands in a canonical ordering.";
66 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
68 MachineFunctionPass::getAnalysisUsage(AU
);
71 bool runOnMachineFunction(MachineFunction
&MF
) override
;
74 } // end anonymous namespace
76 char MIRCanonicalizer::ID
;
78 char &llvm::MIRCanonicalizerID
= MIRCanonicalizer::ID
;
80 INITIALIZE_PASS_BEGIN(MIRCanonicalizer
, "mir-canonicalizer",
81 "Rename Register Operands Canonically", false, false)
83 INITIALIZE_PASS_END(MIRCanonicalizer
, "mir-canonicalizer",
84 "Rename Register Operands Canonically", false, false)
86 static std::vector
<MachineBasicBlock
*> GetRPOList(MachineFunction
&MF
) {
89 ReversePostOrderTraversal
<MachineBasicBlock
*> RPOT(&*MF
.begin());
90 std::vector
<MachineBasicBlock
*> RPOList
;
91 for (auto MBB
: RPOT
) {
92 RPOList
.push_back(MBB
);
99 rescheduleLexographically(std::vector
<MachineInstr
*> instructions
,
100 MachineBasicBlock
*MBB
,
101 std::function
<MachineBasicBlock::iterator()> getPos
) {
103 bool Changed
= false;
104 using StringInstrPair
= std::pair
<std::string
, MachineInstr
*>;
105 std::vector
<StringInstrPair
> StringInstrMap
;
107 for (auto *II
: instructions
) {
109 raw_string_ostream
OS(S
);
113 // Trim the assignment, or start from the begining in the case of a store.
114 const size_t i
= S
.find("=");
115 StringInstrMap
.push_back({(i
== std::string::npos
) ? S
: S
.substr(i
), II
});
118 llvm::sort(StringInstrMap
,
119 [](const StringInstrPair
&a
, const StringInstrPair
&b
) -> bool {
120 return (a
.first
< b
.first
);
123 for (auto &II
: StringInstrMap
) {
126 dbgs() << "Splicing ";
128 dbgs() << " right before: ";
133 MBB
->splice(getPos(), MBB
, II
.second
);
139 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount
,
140 MachineBasicBlock
*MBB
) {
142 bool Changed
= false;
144 // Calculates the distance of MI from the begining of its parent BB.
145 auto getInstrIdx
= [](const MachineInstr
&MI
) {
147 for (auto &CurMI
: *MI
.getParent()) {
155 // Pre-Populate vector of instructions to reschedule so that we don't
156 // clobber the iterator.
157 std::vector
<MachineInstr
*> Instructions
;
158 for (auto &MI
: *MBB
) {
159 Instructions
.push_back(&MI
);
162 std::map
<MachineInstr
*, std::vector
<MachineInstr
*>> MultiUsers
;
163 std::map
<unsigned, MachineInstr
*> MultiUserLookup
;
164 unsigned UseToBringDefCloserToCount
= 0;
165 std::vector
<MachineInstr
*> PseudoIdempotentInstructions
;
166 std::vector
<unsigned> PhysRegDefs
;
167 for (auto *II
: Instructions
) {
168 for (unsigned i
= 1; i
< II
->getNumOperands(); i
++) {
169 MachineOperand
&MO
= II
->getOperand(i
);
173 if (Register::isVirtualRegister(MO
.getReg()))
179 PhysRegDefs
.push_back(MO
.getReg());
183 for (auto *II
: Instructions
) {
184 if (II
->getNumOperands() == 0)
186 if (II
->mayLoadOrStore())
189 MachineOperand
&MO
= II
->getOperand(0);
190 if (!MO
.isReg() || !Register::isVirtualRegister(MO
.getReg()))
195 bool IsPseudoIdempotent
= true;
196 for (unsigned i
= 1; i
< II
->getNumOperands(); i
++) {
198 if (II
->getOperand(i
).isImm()) {
202 if (II
->getOperand(i
).isReg()) {
203 if (!Register::isVirtualRegister(II
->getOperand(i
).getReg()))
204 if (llvm::find(PhysRegDefs
, II
->getOperand(i
).getReg()) ==
210 IsPseudoIdempotent
= false;
214 if (IsPseudoIdempotent
) {
215 PseudoIdempotentInstructions
.push_back(II
);
219 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II
->dump(); MO
.dump(););
221 MachineInstr
*Def
= II
;
222 unsigned Distance
= ~0U;
223 MachineInstr
*UseToBringDefCloserTo
= nullptr;
224 MachineRegisterInfo
*MRI
= &MBB
->getParent()->getRegInfo();
225 for (auto &UO
: MRI
->use_nodbg_operands(MO
.getReg())) {
226 MachineInstr
*UseInst
= UO
.getParent();
228 const unsigned DefLoc
= getInstrIdx(*Def
);
229 const unsigned UseLoc
= getInstrIdx(*UseInst
);
230 const unsigned Delta
= (UseLoc
- DefLoc
);
232 if (UseInst
->getParent() != Def
->getParent())
234 if (DefLoc
>= UseLoc
)
237 if (Delta
< Distance
) {
239 UseToBringDefCloserTo
= UseInst
;
240 MultiUserLookup
[UseToBringDefCloserToCount
++] = UseToBringDefCloserTo
;
244 const auto BBE
= MBB
->instr_end();
245 MachineBasicBlock::iterator DefI
= BBE
;
246 MachineBasicBlock::iterator UseI
= BBE
;
248 for (auto BBI
= MBB
->instr_begin(); BBI
!= BBE
; ++BBI
) {
250 if (DefI
!= BBE
&& UseI
!= BBE
)
258 if (&*BBI
== UseToBringDefCloserTo
) {
264 if (DefI
== BBE
|| UseI
== BBE
)
268 dbgs() << "Splicing ";
270 dbgs() << " right before: ";
274 MultiUsers
[UseToBringDefCloserTo
].push_back(Def
);
276 MBB
->splice(UseI
, MBB
, DefI
);
279 // Sort the defs for users of multiple defs lexographically.
280 for (const auto &E
: MultiUserLookup
) {
283 std::find_if(MBB
->instr_begin(), MBB
->instr_end(),
284 [&](MachineInstr
&MI
) -> bool { return &MI
== E
.second
; });
286 if (UseI
== MBB
->instr_end())
290 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
291 Changed
|= rescheduleLexographically(
292 MultiUsers
[E
.second
], MBB
,
293 [&]() -> MachineBasicBlock::iterator
{ return UseI
; });
296 PseudoIdempotentInstCount
= PseudoIdempotentInstructions
.size();
298 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
299 Changed
|= rescheduleLexographically(
300 PseudoIdempotentInstructions
, MBB
,
301 [&]() -> MachineBasicBlock::iterator
{ return MBB
->begin(); });
306 static bool propagateLocalCopies(MachineBasicBlock
*MBB
) {
307 bool Changed
= false;
308 MachineRegisterInfo
&MRI
= MBB
->getParent()->getRegInfo();
310 std::vector
<MachineInstr
*> Copies
;
311 for (MachineInstr
&MI
: MBB
->instrs()) {
313 Copies
.push_back(&MI
);
316 for (MachineInstr
*MI
: Copies
) {
318 if (!MI
->getOperand(0).isReg())
320 if (!MI
->getOperand(1).isReg())
323 const Register Dst
= MI
->getOperand(0).getReg();
324 const Register Src
= MI
->getOperand(1).getReg();
326 if (!Register::isVirtualRegister(Dst
))
328 if (!Register::isVirtualRegister(Src
))
330 // Not folding COPY instructions if regbankselect has not set the RCs.
331 // Why are we only considering Register Classes? Because the verifier
332 // sometimes gets upset if the register classes don't match even if the
333 // types do. A future patch might add COPY folding for matching types in
334 // pre-registerbankselect code.
335 if (!MRI
.getRegClassOrNull(Dst
))
337 if (MRI
.getRegClass(Dst
) != MRI
.getRegClass(Src
))
340 std::vector
<MachineOperand
*> Uses
;
341 for (auto UI
= MRI
.use_begin(Dst
); UI
!= MRI
.use_end(); ++UI
)
342 Uses
.push_back(&*UI
);
343 for (auto *MO
: Uses
)
347 MI
->eraseFromParent();
353 static bool doDefKillClear(MachineBasicBlock
*MBB
) {
354 bool Changed
= false;
356 for (auto &MI
: *MBB
) {
357 for (auto &MO
: MI
.operands()) {
360 if (!MO
.isDef() && MO
.isKill()) {
365 if (MO
.isDef() && MO
.isDead()) {
375 static bool runOnBasicBlock(MachineBasicBlock
*MBB
,
376 std::vector
<StringRef
> &bbNames
,
377 unsigned &basicBlockNum
, NamedVRegCursor
&NVC
) {
379 if (CanonicalizeBasicBlockNumber
!= ~0U) {
380 if (CanonicalizeBasicBlockNumber
!= basicBlockNum
++)
382 LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB
->getName()
386 if (llvm::find(bbNames
, MBB
->getName()) != bbNames
.end()) {
388 dbgs() << "Found potentially duplicate BasicBlocks: " << MBB
->getName()
395 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB
->getName() << " \n\n";
396 dbgs() << "\n\n================================================\n\n";
399 bool Changed
= false;
400 MachineFunction
&MF
= *MBB
->getParent();
401 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
403 bbNames
.push_back(MBB
->getName());
404 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB
->getName() << "\n\n";);
406 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
408 Changed
|= propagateLocalCopies(MBB
);
409 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB
->dump(););
411 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB
->dump(););
412 unsigned IdempotentInstCount
= 0;
413 Changed
|= rescheduleCanonically(IdempotentInstCount
, MBB
);
414 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB
->dump(););
416 Changed
|= NVC
.renameVRegs(MBB
);
418 // Here we renumber the def vregs for the idempotent instructions from the top
419 // of the MachineBasicBlock so that they are named in the order that we sorted
420 // them alphabetically. Eventually we wont need SkipVRegs because we will use
421 // named vregs instead.
422 if (IdempotentInstCount
)
425 auto MII
= MBB
->begin();
426 for (unsigned i
= 0; i
< IdempotentInstCount
&& MII
!= MBB
->end(); ++i
) {
427 MachineInstr
&MI
= *MII
++;
429 Register vRegToRename
= MI
.getOperand(0).getReg();
430 auto Rename
= NVC
.createVirtualRegister(vRegToRename
);
432 std::vector
<MachineOperand
*> RenameMOs
;
433 for (auto &MO
: MRI
.reg_operands(vRegToRename
)) {
434 RenameMOs
.push_back(&MO
);
437 for (auto *MO
: RenameMOs
) {
442 Changed
|= doDefKillClear(MBB
);
444 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB
->dump();
447 dbgs() << "\n\n================================================\n\n");
451 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction
&MF
) {
453 static unsigned functionNum
= 0;
454 if (CanonicalizeFunctionNumber
!= ~0U) {
455 if (CanonicalizeFunctionNumber
!= functionNum
++)
457 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF
.getName()
461 // we need a valid vreg to create a vreg type for skipping all those
462 // stray vreg numbers so reach alignment/canonical vreg values.
463 std::vector
<MachineBasicBlock
*> RPOList
= GetRPOList(MF
);
466 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF
.getName() << " \n\n";
467 dbgs() << "\n\n================================================\n\n";
468 dbgs() << "Total Basic Blocks: " << RPOList
.size() << "\n";
470 : RPOList
) { dbgs() << MBB
->getName() << "\n"; } dbgs()
471 << "\n\n================================================\n\n";);
473 std::vector
<StringRef
> BBNames
;
477 bool Changed
= false;
479 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
480 NamedVRegCursor
NVC(MRI
);
481 for (auto MBB
: RPOList
)
482 Changed
|= runOnBasicBlock(MBB
, BBNames
, BBNum
, NVC
);