1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use inorder to reduce diffs caused by slightly
14 // different schedules.
18 // llc -o - -run-pass mir-canonicalizer example.mir
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
24 //===----------------------------------------------------------------------===//
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
41 #define DEBUG_TYPE "mir-canonicalizer"
43 static cl::opt
<unsigned>
44 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden
, cl::init(~0u),
46 cl::desc("Function number to canonicalize."));
50 class MIRCanonicalizer
: public MachineFunctionPass
{
53 MIRCanonicalizer() : MachineFunctionPass(ID
) {}
55 StringRef
getPassName() const override
{
56 return "Rename register operands in a canonical ordering.";
59 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
61 MachineFunctionPass::getAnalysisUsage(AU
);
64 bool runOnMachineFunction(MachineFunction
&MF
) override
;
67 } // end anonymous namespace
69 char MIRCanonicalizer::ID
;
71 char &llvm::MIRCanonicalizerID
= MIRCanonicalizer::ID
;
73 INITIALIZE_PASS_BEGIN(MIRCanonicalizer
, "mir-canonicalizer",
74 "Rename Register Operands Canonically", false, false)
76 INITIALIZE_PASS_END(MIRCanonicalizer
, "mir-canonicalizer",
77 "Rename Register Operands Canonically", false, false)
79 static std::vector
<MachineBasicBlock
*> GetRPOList(MachineFunction
&MF
) {
82 ReversePostOrderTraversal
<MachineBasicBlock
*> RPOT(&*MF
.begin());
83 std::vector
<MachineBasicBlock
*> RPOList
;
84 append_range(RPOList
, RPOT
);
90 rescheduleLexographically(std::vector
<MachineInstr
*> instructions
,
91 MachineBasicBlock
*MBB
,
92 std::function
<MachineBasicBlock::iterator()> getPos
) {
95 using StringInstrPair
= std::pair
<std::string
, MachineInstr
*>;
96 std::vector
<StringInstrPair
> StringInstrMap
;
98 for (auto *II
: instructions
) {
100 raw_string_ostream
OS(S
);
104 // Trim the assignment, or start from the beginning in the case of a store.
105 const size_t i
= S
.find('=');
106 StringInstrMap
.push_back({(i
== std::string::npos
) ? S
: S
.substr(i
), II
});
109 llvm::sort(StringInstrMap
,
110 [](const StringInstrPair
&a
, const StringInstrPair
&b
) -> bool {
111 return (a
.first
< b
.first
);
114 for (auto &II
: StringInstrMap
) {
117 dbgs() << "Splicing ";
119 dbgs() << " right before: ";
124 MBB
->splice(getPos(), MBB
, II
.second
);
130 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount
,
131 MachineBasicBlock
*MBB
) {
133 bool Changed
= false;
135 // Calculates the distance of MI from the beginning of its parent BB.
136 auto getInstrIdx
= [](const MachineInstr
&MI
) {
138 for (auto &CurMI
: *MI
.getParent()) {
146 // Pre-Populate vector of instructions to reschedule so that we don't
147 // clobber the iterator.
148 std::vector
<MachineInstr
*> Instructions
;
149 for (auto &MI
: *MBB
) {
150 Instructions
.push_back(&MI
);
153 std::map
<MachineInstr
*, std::vector
<MachineInstr
*>> MultiUsers
;
154 std::map
<unsigned, MachineInstr
*> MultiUserLookup
;
155 unsigned UseToBringDefCloserToCount
= 0;
156 std::vector
<MachineInstr
*> PseudoIdempotentInstructions
;
157 std::vector
<unsigned> PhysRegDefs
;
158 for (auto *II
: Instructions
) {
159 for (unsigned i
= 1; i
< II
->getNumOperands(); i
++) {
160 MachineOperand
&MO
= II
->getOperand(i
);
164 if (Register::isVirtualRegister(MO
.getReg()))
170 PhysRegDefs
.push_back(MO
.getReg());
174 for (auto *II
: Instructions
) {
175 if (II
->getNumOperands() == 0)
177 if (II
->mayLoadOrStore())
180 MachineOperand
&MO
= II
->getOperand(0);
181 if (!MO
.isReg() || !Register::isVirtualRegister(MO
.getReg()))
186 bool IsPseudoIdempotent
= true;
187 for (unsigned i
= 1; i
< II
->getNumOperands(); i
++) {
189 if (II
->getOperand(i
).isImm()) {
193 if (II
->getOperand(i
).isReg()) {
194 if (!Register::isVirtualRegister(II
->getOperand(i
).getReg()))
195 if (!llvm::is_contained(PhysRegDefs
, II
->getOperand(i
).getReg())) {
200 IsPseudoIdempotent
= false;
204 if (IsPseudoIdempotent
) {
205 PseudoIdempotentInstructions
.push_back(II
);
209 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II
->dump(); MO
.dump(););
211 MachineInstr
*Def
= II
;
212 unsigned Distance
= ~0U;
213 MachineInstr
*UseToBringDefCloserTo
= nullptr;
214 MachineRegisterInfo
*MRI
= &MBB
->getParent()->getRegInfo();
215 for (auto &UO
: MRI
->use_nodbg_operands(MO
.getReg())) {
216 MachineInstr
*UseInst
= UO
.getParent();
218 const unsigned DefLoc
= getInstrIdx(*Def
);
219 const unsigned UseLoc
= getInstrIdx(*UseInst
);
220 const unsigned Delta
= (UseLoc
- DefLoc
);
222 if (UseInst
->getParent() != Def
->getParent())
224 if (DefLoc
>= UseLoc
)
227 if (Delta
< Distance
) {
229 UseToBringDefCloserTo
= UseInst
;
230 MultiUserLookup
[UseToBringDefCloserToCount
++] = UseToBringDefCloserTo
;
234 const auto BBE
= MBB
->instr_end();
235 MachineBasicBlock::iterator DefI
= BBE
;
236 MachineBasicBlock::iterator UseI
= BBE
;
238 for (auto BBI
= MBB
->instr_begin(); BBI
!= BBE
; ++BBI
) {
240 if (DefI
!= BBE
&& UseI
!= BBE
)
248 if (&*BBI
== UseToBringDefCloserTo
) {
254 if (DefI
== BBE
|| UseI
== BBE
)
258 dbgs() << "Splicing ";
260 dbgs() << " right before: ";
264 MultiUsers
[UseToBringDefCloserTo
].push_back(Def
);
266 MBB
->splice(UseI
, MBB
, DefI
);
269 // Sort the defs for users of multiple defs lexographically.
270 for (const auto &E
: MultiUserLookup
) {
272 auto UseI
= llvm::find_if(MBB
->instrs(), [&](MachineInstr
&MI
) -> bool {
273 return &MI
== E
.second
;
276 if (UseI
== MBB
->instr_end())
280 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
281 Changed
|= rescheduleLexographically(
282 MultiUsers
[E
.second
], MBB
,
283 [&]() -> MachineBasicBlock::iterator
{ return UseI
; });
286 PseudoIdempotentInstCount
= PseudoIdempotentInstructions
.size();
288 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
289 Changed
|= rescheduleLexographically(
290 PseudoIdempotentInstructions
, MBB
,
291 [&]() -> MachineBasicBlock::iterator
{ return MBB
->begin(); });
296 static bool propagateLocalCopies(MachineBasicBlock
*MBB
) {
297 bool Changed
= false;
298 MachineRegisterInfo
&MRI
= MBB
->getParent()->getRegInfo();
300 std::vector
<MachineInstr
*> Copies
;
301 for (MachineInstr
&MI
: MBB
->instrs()) {
303 Copies
.push_back(&MI
);
306 for (MachineInstr
*MI
: Copies
) {
308 if (!MI
->getOperand(0).isReg())
310 if (!MI
->getOperand(1).isReg())
313 const Register Dst
= MI
->getOperand(0).getReg();
314 const Register Src
= MI
->getOperand(1).getReg();
316 if (!Register::isVirtualRegister(Dst
))
318 if (!Register::isVirtualRegister(Src
))
320 // Not folding COPY instructions if regbankselect has not set the RCs.
321 // Why are we only considering Register Classes? Because the verifier
322 // sometimes gets upset if the register classes don't match even if the
323 // types do. A future patch might add COPY folding for matching types in
324 // pre-registerbankselect code.
325 if (!MRI
.getRegClassOrNull(Dst
))
327 if (MRI
.getRegClass(Dst
) != MRI
.getRegClass(Src
))
330 std::vector
<MachineOperand
*> Uses
;
331 for (MachineOperand
&MO
: MRI
.use_operands(Dst
))
333 for (auto *MO
: Uses
)
337 MI
->eraseFromParent();
343 static bool doDefKillClear(MachineBasicBlock
*MBB
) {
344 bool Changed
= false;
346 for (auto &MI
: *MBB
) {
347 for (auto &MO
: MI
.operands()) {
350 if (!MO
.isDef() && MO
.isKill()) {
355 if (MO
.isDef() && MO
.isDead()) {
365 static bool runOnBasicBlock(MachineBasicBlock
*MBB
,
366 unsigned BasicBlockNum
, VRegRenamer
&Renamer
) {
368 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB
->getName() << " \n\n";
369 dbgs() << "\n\n================================================\n\n";
372 bool Changed
= false;
374 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB
->getName() << "\n\n";);
376 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
378 Changed
|= propagateLocalCopies(MBB
);
379 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB
->dump(););
381 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB
->dump(););
382 unsigned IdempotentInstCount
= 0;
383 Changed
|= rescheduleCanonically(IdempotentInstCount
, MBB
);
384 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB
->dump(););
386 Changed
|= Renamer
.renameVRegs(MBB
, BasicBlockNum
);
388 // TODO: Consider dropping this. Dropping kill defs is probably not
389 // semantically sound.
390 Changed
|= doDefKillClear(MBB
);
392 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB
->dump();
395 dbgs() << "\n\n================================================\n\n");
399 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction
&MF
) {
401 static unsigned functionNum
= 0;
402 if (CanonicalizeFunctionNumber
!= ~0U) {
403 if (CanonicalizeFunctionNumber
!= functionNum
++)
405 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF
.getName()
409 // we need a valid vreg to create a vreg type for skipping all those
410 // stray vreg numbers so reach alignment/canonical vreg values.
411 std::vector
<MachineBasicBlock
*> RPOList
= GetRPOList(MF
);
414 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF
.getName() << " \n\n";
415 dbgs() << "\n\n================================================\n\n";
416 dbgs() << "Total Basic Blocks: " << RPOList
.size() << "\n";
418 : RPOList
) { dbgs() << MBB
->getName() << "\n"; } dbgs()
419 << "\n\n================================================\n\n";);
422 bool Changed
= false;
423 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
424 VRegRenamer
Renamer(MRI
);
425 for (auto MBB
: RPOList
)
426 Changed
|= runOnBasicBlock(MBB
, BBNum
++, Renamer
);