[sanitizer] Improve FreeBSD ASLR detection
[llvm-project.git] / llvm / lib / CodeGen / MIRCanonicalizerPass.cpp
blob3ec8c627f1313b35df3a885c99e657787a5e31a5
1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use inorder to reduce diffs caused by slightly
14 // different schedules.
16 // Basic Usage:
18 // llc -o - -run-pass mir-canonicalizer example.mir
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
24 //===----------------------------------------------------------------------===//
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
37 #include <queue>
39 using namespace llvm;
41 #define DEBUG_TYPE "mir-canonicalizer"
43 static cl::opt<unsigned>
44 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
45 cl::value_desc("N"),
46 cl::desc("Function number to canonicalize."));
48 namespace {
50 class MIRCanonicalizer : public MachineFunctionPass {
51 public:
52 static char ID;
53 MIRCanonicalizer() : MachineFunctionPass(ID) {}
55 StringRef getPassName() const override {
56 return "Rename register operands in a canonical ordering.";
59 void getAnalysisUsage(AnalysisUsage &AU) const override {
60 AU.setPreservesCFG();
61 MachineFunctionPass::getAnalysisUsage(AU);
64 bool runOnMachineFunction(MachineFunction &MF) override;
67 } // end anonymous namespace
69 char MIRCanonicalizer::ID;
71 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
73 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
74 "Rename Register Operands Canonically", false, false)
76 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
77 "Rename Register Operands Canonically", false, false)
79 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
80 if (MF.empty())
81 return {};
82 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
83 std::vector<MachineBasicBlock *> RPOList;
84 append_range(RPOList, RPOT);
86 return RPOList;
89 static bool
90 rescheduleLexographically(std::vector<MachineInstr *> instructions,
91 MachineBasicBlock *MBB,
92 std::function<MachineBasicBlock::iterator()> getPos) {
94 bool Changed = false;
95 using StringInstrPair = std::pair<std::string, MachineInstr *>;
96 std::vector<StringInstrPair> StringInstrMap;
98 for (auto *II : instructions) {
99 std::string S;
100 raw_string_ostream OS(S);
101 II->print(OS);
102 OS.flush();
104 // Trim the assignment, or start from the beginning in the case of a store.
105 const size_t i = S.find('=');
106 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
109 llvm::sort(StringInstrMap,
110 [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
111 return (a.first < b.first);
114 for (auto &II : StringInstrMap) {
116 LLVM_DEBUG({
117 dbgs() << "Splicing ";
118 II.second->dump();
119 dbgs() << " right before: ";
120 getPos()->dump();
123 Changed = true;
124 MBB->splice(getPos(), MBB, II.second);
127 return Changed;
130 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
131 MachineBasicBlock *MBB) {
133 bool Changed = false;
135 // Calculates the distance of MI from the beginning of its parent BB.
136 auto getInstrIdx = [](const MachineInstr &MI) {
137 unsigned i = 0;
138 for (auto &CurMI : *MI.getParent()) {
139 if (&CurMI == &MI)
140 return i;
141 i++;
143 return ~0U;
146 // Pre-Populate vector of instructions to reschedule so that we don't
147 // clobber the iterator.
148 std::vector<MachineInstr *> Instructions;
149 for (auto &MI : *MBB) {
150 Instructions.push_back(&MI);
153 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
154 std::map<unsigned, MachineInstr *> MultiUserLookup;
155 unsigned UseToBringDefCloserToCount = 0;
156 std::vector<MachineInstr *> PseudoIdempotentInstructions;
157 std::vector<unsigned> PhysRegDefs;
158 for (auto *II : Instructions) {
159 for (unsigned i = 1; i < II->getNumOperands(); i++) {
160 MachineOperand &MO = II->getOperand(i);
161 if (!MO.isReg())
162 continue;
164 if (Register::isVirtualRegister(MO.getReg()))
165 continue;
167 if (!MO.isDef())
168 continue;
170 PhysRegDefs.push_back(MO.getReg());
174 for (auto *II : Instructions) {
175 if (II->getNumOperands() == 0)
176 continue;
177 if (II->mayLoadOrStore())
178 continue;
180 MachineOperand &MO = II->getOperand(0);
181 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
182 continue;
183 if (!MO.isDef())
184 continue;
186 bool IsPseudoIdempotent = true;
187 for (unsigned i = 1; i < II->getNumOperands(); i++) {
189 if (II->getOperand(i).isImm()) {
190 continue;
193 if (II->getOperand(i).isReg()) {
194 if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
195 if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
196 continue;
200 IsPseudoIdempotent = false;
201 break;
204 if (IsPseudoIdempotent) {
205 PseudoIdempotentInstructions.push_back(II);
206 continue;
209 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
211 MachineInstr *Def = II;
212 unsigned Distance = ~0U;
213 MachineInstr *UseToBringDefCloserTo = nullptr;
214 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
215 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
216 MachineInstr *UseInst = UO.getParent();
218 const unsigned DefLoc = getInstrIdx(*Def);
219 const unsigned UseLoc = getInstrIdx(*UseInst);
220 const unsigned Delta = (UseLoc - DefLoc);
222 if (UseInst->getParent() != Def->getParent())
223 continue;
224 if (DefLoc >= UseLoc)
225 continue;
227 if (Delta < Distance) {
228 Distance = Delta;
229 UseToBringDefCloserTo = UseInst;
230 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
234 const auto BBE = MBB->instr_end();
235 MachineBasicBlock::iterator DefI = BBE;
236 MachineBasicBlock::iterator UseI = BBE;
238 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
240 if (DefI != BBE && UseI != BBE)
241 break;
243 if (&*BBI == Def) {
244 DefI = BBI;
245 continue;
248 if (&*BBI == UseToBringDefCloserTo) {
249 UseI = BBI;
250 continue;
254 if (DefI == BBE || UseI == BBE)
255 continue;
257 LLVM_DEBUG({
258 dbgs() << "Splicing ";
259 DefI->dump();
260 dbgs() << " right before: ";
261 UseI->dump();
264 MultiUsers[UseToBringDefCloserTo].push_back(Def);
265 Changed = true;
266 MBB->splice(UseI, MBB, DefI);
269 // Sort the defs for users of multiple defs lexographically.
270 for (const auto &E : MultiUserLookup) {
272 auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
273 return &MI == E.second;
276 if (UseI == MBB->instr_end())
277 continue;
279 LLVM_DEBUG(
280 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
281 Changed |= rescheduleLexographically(
282 MultiUsers[E.second], MBB,
283 [&]() -> MachineBasicBlock::iterator { return UseI; });
286 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
287 LLVM_DEBUG(
288 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
289 Changed |= rescheduleLexographically(
290 PseudoIdempotentInstructions, MBB,
291 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
293 return Changed;
296 static bool propagateLocalCopies(MachineBasicBlock *MBB) {
297 bool Changed = false;
298 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
300 std::vector<MachineInstr *> Copies;
301 for (MachineInstr &MI : MBB->instrs()) {
302 if (MI.isCopy())
303 Copies.push_back(&MI);
306 for (MachineInstr *MI : Copies) {
308 if (!MI->getOperand(0).isReg())
309 continue;
310 if (!MI->getOperand(1).isReg())
311 continue;
313 const Register Dst = MI->getOperand(0).getReg();
314 const Register Src = MI->getOperand(1).getReg();
316 if (!Register::isVirtualRegister(Dst))
317 continue;
318 if (!Register::isVirtualRegister(Src))
319 continue;
320 // Not folding COPY instructions if regbankselect has not set the RCs.
321 // Why are we only considering Register Classes? Because the verifier
322 // sometimes gets upset if the register classes don't match even if the
323 // types do. A future patch might add COPY folding for matching types in
324 // pre-registerbankselect code.
325 if (!MRI.getRegClassOrNull(Dst))
326 continue;
327 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
328 continue;
330 std::vector<MachineOperand *> Uses;
331 for (MachineOperand &MO : MRI.use_operands(Dst))
332 Uses.push_back(&MO);
333 for (auto *MO : Uses)
334 MO->setReg(Src);
336 Changed = true;
337 MI->eraseFromParent();
340 return Changed;
343 static bool doDefKillClear(MachineBasicBlock *MBB) {
344 bool Changed = false;
346 for (auto &MI : *MBB) {
347 for (auto &MO : MI.operands()) {
348 if (!MO.isReg())
349 continue;
350 if (!MO.isDef() && MO.isKill()) {
351 Changed = true;
352 MO.setIsKill(false);
355 if (MO.isDef() && MO.isDead()) {
356 Changed = true;
357 MO.setIsDead(false);
362 return Changed;
365 static bool runOnBasicBlock(MachineBasicBlock *MBB,
366 unsigned BasicBlockNum, VRegRenamer &Renamer) {
367 LLVM_DEBUG({
368 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
369 dbgs() << "\n\n================================================\n\n";
372 bool Changed = false;
374 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
376 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
377 MBB->dump(););
378 Changed |= propagateLocalCopies(MBB);
379 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
381 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
382 unsigned IdempotentInstCount = 0;
383 Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
384 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
386 Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
388 // TODO: Consider dropping this. Dropping kill defs is probably not
389 // semantically sound.
390 Changed |= doDefKillClear(MBB);
392 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
393 dbgs() << "\n";);
394 LLVM_DEBUG(
395 dbgs() << "\n\n================================================\n\n");
396 return Changed;
399 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
401 static unsigned functionNum = 0;
402 if (CanonicalizeFunctionNumber != ~0U) {
403 if (CanonicalizeFunctionNumber != functionNum++)
404 return false;
405 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
406 << "\n";);
409 // we need a valid vreg to create a vreg type for skipping all those
410 // stray vreg numbers so reach alignment/canonical vreg values.
411 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
413 LLVM_DEBUG(
414 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
415 dbgs() << "\n\n================================================\n\n";
416 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
417 for (auto MBB
418 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
419 << "\n\n================================================\n\n";);
421 unsigned BBNum = 0;
422 bool Changed = false;
423 MachineRegisterInfo &MRI = MF.getRegInfo();
424 VRegRenamer Renamer(MRI);
425 for (auto MBB : RPOList)
426 Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
428 return Changed;