Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Passes / RegReAssign.cpp
blob8b9dc9c1fdd506c89e9cb97b2ca1612687d6e407
1 //===- bolt/Passes/RegReAssign.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the RegReAssign class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/RegReAssign.h"
14 #include "bolt/Core/MCPlus.h"
15 #include "bolt/Passes/BinaryFunctionCallGraph.h"
16 #include "bolt/Passes/DataflowAnalysis.h"
17 #include "bolt/Passes/DataflowInfoManager.h"
18 #include "bolt/Utils/Utils.h"
19 #include <numeric>
21 #define DEBUG_TYPE "regreassign"
23 using namespace llvm;
25 namespace opts {
26 extern cl::OptionCategory BoltOptCategory;
27 extern cl::opt<bool> UpdateDebugSections;
29 static cl::opt<bool> AggressiveReAssign(
30 "use-aggr-reg-reassign",
31 cl::desc("use register liveness analysis to try to find more opportunities "
32 "for -reg-reassign optimization"),
33 cl::cat(BoltOptCategory));
36 namespace llvm {
37 namespace bolt {
39 void RegReAssign::swap(BinaryFunction &Function, MCPhysReg A, MCPhysReg B) {
40 BinaryContext &BC = Function.getBinaryContext();
41 const BitVector &AliasA = BC.MIB->getAliases(A, false);
42 const BitVector &AliasB = BC.MIB->getAliases(B, false);
44 // Regular instructions
45 for (BinaryBasicBlock &BB : Function) {
46 for (MCInst &Inst : BB) {
47 for (MCOperand &Operand : MCPlus::primeOperands(Inst)) {
48 if (!Operand.isReg())
49 continue;
51 unsigned Reg = Operand.getReg();
52 if (AliasA.test(Reg)) {
53 Operand.setReg(BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg)));
54 --StaticBytesSaved;
55 DynBytesSaved -= BB.getKnownExecutionCount();
56 continue;
58 if (!AliasB.test(Reg))
59 continue;
60 Operand.setReg(BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg)));
61 ++StaticBytesSaved;
62 DynBytesSaved += BB.getKnownExecutionCount();
67 // CFI
68 DenseSet<const MCCFIInstruction *> Changed;
69 for (BinaryBasicBlock &BB : Function) {
70 for (MCInst &Inst : BB) {
71 if (!BC.MIB->isCFI(Inst))
72 continue;
73 const MCCFIInstruction *CFI = Function.getCFIFor(Inst);
74 if (Changed.count(CFI))
75 continue;
76 Changed.insert(CFI);
78 switch (CFI->getOperation()) {
79 case MCCFIInstruction::OpRegister: {
80 const unsigned CFIReg2 = CFI->getRegister2();
81 const MCPhysReg Reg2 = *BC.MRI->getLLVMRegNum(CFIReg2, /*isEH=*/false);
82 if (AliasA.test(Reg2)) {
83 Function.setCFIFor(
84 Inst, MCCFIInstruction::createRegister(
85 nullptr, CFI->getRegister(),
86 BC.MRI->getDwarfRegNum(
87 BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg2)),
88 false)));
89 } else if (AliasB.test(Reg2)) {
90 Function.setCFIFor(
91 Inst, MCCFIInstruction::createRegister(
92 nullptr, CFI->getRegister(),
93 BC.MRI->getDwarfRegNum(
94 BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg2)),
95 false)));
98 [[fallthrough]];
99 case MCCFIInstruction::OpUndefined:
100 case MCCFIInstruction::OpDefCfa:
101 case MCCFIInstruction::OpOffset:
102 case MCCFIInstruction::OpRestore:
103 case MCCFIInstruction::OpSameValue:
104 case MCCFIInstruction::OpDefCfaRegister:
105 case MCCFIInstruction::OpRelOffset:
106 case MCCFIInstruction::OpEscape: {
107 unsigned CFIReg;
108 if (CFI->getOperation() != MCCFIInstruction::OpEscape) {
109 CFIReg = CFI->getRegister();
110 } else {
111 std::optional<uint8_t> Reg =
112 readDWARFExpressionTargetReg(CFI->getValues());
113 // Handle DW_CFA_def_cfa_expression
114 if (!Reg)
115 break;
116 CFIReg = *Reg;
118 const MCPhysReg Reg = *BC.MRI->getLLVMRegNum(CFIReg, /*isEH=*/false);
119 if (AliasA.test(Reg))
120 Function.mutateCFIRegisterFor(
121 Inst,
122 BC.MRI->getDwarfRegNum(
123 BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg)), false));
124 else if (AliasB.test(Reg))
125 Function.mutateCFIRegisterFor(
126 Inst,
127 BC.MRI->getDwarfRegNum(
128 BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg)), false));
129 break;
131 default:
132 break;
138 void RegReAssign::rankRegisters(BinaryFunction &Function) {
139 BinaryContext &BC = Function.getBinaryContext();
140 std::fill(RegScore.begin(), RegScore.end(), 0);
141 std::fill(RankedRegs.begin(), RankedRegs.end(), 0);
143 auto countRegScore = [&](BinaryBasicBlock &BB) {
144 for (MCInst &Inst : BB) {
145 const bool CannotUseREX = BC.MIB->cannotUseREX(Inst);
146 const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode());
148 // Disallow substituitions involving regs in implicit uses lists
149 for (MCPhysReg ImplicitUse : Desc.implicit_uses()) {
150 const size_t RegEC =
151 BC.MIB->getAliases(ImplicitUse, false).find_first();
152 RegScore[RegEC] =
153 std::numeric_limits<decltype(RegScore)::value_type>::min();
156 // Disallow substituitions involving regs in implicit defs lists
157 for (MCPhysReg ImplicitDef : Desc.implicit_defs()) {
158 const size_t RegEC =
159 BC.MIB->getAliases(ImplicitDef, false).find_first();
160 RegScore[RegEC] =
161 std::numeric_limits<decltype(RegScore)::value_type>::min();
164 for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) {
165 const MCOperand &Operand = Inst.getOperand(I);
166 if (!Operand.isReg())
167 continue;
169 if (Desc.getOperandConstraint(I, MCOI::TIED_TO) != -1)
170 continue;
172 unsigned Reg = Operand.getReg();
173 size_t RegEC = BC.MIB->getAliases(Reg, false).find_first();
174 if (RegEC == 0)
175 continue;
177 // Disallow substituitions involving regs in instrs that cannot use REX
178 // The relationship of X86 registers is shown in the diagram. BL and BH
179 // do not have a direct alias relationship. However, if the BH register
180 // cannot be swapped, then the BX/EBX/RBX registers cannot be swapped as
181 // well, which means that BL register also cannot be swapped. Therefore,
182 // in the presence of BX/EBX/RBX registers, BL and BH have an alias
183 // relationship.
184 // ┌─────────────────┐
185 // │ RBX │
186 // ├─────┬───────────┤
187 // │ │ EBX │
188 // ├─────┴──┬────────┤
189 // │ │ BX │
190 // ├────────┼───┬────┤
191 // │ │BH │BL │
192 // └────────┴───┴────┘
193 if (CannotUseREX) {
194 RegScore[RegEC] =
195 std::numeric_limits<decltype(RegScore)::value_type>::min();
196 RegScore[BC.MIB->getAliasSized(Reg, 1)] = RegScore[RegEC];
197 continue;
200 // Unsupported substitution, cannot swap BH with R* regs, bail
201 if (BC.MIB->isUpper8BitReg(Reg) && ClassicCSR.test(Reg)) {
202 RegScore[RegEC] =
203 std::numeric_limits<decltype(RegScore)::value_type>::min();
204 RegScore[BC.MIB->getAliasSized(Reg, 1)] = RegScore[RegEC];
205 continue;
208 RegScore[RegEC] += BB.getKnownExecutionCount();
212 for (BinaryBasicBlock &BB : Function)
213 countRegScore(BB);
215 for (BinaryFunction *ChildFrag : Function.getFragments()) {
216 for (BinaryBasicBlock &BB : *ChildFrag)
217 countRegScore(BB);
220 std::iota(RankedRegs.begin(), RankedRegs.end(), 0); // 0, 1, 2, 3...
221 llvm::sort(RankedRegs,
222 [&](size_t A, size_t B) { return RegScore[A] > RegScore[B]; });
224 LLVM_DEBUG({
225 for (size_t Reg : RankedRegs) {
226 if (RegScore[Reg] == 0)
227 continue;
228 dbgs() << Reg << " ";
229 if (RegScore[Reg] > 0)
230 dbgs() << BC.MRI->getName(Reg) << ": " << RegScore[Reg] << "\n";
231 else
232 dbgs() << BC.MRI->getName(Reg) << ": (blacklisted)\n";
237 void RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) {
238 BinaryContext &BC = Function.getBinaryContext();
239 rankRegisters(Function);
241 // If there is a situation where function:
242 // A() -> A.cold()
243 // A.localalias() -> A.cold()
244 // simply swapping these two calls can cause issues.
245 for (BinaryFunction *ChildFrag : Function.getFragments()) {
246 if (ChildFrag->getParentFragments()->size() > 1)
247 return;
248 if (ChildFrag->empty())
249 return;
252 // Bail early if our registers are all black listed, before running expensive
253 // analysis passes
254 bool Bail = true;
255 int64_t LowScoreClassic = std::numeric_limits<int64_t>::max();
256 for (int J : ClassicRegs.set_bits()) {
257 if (RegScore[J] <= 0)
258 continue;
259 Bail = false;
260 if (RegScore[J] < LowScoreClassic)
261 LowScoreClassic = RegScore[J];
263 if (Bail)
264 return;
265 BitVector Extended = ClassicRegs;
266 Extended.flip();
267 Extended &= GPRegs;
268 Bail = true;
269 int64_t HighScoreExtended = 0;
270 for (int J : Extended.set_bits()) {
271 if (RegScore[J] <= 0)
272 continue;
273 Bail = false;
274 if (RegScore[J] > HighScoreExtended)
275 HighScoreExtended = RegScore[J];
277 // Also bail early if there is no profitable substitution even if we assume
278 // all registers can be exchanged
279 if (Bail || (LowScoreClassic << 1) >= HighScoreExtended)
280 return;
282 // -- expensive pass -- determine all regs alive during func start
283 DataflowInfoManager Info(Function, RA.get(), nullptr);
284 BitVector AliveAtStart = *Info.getLivenessAnalysis().getStateAt(
285 ProgramPoint::getFirstPointAt(*Function.begin()));
286 for (BinaryBasicBlock &BB : Function)
287 if (BB.pred_size() == 0)
288 AliveAtStart |= *Info.getLivenessAnalysis().getStateAt(
289 ProgramPoint::getFirstPointAt(BB));
291 // Mark frame pointer alive because of CFI
292 AliveAtStart |= BC.MIB->getAliases(BC.MIB->getFramePointer(), false);
293 // Never touch return registers
294 BC.MIB->getDefaultLiveOut(AliveAtStart);
296 // Try swapping more profitable options first
297 auto Begin = RankedRegs.begin();
298 auto End = std::prev(RankedRegs.end());
299 while (Begin != End) {
300 MCPhysReg ClassicReg = *End;
301 if (!ClassicRegs[ClassicReg] || RegScore[ClassicReg] <= 0) {
302 --End;
303 continue;
306 MCPhysReg ExtReg = *Begin;
307 if (!Extended[ExtReg] || RegScore[ExtReg] <= 0) {
308 ++Begin;
309 continue;
312 if (RegScore[ClassicReg] << 1 >= RegScore[ExtReg]) {
313 LLVM_DEBUG(dbgs() << " Ending at " << BC.MRI->getName(ClassicReg)
314 << " with " << BC.MRI->getName(ExtReg)
315 << " because exchange is not profitable\n");
316 break;
319 BitVector AnyAliasAlive = AliveAtStart;
320 AnyAliasAlive &= BC.MIB->getAliases(ClassicReg);
321 if (AnyAliasAlive.any()) {
322 LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
323 << " with " << BC.MRI->getName(ExtReg)
324 << " because classic reg is alive\n");
325 --End;
326 continue;
328 AnyAliasAlive = AliveAtStart;
329 AnyAliasAlive &= BC.MIB->getAliases(ExtReg);
330 if (AnyAliasAlive.any()) {
331 LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
332 << " with " << BC.MRI->getName(ExtReg)
333 << " because extended reg is alive\n");
334 ++Begin;
335 continue;
338 // Opportunity detected. Swap.
339 LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC.MRI->getName(ClassicReg)
340 << " with " << BC.MRI->getName(ExtReg) << "\n\n");
341 swap(Function, ClassicReg, ExtReg);
342 FuncsChanged.insert(&Function);
343 for (BinaryFunction *ChildFrag : Function.getFragments()) {
344 swap(*ChildFrag, ClassicReg, ExtReg);
345 FuncsChanged.insert(ChildFrag);
347 ++Begin;
348 if (Begin == End)
349 break;
350 --End;
354 bool RegReAssign::conservativePassOverFunction(BinaryFunction &Function) {
355 BinaryContext &BC = Function.getBinaryContext();
356 rankRegisters(Function);
358 for (BinaryFunction *ChildFrag : Function.getFragments()) {
359 if (ChildFrag->getParentFragments()->size() > 1)
360 return false;
361 if (ChildFrag->empty())
362 return false;
365 // Try swapping R12, R13, R14 or R15 with RBX (we work with all callee-saved
366 // regs except RBP)
367 MCPhysReg Candidate = 0;
368 for (int J : ExtendedCSR.set_bits())
369 if (RegScore[J] > RegScore[Candidate])
370 Candidate = J;
372 if (!Candidate || RegScore[Candidate] < 0)
373 return false;
375 // Check if our classic callee-saved reg (RBX is the only one) has lower
376 // score / utilization rate
377 MCPhysReg RBX = 0;
378 for (int I : ClassicCSR.set_bits()) {
379 int64_t ScoreRBX = RegScore[I];
380 if (ScoreRBX <= 0)
381 continue;
383 if (RegScore[Candidate] > (ScoreRBX + 10))
384 RBX = I;
387 if (!RBX)
388 return false;
390 // The high 8 bits of the register will never be swapped. To prevent the high
391 // 8 bits from being swapped incorrectly, we should switched to swapping the
392 // low 8 bits of the register instead.
393 if (BC.MIB->isUpper8BitReg(RBX)) {
394 RBX = BC.MIB->getAliasSized(RBX, 1);
395 if (RegScore[RBX] < 0 || RegScore[RBX] > RegScore[Candidate])
396 return false;
399 LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC.MRI->getName(RBX) << " with "
400 << BC.MRI->getName(Candidate) << "\n\n");
401 (void)BC;
402 swap(Function, RBX, Candidate);
403 FuncsChanged.insert(&Function);
404 for (BinaryFunction *ChildFrag : Function.getFragments()) {
405 swap(*ChildFrag, RBX, Candidate);
406 FuncsChanged.insert(ChildFrag);
408 return true;
411 void RegReAssign::setupAggressivePass(BinaryContext &BC,
412 std::map<uint64_t, BinaryFunction> &BFs) {
413 setupConservativePass(BC, BFs);
414 CG.reset(new BinaryFunctionCallGraph(buildCallGraph(BC)));
415 RA.reset(new RegAnalysis(BC, &BFs, &*CG));
417 GPRegs = BitVector(BC.MRI->getNumRegs(), false);
418 BC.MIB->getGPRegs(GPRegs);
421 void RegReAssign::setupConservativePass(
422 BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs) {
423 // Set up constant bitvectors used throughout this analysis
424 ClassicRegs = BitVector(BC.MRI->getNumRegs(), false);
425 CalleeSaved = BitVector(BC.MRI->getNumRegs(), false);
426 ClassicCSR = BitVector(BC.MRI->getNumRegs(), false);
427 ExtendedCSR = BitVector(BC.MRI->getNumRegs(), false);
428 // Never consider the frame pointer
429 BC.MIB->getClassicGPRegs(ClassicRegs);
430 ClassicRegs.flip();
431 ClassicRegs |= BC.MIB->getAliases(BC.MIB->getFramePointer(), false);
432 ClassicRegs.flip();
433 BC.MIB->getCalleeSavedRegs(CalleeSaved);
434 ClassicCSR |= ClassicRegs;
435 ClassicCSR &= CalleeSaved;
436 BC.MIB->getClassicGPRegs(ClassicRegs);
437 ExtendedCSR |= ClassicRegs;
438 ExtendedCSR.flip();
439 ExtendedCSR &= CalleeSaved;
441 LLVM_DEBUG({
442 RegStatePrinter P(BC);
443 dbgs() << "Starting register reassignment\nClassicRegs: ";
444 P.print(dbgs(), ClassicRegs);
445 dbgs() << "\nCalleeSaved: ";
446 P.print(dbgs(), CalleeSaved);
447 dbgs() << "\nClassicCSR: ";
448 P.print(dbgs(), ClassicCSR);
449 dbgs() << "\nExtendedCSR: ";
450 P.print(dbgs(), ExtendedCSR);
451 dbgs() << "\n";
455 void RegReAssign::runOnFunctions(BinaryContext &BC) {
456 RegScore = std::vector<int64_t>(BC.MRI->getNumRegs(), 0);
457 RankedRegs = std::vector<size_t>(BC.MRI->getNumRegs(), 0);
459 if (opts::AggressiveReAssign)
460 setupAggressivePass(BC, BC.getBinaryFunctions());
461 else
462 setupConservativePass(BC, BC.getBinaryFunctions());
464 for (auto &I : BC.getBinaryFunctions()) {
465 BinaryFunction &Function = I.second;
467 if (!Function.isSimple() || Function.isIgnored() || Function.isFragment())
468 continue;
470 LLVM_DEBUG(dbgs() << "====================================\n");
471 LLVM_DEBUG(dbgs() << " - " << Function.getPrintName() << "\n");
472 if (!conservativePassOverFunction(Function) && opts::AggressiveReAssign) {
473 aggressivePassOverFunction(Function);
474 LLVM_DEBUG({
475 if (FuncsChanged.count(&Function))
476 dbgs() << "Aggressive pass successful on " << Function.getPrintName()
477 << "\n";
482 if (FuncsChanged.empty()) {
483 outs() << "BOLT-INFO: Reg Reassignment Pass: no changes were made.\n";
484 return;
486 if (opts::UpdateDebugSections)
487 outs() << "BOLT-WARNING: You used -reg-reassign and -update-debug-sections."
488 << " Some registers were changed but associated AT_LOCATION for "
489 << "impacted variables were NOT updated! This operation is "
490 << "currently unsupported by BOLT.\n";
491 outs() << "BOLT-INFO: Reg Reassignment Pass Stats:\n";
492 outs() << "\t " << FuncsChanged.size() << " functions affected.\n";
493 outs() << "\t " << StaticBytesSaved << " static bytes saved.\n";
494 outs() << "\t " << DynBytesSaved << " dynamic bytes saved.\n";
497 } // namespace bolt
498 } // namespace llvm