1 //===- bolt/Passes/RegReAssign.cpp ----------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the RegReAssign class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/RegReAssign.h"
14 #include "bolt/Core/MCPlus.h"
15 #include "bolt/Passes/BinaryFunctionCallGraph.h"
16 #include "bolt/Passes/DataflowAnalysis.h"
17 #include "bolt/Passes/DataflowInfoManager.h"
18 #include "bolt/Utils/Utils.h"
21 #define DEBUG_TYPE "regreassign"
26 extern cl::OptionCategory BoltOptCategory
;
27 extern cl::opt
<bool> UpdateDebugSections
;
29 static cl::opt
<bool> AggressiveReAssign(
30 "use-aggr-reg-reassign",
31 cl::desc("use register liveness analysis to try to find more opportunities "
32 "for -reg-reassign optimization"),
33 cl::cat(BoltOptCategory
));
39 void RegReAssign::swap(BinaryFunction
&Function
, MCPhysReg A
, MCPhysReg B
) {
40 BinaryContext
&BC
= Function
.getBinaryContext();
41 const BitVector
&AliasA
= BC
.MIB
->getAliases(A
, false);
42 const BitVector
&AliasB
= BC
.MIB
->getAliases(B
, false);
44 // Regular instructions
45 for (BinaryBasicBlock
&BB
: Function
) {
46 for (MCInst
&Inst
: BB
) {
47 for (MCOperand
&Operand
: MCPlus::primeOperands(Inst
)) {
51 unsigned Reg
= Operand
.getReg();
52 if (AliasA
.test(Reg
)) {
53 Operand
.setReg(BC
.MIB
->getAliasSized(B
, BC
.MIB
->getRegSize(Reg
)));
55 DynBytesSaved
-= BB
.getKnownExecutionCount();
58 if (!AliasB
.test(Reg
))
60 Operand
.setReg(BC
.MIB
->getAliasSized(A
, BC
.MIB
->getRegSize(Reg
)));
62 DynBytesSaved
+= BB
.getKnownExecutionCount();
68 DenseSet
<const MCCFIInstruction
*> Changed
;
69 for (BinaryBasicBlock
&BB
: Function
) {
70 for (MCInst
&Inst
: BB
) {
71 if (!BC
.MIB
->isCFI(Inst
))
73 const MCCFIInstruction
*CFI
= Function
.getCFIFor(Inst
);
74 if (Changed
.count(CFI
))
78 switch (CFI
->getOperation()) {
79 case MCCFIInstruction::OpRegister
: {
80 const unsigned CFIReg2
= CFI
->getRegister2();
81 const MCPhysReg Reg2
= *BC
.MRI
->getLLVMRegNum(CFIReg2
, /*isEH=*/false);
82 if (AliasA
.test(Reg2
)) {
84 Inst
, MCCFIInstruction::createRegister(
85 nullptr, CFI
->getRegister(),
86 BC
.MRI
->getDwarfRegNum(
87 BC
.MIB
->getAliasSized(B
, BC
.MIB
->getRegSize(Reg2
)),
89 } else if (AliasB
.test(Reg2
)) {
91 Inst
, MCCFIInstruction::createRegister(
92 nullptr, CFI
->getRegister(),
93 BC
.MRI
->getDwarfRegNum(
94 BC
.MIB
->getAliasSized(A
, BC
.MIB
->getRegSize(Reg2
)),
99 case MCCFIInstruction::OpUndefined
:
100 case MCCFIInstruction::OpDefCfa
:
101 case MCCFIInstruction::OpOffset
:
102 case MCCFIInstruction::OpRestore
:
103 case MCCFIInstruction::OpSameValue
:
104 case MCCFIInstruction::OpDefCfaRegister
:
105 case MCCFIInstruction::OpRelOffset
:
106 case MCCFIInstruction::OpEscape
: {
108 if (CFI
->getOperation() != MCCFIInstruction::OpEscape
) {
109 CFIReg
= CFI
->getRegister();
111 std::optional
<uint8_t> Reg
=
112 readDWARFExpressionTargetReg(CFI
->getValues());
113 // Handle DW_CFA_def_cfa_expression
118 const MCPhysReg Reg
= *BC
.MRI
->getLLVMRegNum(CFIReg
, /*isEH=*/false);
119 if (AliasA
.test(Reg
))
120 Function
.mutateCFIRegisterFor(
122 BC
.MRI
->getDwarfRegNum(
123 BC
.MIB
->getAliasSized(B
, BC
.MIB
->getRegSize(Reg
)), false));
124 else if (AliasB
.test(Reg
))
125 Function
.mutateCFIRegisterFor(
127 BC
.MRI
->getDwarfRegNum(
128 BC
.MIB
->getAliasSized(A
, BC
.MIB
->getRegSize(Reg
)), false));
138 void RegReAssign::rankRegisters(BinaryFunction
&Function
) {
139 BinaryContext
&BC
= Function
.getBinaryContext();
140 std::fill(RegScore
.begin(), RegScore
.end(), 0);
141 std::fill(RankedRegs
.begin(), RankedRegs
.end(), 0);
143 auto countRegScore
= [&](BinaryBasicBlock
&BB
) {
144 for (MCInst
&Inst
: BB
) {
145 const bool CannotUseREX
= BC
.MIB
->cannotUseREX(Inst
);
146 const MCInstrDesc
&Desc
= BC
.MII
->get(Inst
.getOpcode());
148 // Disallow substituitions involving regs in implicit uses lists
149 for (MCPhysReg ImplicitUse
: Desc
.implicit_uses()) {
151 BC
.MIB
->getAliases(ImplicitUse
, false).find_first();
153 std::numeric_limits
<decltype(RegScore
)::value_type
>::min();
156 // Disallow substituitions involving regs in implicit defs lists
157 for (MCPhysReg ImplicitDef
: Desc
.implicit_defs()) {
159 BC
.MIB
->getAliases(ImplicitDef
, false).find_first();
161 std::numeric_limits
<decltype(RegScore
)::value_type
>::min();
164 for (int I
= 0, E
= MCPlus::getNumPrimeOperands(Inst
); I
!= E
; ++I
) {
165 const MCOperand
&Operand
= Inst
.getOperand(I
);
166 if (!Operand
.isReg())
169 if (Desc
.getOperandConstraint(I
, MCOI::TIED_TO
) != -1)
172 unsigned Reg
= Operand
.getReg();
173 size_t RegEC
= BC
.MIB
->getAliases(Reg
, false).find_first();
177 // Disallow substituitions involving regs in instrs that cannot use REX
178 // The relationship of X86 registers is shown in the diagram. BL and BH
179 // do not have a direct alias relationship. However, if the BH register
180 // cannot be swapped, then the BX/EBX/RBX registers cannot be swapped as
181 // well, which means that BL register also cannot be swapped. Therefore,
182 // in the presence of BX/EBX/RBX registers, BL and BH have an alias
184 // ┌─────────────────┐
186 // ├─────┬───────────┤
188 // ├─────┴──┬────────┤
190 // ├────────┼───┬────┤
192 // └────────┴───┴────┘
195 std::numeric_limits
<decltype(RegScore
)::value_type
>::min();
196 RegScore
[BC
.MIB
->getAliasSized(Reg
, 1)] = RegScore
[RegEC
];
200 // Unsupported substitution, cannot swap BH with R* regs, bail
201 if (BC
.MIB
->isUpper8BitReg(Reg
) && ClassicCSR
.test(Reg
)) {
203 std::numeric_limits
<decltype(RegScore
)::value_type
>::min();
204 RegScore
[BC
.MIB
->getAliasSized(Reg
, 1)] = RegScore
[RegEC
];
208 RegScore
[RegEC
] += BB
.getKnownExecutionCount();
212 for (BinaryBasicBlock
&BB
: Function
)
215 for (BinaryFunction
*ChildFrag
: Function
.getFragments()) {
216 for (BinaryBasicBlock
&BB
: *ChildFrag
)
220 std::iota(RankedRegs
.begin(), RankedRegs
.end(), 0); // 0, 1, 2, 3...
221 llvm::sort(RankedRegs
,
222 [&](size_t A
, size_t B
) { return RegScore
[A
] > RegScore
[B
]; });
225 for (size_t Reg
: RankedRegs
) {
226 if (RegScore
[Reg
] == 0)
228 dbgs() << Reg
<< " ";
229 if (RegScore
[Reg
] > 0)
230 dbgs() << BC
.MRI
->getName(Reg
) << ": " << RegScore
[Reg
] << "\n";
232 dbgs() << BC
.MRI
->getName(Reg
) << ": (blacklisted)\n";
237 void RegReAssign::aggressivePassOverFunction(BinaryFunction
&Function
) {
238 BinaryContext
&BC
= Function
.getBinaryContext();
239 rankRegisters(Function
);
241 // If there is a situation where function:
243 // A.localalias() -> A.cold()
244 // simply swapping these two calls can cause issues.
245 for (BinaryFunction
*ChildFrag
: Function
.getFragments()) {
246 if (ChildFrag
->getParentFragments()->size() > 1)
248 if (ChildFrag
->empty())
252 // Bail early if our registers are all black listed, before running expensive
255 int64_t LowScoreClassic
= std::numeric_limits
<int64_t>::max();
256 for (int J
: ClassicRegs
.set_bits()) {
257 if (RegScore
[J
] <= 0)
260 if (RegScore
[J
] < LowScoreClassic
)
261 LowScoreClassic
= RegScore
[J
];
265 BitVector Extended
= ClassicRegs
;
269 int64_t HighScoreExtended
= 0;
270 for (int J
: Extended
.set_bits()) {
271 if (RegScore
[J
] <= 0)
274 if (RegScore
[J
] > HighScoreExtended
)
275 HighScoreExtended
= RegScore
[J
];
277 // Also bail early if there is no profitable substitution even if we assume
278 // all registers can be exchanged
279 if (Bail
|| (LowScoreClassic
<< 1) >= HighScoreExtended
)
282 // -- expensive pass -- determine all regs alive during func start
283 DataflowInfoManager
Info(Function
, RA
.get(), nullptr);
284 BitVector AliveAtStart
= *Info
.getLivenessAnalysis().getStateAt(
285 ProgramPoint::getFirstPointAt(*Function
.begin()));
286 for (BinaryBasicBlock
&BB
: Function
)
287 if (BB
.pred_size() == 0)
288 AliveAtStart
|= *Info
.getLivenessAnalysis().getStateAt(
289 ProgramPoint::getFirstPointAt(BB
));
291 // Mark frame pointer alive because of CFI
292 AliveAtStart
|= BC
.MIB
->getAliases(BC
.MIB
->getFramePointer(), false);
293 // Never touch return registers
294 BC
.MIB
->getDefaultLiveOut(AliveAtStart
);
296 // Try swapping more profitable options first
297 auto Begin
= RankedRegs
.begin();
298 auto End
= std::prev(RankedRegs
.end());
299 while (Begin
!= End
) {
300 MCPhysReg ClassicReg
= *End
;
301 if (!ClassicRegs
[ClassicReg
] || RegScore
[ClassicReg
] <= 0) {
306 MCPhysReg ExtReg
= *Begin
;
307 if (!Extended
[ExtReg
] || RegScore
[ExtReg
] <= 0) {
312 if (RegScore
[ClassicReg
] << 1 >= RegScore
[ExtReg
]) {
313 LLVM_DEBUG(dbgs() << " Ending at " << BC
.MRI
->getName(ClassicReg
)
314 << " with " << BC
.MRI
->getName(ExtReg
)
315 << " because exchange is not profitable\n");
319 BitVector AnyAliasAlive
= AliveAtStart
;
320 AnyAliasAlive
&= BC
.MIB
->getAliases(ClassicReg
);
321 if (AnyAliasAlive
.any()) {
322 LLVM_DEBUG(dbgs() << " Bailed on " << BC
.MRI
->getName(ClassicReg
)
323 << " with " << BC
.MRI
->getName(ExtReg
)
324 << " because classic reg is alive\n");
328 AnyAliasAlive
= AliveAtStart
;
329 AnyAliasAlive
&= BC
.MIB
->getAliases(ExtReg
);
330 if (AnyAliasAlive
.any()) {
331 LLVM_DEBUG(dbgs() << " Bailed on " << BC
.MRI
->getName(ClassicReg
)
332 << " with " << BC
.MRI
->getName(ExtReg
)
333 << " because extended reg is alive\n");
338 // Opportunity detected. Swap.
339 LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC
.MRI
->getName(ClassicReg
)
340 << " with " << BC
.MRI
->getName(ExtReg
) << "\n\n");
341 swap(Function
, ClassicReg
, ExtReg
);
342 FuncsChanged
.insert(&Function
);
343 for (BinaryFunction
*ChildFrag
: Function
.getFragments()) {
344 swap(*ChildFrag
, ClassicReg
, ExtReg
);
345 FuncsChanged
.insert(ChildFrag
);
354 bool RegReAssign::conservativePassOverFunction(BinaryFunction
&Function
) {
355 BinaryContext
&BC
= Function
.getBinaryContext();
356 rankRegisters(Function
);
358 for (BinaryFunction
*ChildFrag
: Function
.getFragments()) {
359 if (ChildFrag
->getParentFragments()->size() > 1)
361 if (ChildFrag
->empty())
365 // Try swapping R12, R13, R14 or R15 with RBX (we work with all callee-saved
367 MCPhysReg Candidate
= 0;
368 for (int J
: ExtendedCSR
.set_bits())
369 if (RegScore
[J
] > RegScore
[Candidate
])
372 if (!Candidate
|| RegScore
[Candidate
] < 0)
375 // Check if our classic callee-saved reg (RBX is the only one) has lower
376 // score / utilization rate
378 for (int I
: ClassicCSR
.set_bits()) {
379 int64_t ScoreRBX
= RegScore
[I
];
383 if (RegScore
[Candidate
] > (ScoreRBX
+ 10))
390 // The high 8 bits of the register will never be swapped. To prevent the high
391 // 8 bits from being swapped incorrectly, we should switched to swapping the
392 // low 8 bits of the register instead.
393 if (BC
.MIB
->isUpper8BitReg(RBX
)) {
394 RBX
= BC
.MIB
->getAliasSized(RBX
, 1);
395 if (RegScore
[RBX
] < 0 || RegScore
[RBX
] > RegScore
[Candidate
])
399 LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC
.MRI
->getName(RBX
) << " with "
400 << BC
.MRI
->getName(Candidate
) << "\n\n");
402 swap(Function
, RBX
, Candidate
);
403 FuncsChanged
.insert(&Function
);
404 for (BinaryFunction
*ChildFrag
: Function
.getFragments()) {
405 swap(*ChildFrag
, RBX
, Candidate
);
406 FuncsChanged
.insert(ChildFrag
);
411 void RegReAssign::setupAggressivePass(BinaryContext
&BC
,
412 std::map
<uint64_t, BinaryFunction
> &BFs
) {
413 setupConservativePass(BC
, BFs
);
414 CG
.reset(new BinaryFunctionCallGraph(buildCallGraph(BC
)));
415 RA
.reset(new RegAnalysis(BC
, &BFs
, &*CG
));
417 GPRegs
= BitVector(BC
.MRI
->getNumRegs(), false);
418 BC
.MIB
->getGPRegs(GPRegs
);
421 void RegReAssign::setupConservativePass(
422 BinaryContext
&BC
, std::map
<uint64_t, BinaryFunction
> &BFs
) {
423 // Set up constant bitvectors used throughout this analysis
424 ClassicRegs
= BitVector(BC
.MRI
->getNumRegs(), false);
425 CalleeSaved
= BitVector(BC
.MRI
->getNumRegs(), false);
426 ClassicCSR
= BitVector(BC
.MRI
->getNumRegs(), false);
427 ExtendedCSR
= BitVector(BC
.MRI
->getNumRegs(), false);
428 // Never consider the frame pointer
429 BC
.MIB
->getClassicGPRegs(ClassicRegs
);
431 ClassicRegs
|= BC
.MIB
->getAliases(BC
.MIB
->getFramePointer(), false);
433 BC
.MIB
->getCalleeSavedRegs(CalleeSaved
);
434 ClassicCSR
|= ClassicRegs
;
435 ClassicCSR
&= CalleeSaved
;
436 BC
.MIB
->getClassicGPRegs(ClassicRegs
);
437 ExtendedCSR
|= ClassicRegs
;
439 ExtendedCSR
&= CalleeSaved
;
442 RegStatePrinter
P(BC
);
443 dbgs() << "Starting register reassignment\nClassicRegs: ";
444 P
.print(dbgs(), ClassicRegs
);
445 dbgs() << "\nCalleeSaved: ";
446 P
.print(dbgs(), CalleeSaved
);
447 dbgs() << "\nClassicCSR: ";
448 P
.print(dbgs(), ClassicCSR
);
449 dbgs() << "\nExtendedCSR: ";
450 P
.print(dbgs(), ExtendedCSR
);
455 void RegReAssign::runOnFunctions(BinaryContext
&BC
) {
456 RegScore
= std::vector
<int64_t>(BC
.MRI
->getNumRegs(), 0);
457 RankedRegs
= std::vector
<size_t>(BC
.MRI
->getNumRegs(), 0);
459 if (opts::AggressiveReAssign
)
460 setupAggressivePass(BC
, BC
.getBinaryFunctions());
462 setupConservativePass(BC
, BC
.getBinaryFunctions());
464 for (auto &I
: BC
.getBinaryFunctions()) {
465 BinaryFunction
&Function
= I
.second
;
467 if (!Function
.isSimple() || Function
.isIgnored() || Function
.isFragment())
470 LLVM_DEBUG(dbgs() << "====================================\n");
471 LLVM_DEBUG(dbgs() << " - " << Function
.getPrintName() << "\n");
472 if (!conservativePassOverFunction(Function
) && opts::AggressiveReAssign
) {
473 aggressivePassOverFunction(Function
);
475 if (FuncsChanged
.count(&Function
))
476 dbgs() << "Aggressive pass successful on " << Function
.getPrintName()
482 if (FuncsChanged
.empty()) {
483 outs() << "BOLT-INFO: Reg Reassignment Pass: no changes were made.\n";
486 if (opts::UpdateDebugSections
)
487 outs() << "BOLT-WARNING: You used -reg-reassign and -update-debug-sections."
488 << " Some registers were changed but associated AT_LOCATION for "
489 << "impacted variables were NOT updated! This operation is "
490 << "currently unsupported by BOLT.\n";
491 outs() << "BOLT-INFO: Reg Reassignment Pass Stats:\n";
492 outs() << "\t " << FuncsChanged
.size() << " functions affected.\n";
493 outs() << "\t " << StaticBytesSaved
<< " static bytes saved.\n";
494 outs() << "\t " << DynBytesSaved
<< " dynamic bytes saved.\n";