1 //===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
10 // register allocator for LLVM. This allocator works by constructing a PBQP
11 // problem representing the register allocation problem under consideration,
12 // solving this using a PBQP solver, and mapping the solution back to a
13 // register assignment. If any variables are selected for spilling then spill
14 // code is inserted and the process repeated.
16 // The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
17 // for register allocation. For more information on PBQP for register
18 // allocation, see the following papers:
20 // (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
21 // PBQP. In Proceedings of the 7th Joint Modular Languages Conference
22 // (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
24 // (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
25 // architectures. In Proceedings of the Joint Conference on Languages,
26 // Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
29 //===----------------------------------------------------------------------===//
31 #include "llvm/CodeGen/RegAllocPBQP.h"
32 #include "RegisterCoalescer.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/BitVector.h"
36 #include "llvm/ADT/DenseMap.h"
37 #include "llvm/ADT/DenseSet.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/ADT/StringRef.h"
42 #include "llvm/Analysis/AliasAnalysis.h"
43 #include "llvm/CodeGen/CalcSpillWeights.h"
44 #include "llvm/CodeGen/LiveInterval.h"
45 #include "llvm/CodeGen/LiveIntervals.h"
46 #include "llvm/CodeGen/LiveRangeEdit.h"
47 #include "llvm/CodeGen/LiveStacks.h"
48 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
49 #include "llvm/CodeGen/MachineDominators.h"
50 #include "llvm/CodeGen/MachineFunction.h"
51 #include "llvm/CodeGen/MachineFunctionPass.h"
52 #include "llvm/CodeGen/MachineInstr.h"
53 #include "llvm/CodeGen/MachineLoopInfo.h"
54 #include "llvm/CodeGen/MachineRegisterInfo.h"
55 #include "llvm/CodeGen/PBQP/Graph.h"
56 #include "llvm/CodeGen/PBQP/Math.h"
57 #include "llvm/CodeGen/PBQP/Solution.h"
58 #include "llvm/CodeGen/PBQPRAConstraint.h"
59 #include "llvm/CodeGen/RegAllocRegistry.h"
60 #include "llvm/CodeGen/SlotIndexes.h"
61 #include "llvm/CodeGen/TargetRegisterInfo.h"
62 #include "llvm/CodeGen/TargetSubtargetInfo.h"
63 #include "llvm/CodeGen/VirtRegMap.h"
64 #include "llvm/Config/llvm-config.h"
65 #include "llvm/IR/Function.h"
66 #include "llvm/IR/Module.h"
67 #include "llvm/MC/MCRegisterInfo.h"
68 #include "llvm/Pass.h"
69 #include "llvm/Support/CommandLine.h"
70 #include "llvm/Support/Compiler.h"
71 #include "llvm/Support/Debug.h"
72 #include "llvm/Support/FileSystem.h"
73 #include "llvm/Support/Printable.h"
74 #include "llvm/Support/raw_ostream.h"
85 #include <system_error>
92 #define DEBUG_TYPE "regalloc"
94 static RegisterRegAlloc
95 RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
96 createDefaultPBQPRegisterAllocator
);
99 PBQPCoalescing("pbqp-coalescing",
100 cl::desc("Attempt coalescing during PBQP register allocation."),
101 cl::init(false), cl::Hidden
);
105 PBQPDumpGraphs("pbqp-dump-graphs",
106 cl::desc("Dump graphs for each function/round in the compilation unit."),
107 cl::init(false), cl::Hidden
);
113 /// PBQP based allocators solve the register allocation problem by mapping
114 /// register allocation problems to Partitioned Boolean Quadratic
115 /// Programming problems.
116 class RegAllocPBQP
: public MachineFunctionPass
{
120 /// Construct a PBQP register allocator.
121 RegAllocPBQP(char *cPassID
= nullptr)
122 : MachineFunctionPass(ID
), customPassID(cPassID
) {
123 initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
124 initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
125 initializeLiveStacksPass(*PassRegistry::getPassRegistry());
126 initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
129 /// Return the pass name.
130 StringRef
getPassName() const override
{ return "PBQP Register Allocator"; }
132 /// PBQP analysis usage.
133 void getAnalysisUsage(AnalysisUsage
&au
) const override
;
135 /// Perform register allocation
136 bool runOnMachineFunction(MachineFunction
&MF
) override
;
138 MachineFunctionProperties
getRequiredProperties() const override
{
139 return MachineFunctionProperties().set(
140 MachineFunctionProperties::Property::NoPHIs
);
144 using LI2NodeMap
= std::map
<const LiveInterval
*, unsigned>;
145 using Node2LIMap
= std::vector
<const LiveInterval
*>;
146 using AllowedSet
= std::vector
<unsigned>;
147 using AllowedSetMap
= std::vector
<AllowedSet
>;
148 using RegPair
= std::pair
<unsigned, unsigned>;
149 using CoalesceMap
= std::map
<RegPair
, PBQP::PBQPNum
>;
150 using RegSet
= std::set
<unsigned>;
154 RegSet VRegsToAlloc
, EmptyIntervalVRegs
;
156 /// Inst which is a def of an original reg and whose defs are already all
157 /// dead after remat is saved in DeadRemats. The deletion of such inst is
158 /// postponed till all the allocations are done, so its remat expr is
159 /// always available for the remat of all the siblings of the original reg.
160 SmallPtrSet
<MachineInstr
*, 32> DeadRemats
;
162 /// Finds the initial set of vreg intervals to allocate.
163 void findVRegIntervalsToAlloc(const MachineFunction
&MF
, LiveIntervals
&LIS
);
165 /// Constructs an initial graph.
166 void initializeGraph(PBQPRAGraph
&G
, VirtRegMap
&VRM
, Spiller
&VRegSpiller
);
168 /// Spill the given VReg.
169 void spillVReg(unsigned VReg
, SmallVectorImpl
<unsigned> &NewIntervals
,
170 MachineFunction
&MF
, LiveIntervals
&LIS
, VirtRegMap
&VRM
,
171 Spiller
&VRegSpiller
);
173 /// Given a solved PBQP problem maps this solution back to a register
175 bool mapPBQPToRegAlloc(const PBQPRAGraph
&G
,
176 const PBQP::Solution
&Solution
,
178 Spiller
&VRegSpiller
);
180 /// Postprocessing before final spilling. Sets basic block "live in"
182 void finalizeAlloc(MachineFunction
&MF
, LiveIntervals
&LIS
,
183 VirtRegMap
&VRM
) const;
185 void postOptimization(Spiller
&VRegSpiller
, LiveIntervals
&LIS
);
188 char RegAllocPBQP::ID
= 0;
190 /// Set spill costs for each node in the PBQP reg-alloc graph.
191 class SpillCosts
: public PBQPRAConstraint
{
193 void apply(PBQPRAGraph
&G
) override
{
194 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
196 // A minimum spill costs, so that register constraints can can be set
197 // without normalization in the [0.0:MinSpillCost( interval.
198 const PBQP::PBQPNum MinSpillCost
= 10.0;
200 for (auto NId
: G
.nodeIds()) {
201 PBQP::PBQPNum SpillCost
=
202 LIS
.getInterval(G
.getNodeMetadata(NId
).getVReg()).weight
;
203 if (SpillCost
== 0.0)
204 SpillCost
= std::numeric_limits
<PBQP::PBQPNum
>::min();
206 SpillCost
+= MinSpillCost
;
207 PBQPRAGraph::RawVector
NodeCosts(G
.getNodeCosts(NId
));
208 NodeCosts
[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost
;
209 G
.setNodeCosts(NId
, std::move(NodeCosts
));
214 /// Add interference edges between overlapping vregs.
215 class Interference
: public PBQPRAConstraint
{
217 using AllowedRegVecPtr
= const PBQP::RegAlloc::AllowedRegVector
*;
218 using IKey
= std::pair
<AllowedRegVecPtr
, AllowedRegVecPtr
>;
219 using IMatrixCache
= DenseMap
<IKey
, PBQPRAGraph::MatrixPtr
>;
220 using DisjointAllowedRegsCache
= DenseSet
<IKey
>;
221 using IEdgeKey
= std::pair
<PBQP::GraphBase::NodeId
, PBQP::GraphBase::NodeId
>;
222 using IEdgeCache
= DenseSet
<IEdgeKey
>;
224 bool haveDisjointAllowedRegs(const PBQPRAGraph
&G
, PBQPRAGraph::NodeId NId
,
225 PBQPRAGraph::NodeId MId
,
226 const DisjointAllowedRegsCache
&D
) const {
227 const auto *NRegs
= &G
.getNodeMetadata(NId
).getAllowedRegs();
228 const auto *MRegs
= &G
.getNodeMetadata(MId
).getAllowedRegs();
234 return D
.count(IKey(NRegs
, MRegs
)) > 0;
236 return D
.count(IKey(MRegs
, NRegs
)) > 0;
239 void setDisjointAllowedRegs(const PBQPRAGraph
&G
, PBQPRAGraph::NodeId NId
,
240 PBQPRAGraph::NodeId MId
,
241 DisjointAllowedRegsCache
&D
) {
242 const auto *NRegs
= &G
.getNodeMetadata(NId
).getAllowedRegs();
243 const auto *MRegs
= &G
.getNodeMetadata(MId
).getAllowedRegs();
245 assert(NRegs
!= MRegs
&& "AllowedRegs can not be disjoint with itself");
248 D
.insert(IKey(NRegs
, MRegs
));
250 D
.insert(IKey(MRegs
, NRegs
));
253 // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
254 // for the fast interference graph construction algorithm. The last is there
255 // to save us from looking up node ids via the VRegToNode map in the graph
258 std::tuple
<LiveInterval
*, size_t, PBQP::GraphBase::NodeId
>;
260 static SlotIndex
getStartPoint(const IntervalInfo
&I
) {
261 return std::get
<0>(I
)->segments
[std::get
<1>(I
)].start
;
264 static SlotIndex
getEndPoint(const IntervalInfo
&I
) {
265 return std::get
<0>(I
)->segments
[std::get
<1>(I
)].end
;
268 static PBQP::GraphBase::NodeId
getNodeId(const IntervalInfo
&I
) {
269 return std::get
<2>(I
);
272 static bool lowestStartPoint(const IntervalInfo
&I1
,
273 const IntervalInfo
&I2
) {
274 // Condition reversed because priority queue has the *highest* element at
275 // the front, rather than the lowest.
276 return getStartPoint(I1
) > getStartPoint(I2
);
279 static bool lowestEndPoint(const IntervalInfo
&I1
,
280 const IntervalInfo
&I2
) {
281 SlotIndex E1
= getEndPoint(I1
);
282 SlotIndex E2
= getEndPoint(I2
);
290 // If two intervals end at the same point, we need a way to break the tie or
291 // the set will assume they're actually equal and refuse to insert a
292 // "duplicate". Just compare the vregs - fast and guaranteed unique.
293 return std::get
<0>(I1
)->reg
< std::get
<0>(I2
)->reg
;
296 static bool isAtLastSegment(const IntervalInfo
&I
) {
297 return std::get
<1>(I
) == std::get
<0>(I
)->size() - 1;
300 static IntervalInfo
nextSegment(const IntervalInfo
&I
) {
301 return std::make_tuple(std::get
<0>(I
), std::get
<1>(I
) + 1, std::get
<2>(I
));
305 void apply(PBQPRAGraph
&G
) override
{
306 // The following is loosely based on the linear scan algorithm introduced in
307 // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
308 // isn't linear, because the size of the active set isn't bound by the
309 // number of registers, but rather the size of the largest clique in the
310 // graph. Still, we expect this to be better than N^2.
311 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
313 // Interferenc matrices are incredibly regular - they're only a function of
314 // the allowed sets, so we cache them to avoid the overhead of constructing
315 // and uniquing them.
318 // Finding an edge is expensive in the worst case (O(max_clique(G))). So
319 // cache locally edges we have already seen.
322 // Cache known disjoint allowed registers pairs
323 DisjointAllowedRegsCache D
;
325 using IntervalSet
= std::set
<IntervalInfo
, decltype(&lowestEndPoint
)>;
326 using IntervalQueue
=
327 std::priority_queue
<IntervalInfo
, std::vector
<IntervalInfo
>,
328 decltype(&lowestStartPoint
)>;
329 IntervalSet
Active(lowestEndPoint
);
330 IntervalQueue
Inactive(lowestStartPoint
);
332 // Start by building the inactive set.
333 for (auto NId
: G
.nodeIds()) {
334 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
335 LiveInterval
&LI
= LIS
.getInterval(VReg
);
336 assert(!LI
.empty() && "PBQP graph contains node for empty interval");
337 Inactive
.push(std::make_tuple(&LI
, 0, NId
));
340 while (!Inactive
.empty()) {
341 // Tentatively grab the "next" interval - this choice may be overriden
343 IntervalInfo Cur
= Inactive
.top();
345 // Retire any active intervals that end before Cur starts.
346 IntervalSet::iterator RetireItr
= Active
.begin();
347 while (RetireItr
!= Active
.end() &&
348 (getEndPoint(*RetireItr
) <= getStartPoint(Cur
))) {
349 // If this interval has subsequent segments, add the next one to the
351 if (!isAtLastSegment(*RetireItr
))
352 Inactive
.push(nextSegment(*RetireItr
));
356 Active
.erase(Active
.begin(), RetireItr
);
358 // One of the newly retired segments may actually start before the
359 // Cur segment, so re-grab the front of the inactive list.
360 Cur
= Inactive
.top();
363 // At this point we know that Cur overlaps all active intervals. Add the
364 // interference edges.
365 PBQP::GraphBase::NodeId NId
= getNodeId(Cur
);
366 for (const auto &A
: Active
) {
367 PBQP::GraphBase::NodeId MId
= getNodeId(A
);
369 // Do not add an edge when the nodes' allowed registers do not
370 // intersect: there is obviously no interference.
371 if (haveDisjointAllowedRegs(G
, NId
, MId
, D
))
374 // Check that we haven't already added this edge
375 IEdgeKey
EK(std::min(NId
, MId
), std::max(NId
, MId
));
379 // This is a new edge - add it to the graph.
380 if (!createInterferenceEdge(G
, NId
, MId
, C
))
381 setDisjointAllowedRegs(G
, NId
, MId
, D
);
386 // Finally, add Cur to the Active set.
392 // Create an Interference edge and add it to the graph, unless it is
393 // a null matrix, meaning the nodes' allowed registers do not have any
394 // interference. This case occurs frequently between integer and floating
395 // point registers for example.
396 // return true iff both nodes interferes.
397 bool createInterferenceEdge(PBQPRAGraph
&G
,
398 PBQPRAGraph::NodeId NId
, PBQPRAGraph::NodeId MId
,
400 const TargetRegisterInfo
&TRI
=
401 *G
.getMetadata().MF
.getSubtarget().getRegisterInfo();
402 const auto &NRegs
= G
.getNodeMetadata(NId
).getAllowedRegs();
403 const auto &MRegs
= G
.getNodeMetadata(MId
).getAllowedRegs();
405 // Try looking the edge costs up in the IMatrixCache first.
406 IKey
K(&NRegs
, &MRegs
);
407 IMatrixCache::iterator I
= C
.find(K
);
409 G
.addEdgeBypassingCostAllocator(NId
, MId
, I
->second
);
413 PBQPRAGraph::RawMatrix
M(NRegs
.size() + 1, MRegs
.size() + 1, 0);
414 bool NodesInterfere
= false;
415 for (unsigned I
= 0; I
!= NRegs
.size(); ++I
) {
416 unsigned PRegN
= NRegs
[I
];
417 for (unsigned J
= 0; J
!= MRegs
.size(); ++J
) {
418 unsigned PRegM
= MRegs
[J
];
419 if (TRI
.regsOverlap(PRegN
, PRegM
)) {
420 M
[I
+ 1][J
+ 1] = std::numeric_limits
<PBQP::PBQPNum
>::infinity();
421 NodesInterfere
= true;
429 PBQPRAGraph::EdgeId EId
= G
.addEdge(NId
, MId
, std::move(M
));
430 C
[K
] = G
.getEdgeCostsPtr(EId
);
436 class Coalescing
: public PBQPRAConstraint
{
438 void apply(PBQPRAGraph
&G
) override
{
439 MachineFunction
&MF
= G
.getMetadata().MF
;
440 MachineBlockFrequencyInfo
&MBFI
= G
.getMetadata().MBFI
;
441 CoalescerPair
CP(*MF
.getSubtarget().getRegisterInfo());
443 // Scan the machine function and add a coalescing cost whenever CoalescerPair
445 for (const auto &MBB
: MF
) {
446 for (const auto &MI
: MBB
) {
447 // Skip not-coalescable or already coalesced copies.
448 if (!CP
.setRegisters(&MI
) || CP
.getSrcReg() == CP
.getDstReg())
451 unsigned DstReg
= CP
.getDstReg();
452 unsigned SrcReg
= CP
.getSrcReg();
454 const float Scale
= 1.0f
/ MBFI
.getEntryFreq();
455 PBQP::PBQPNum CBenefit
= MBFI
.getBlockFreq(&MBB
).getFrequency() * Scale
;
458 if (!MF
.getRegInfo().isAllocatable(DstReg
))
461 PBQPRAGraph::NodeId NId
= G
.getMetadata().getNodeIdForVReg(SrcReg
);
463 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed
=
464 G
.getNodeMetadata(NId
).getAllowedRegs();
466 unsigned PRegOpt
= 0;
467 while (PRegOpt
< Allowed
.size() && Allowed
[PRegOpt
] != DstReg
)
470 if (PRegOpt
< Allowed
.size()) {
471 PBQPRAGraph::RawVector
NewCosts(G
.getNodeCosts(NId
));
472 NewCosts
[PRegOpt
+ 1] -= CBenefit
;
473 G
.setNodeCosts(NId
, std::move(NewCosts
));
476 PBQPRAGraph::NodeId N1Id
= G
.getMetadata().getNodeIdForVReg(DstReg
);
477 PBQPRAGraph::NodeId N2Id
= G
.getMetadata().getNodeIdForVReg(SrcReg
);
478 const PBQPRAGraph::NodeMetadata::AllowedRegVector
*Allowed1
=
479 &G
.getNodeMetadata(N1Id
).getAllowedRegs();
480 const PBQPRAGraph::NodeMetadata::AllowedRegVector
*Allowed2
=
481 &G
.getNodeMetadata(N2Id
).getAllowedRegs();
483 PBQPRAGraph::EdgeId EId
= G
.findEdge(N1Id
, N2Id
);
484 if (EId
== G
.invalidEdgeId()) {
485 PBQPRAGraph::RawMatrix
Costs(Allowed1
->size() + 1,
486 Allowed2
->size() + 1, 0);
487 addVirtRegCoalesce(Costs
, *Allowed1
, *Allowed2
, CBenefit
);
488 G
.addEdge(N1Id
, N2Id
, std::move(Costs
));
490 if (G
.getEdgeNode1Id(EId
) == N2Id
) {
491 std::swap(N1Id
, N2Id
);
492 std::swap(Allowed1
, Allowed2
);
494 PBQPRAGraph::RawMatrix
Costs(G
.getEdgeCosts(EId
));
495 addVirtRegCoalesce(Costs
, *Allowed1
, *Allowed2
, CBenefit
);
496 G
.updateEdgeCosts(EId
, std::move(Costs
));
504 void addVirtRegCoalesce(
505 PBQPRAGraph::RawMatrix
&CostMat
,
506 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed1
,
507 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed2
,
508 PBQP::PBQPNum Benefit
) {
509 assert(CostMat
.getRows() == Allowed1
.size() + 1 && "Size mismatch.");
510 assert(CostMat
.getCols() == Allowed2
.size() + 1 && "Size mismatch.");
511 for (unsigned I
= 0; I
!= Allowed1
.size(); ++I
) {
512 unsigned PReg1
= Allowed1
[I
];
513 for (unsigned J
= 0; J
!= Allowed2
.size(); ++J
) {
514 unsigned PReg2
= Allowed2
[J
];
516 CostMat
[I
+ 1][J
+ 1] -= Benefit
;
522 } // end anonymous namespace
524 // Out-of-line destructor/anchor for PBQPRAConstraint.
525 PBQPRAConstraint::~PBQPRAConstraint() = default;
527 void PBQPRAConstraint::anchor() {}
529 void PBQPRAConstraintList::anchor() {}
531 void RegAllocPBQP::getAnalysisUsage(AnalysisUsage
&au
) const {
532 au
.setPreservesCFG();
533 au
.addRequired
<AAResultsWrapperPass
>();
534 au
.addPreserved
<AAResultsWrapperPass
>();
535 au
.addRequired
<SlotIndexes
>();
536 au
.addPreserved
<SlotIndexes
>();
537 au
.addRequired
<LiveIntervals
>();
538 au
.addPreserved
<LiveIntervals
>();
539 //au.addRequiredID(SplitCriticalEdgesID);
541 au
.addRequiredID(*customPassID
);
542 au
.addRequired
<LiveStacks
>();
543 au
.addPreserved
<LiveStacks
>();
544 au
.addRequired
<MachineBlockFrequencyInfo
>();
545 au
.addPreserved
<MachineBlockFrequencyInfo
>();
546 au
.addRequired
<MachineLoopInfo
>();
547 au
.addPreserved
<MachineLoopInfo
>();
548 au
.addRequired
<MachineDominatorTree
>();
549 au
.addPreserved
<MachineDominatorTree
>();
550 au
.addRequired
<VirtRegMap
>();
551 au
.addPreserved
<VirtRegMap
>();
552 MachineFunctionPass::getAnalysisUsage(au
);
555 void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction
&MF
,
556 LiveIntervals
&LIS
) {
557 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
559 // Iterate over all live ranges.
560 for (unsigned I
= 0, E
= MRI
.getNumVirtRegs(); I
!= E
; ++I
) {
561 unsigned Reg
= Register::index2VirtReg(I
);
562 if (MRI
.reg_nodbg_empty(Reg
))
564 VRegsToAlloc
.insert(Reg
);
568 static bool isACalleeSavedRegister(unsigned reg
, const TargetRegisterInfo
&TRI
,
569 const MachineFunction
&MF
) {
570 const MCPhysReg
*CSR
= MF
.getRegInfo().getCalleeSavedRegs();
571 for (unsigned i
= 0; CSR
[i
] != 0; ++i
)
572 if (TRI
.regsOverlap(reg
, CSR
[i
]))
577 void RegAllocPBQP::initializeGraph(PBQPRAGraph
&G
, VirtRegMap
&VRM
,
578 Spiller
&VRegSpiller
) {
579 MachineFunction
&MF
= G
.getMetadata().MF
;
581 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
582 const MachineRegisterInfo
&MRI
= G
.getMetadata().MF
.getRegInfo();
583 const TargetRegisterInfo
&TRI
=
584 *G
.getMetadata().MF
.getSubtarget().getRegisterInfo();
586 std::vector
<unsigned> Worklist(VRegsToAlloc
.begin(), VRegsToAlloc
.end());
588 std::map
<unsigned, std::vector
<unsigned>> VRegAllowedMap
;
590 while (!Worklist
.empty()) {
591 unsigned VReg
= Worklist
.back();
594 LiveInterval
&VRegLI
= LIS
.getInterval(VReg
);
596 // If this is an empty interval move it to the EmptyIntervalVRegs set then
598 if (VRegLI
.empty()) {
599 EmptyIntervalVRegs
.insert(VRegLI
.reg
);
600 VRegsToAlloc
.erase(VRegLI
.reg
);
604 const TargetRegisterClass
*TRC
= MRI
.getRegClass(VReg
);
606 // Record any overlaps with regmask operands.
607 BitVector RegMaskOverlaps
;
608 LIS
.checkRegMaskInterference(VRegLI
, RegMaskOverlaps
);
610 // Compute an initial allowed set for the current vreg.
611 std::vector
<unsigned> VRegAllowed
;
612 ArrayRef
<MCPhysReg
> RawPRegOrder
= TRC
->getRawAllocationOrder(MF
);
613 for (unsigned I
= 0; I
!= RawPRegOrder
.size(); ++I
) {
614 unsigned PReg
= RawPRegOrder
[I
];
615 if (MRI
.isReserved(PReg
))
618 // vregLI crosses a regmask operand that clobbers preg.
619 if (!RegMaskOverlaps
.empty() && !RegMaskOverlaps
.test(PReg
))
622 // vregLI overlaps fixed regunit interference.
623 bool Interference
= false;
624 for (MCRegUnitIterator
Units(PReg
, &TRI
); Units
.isValid(); ++Units
) {
625 if (VRegLI
.overlaps(LIS
.getRegUnit(*Units
))) {
633 // preg is usable for this virtual register.
634 VRegAllowed
.push_back(PReg
);
637 // Check for vregs that have no allowed registers. These should be
638 // pre-spilled and the new vregs added to the worklist.
639 if (VRegAllowed
.empty()) {
640 SmallVector
<unsigned, 8> NewVRegs
;
641 spillVReg(VReg
, NewVRegs
, MF
, LIS
, VRM
, VRegSpiller
);
642 Worklist
.insert(Worklist
.end(), NewVRegs
.begin(), NewVRegs
.end());
645 VRegAllowedMap
[VReg
] = std::move(VRegAllowed
);
648 for (auto &KV
: VRegAllowedMap
) {
649 auto VReg
= KV
.first
;
651 // Move empty intervals to the EmptyIntervalVReg set.
652 if (LIS
.getInterval(VReg
).empty()) {
653 EmptyIntervalVRegs
.insert(VReg
);
654 VRegsToAlloc
.erase(VReg
);
658 auto &VRegAllowed
= KV
.second
;
660 PBQPRAGraph::RawVector
NodeCosts(VRegAllowed
.size() + 1, 0);
662 // Tweak cost of callee saved registers, as using then force spilling and
663 // restoring them. This would only happen in the prologue / epilogue though.
664 for (unsigned i
= 0; i
!= VRegAllowed
.size(); ++i
)
665 if (isACalleeSavedRegister(VRegAllowed
[i
], TRI
, MF
))
666 NodeCosts
[1 + i
] += 1.0;
668 PBQPRAGraph::NodeId NId
= G
.addNode(std::move(NodeCosts
));
669 G
.getNodeMetadata(NId
).setVReg(VReg
);
670 G
.getNodeMetadata(NId
).setAllowedRegs(
671 G
.getMetadata().getAllowedRegs(std::move(VRegAllowed
)));
672 G
.getMetadata().setNodeIdForVReg(VReg
, NId
);
676 void RegAllocPBQP::spillVReg(unsigned VReg
,
677 SmallVectorImpl
<unsigned> &NewIntervals
,
678 MachineFunction
&MF
, LiveIntervals
&LIS
,
679 VirtRegMap
&VRM
, Spiller
&VRegSpiller
) {
680 VRegsToAlloc
.erase(VReg
);
681 LiveRangeEdit
LRE(&LIS
.getInterval(VReg
), NewIntervals
, MF
, LIS
, &VRM
,
682 nullptr, &DeadRemats
);
683 VRegSpiller
.spill(LRE
);
685 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
687 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg
, &TRI
) << " -> SPILLED (Cost: "
688 << LRE
.getParent().weight
<< ", New vregs: ");
690 // Copy any newly inserted live intervals into the list of regs to
692 for (LiveRangeEdit::iterator I
= LRE
.begin(), E
= LRE
.end();
694 const LiveInterval
&LI
= LIS
.getInterval(*I
);
695 assert(!LI
.empty() && "Empty spill range.");
696 LLVM_DEBUG(dbgs() << printReg(LI
.reg
, &TRI
) << " ");
697 VRegsToAlloc
.insert(LI
.reg
);
700 LLVM_DEBUG(dbgs() << ")\n");
703 bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph
&G
,
704 const PBQP::Solution
&Solution
,
706 Spiller
&VRegSpiller
) {
707 MachineFunction
&MF
= G
.getMetadata().MF
;
708 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
709 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
712 // Set to true if we have any spills
713 bool AnotherRoundNeeded
= false;
715 // Clear the existing allocation.
718 // Iterate over the nodes mapping the PBQP solution to a register
720 for (auto NId
: G
.nodeIds()) {
721 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
722 unsigned AllocOption
= Solution
.getSelection(NId
);
724 if (AllocOption
!= PBQP::RegAlloc::getSpillOptionIdx()) {
725 unsigned PReg
= G
.getNodeMetadata(NId
).getAllowedRegs()[AllocOption
- 1];
726 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg
, &TRI
) << " -> "
727 << TRI
.getName(PReg
) << "\n");
728 assert(PReg
!= 0 && "Invalid preg selected.");
729 VRM
.assignVirt2Phys(VReg
, PReg
);
731 // Spill VReg. If this introduces new intervals we'll need another round
733 SmallVector
<unsigned, 8> NewVRegs
;
734 spillVReg(VReg
, NewVRegs
, MF
, LIS
, VRM
, VRegSpiller
);
735 AnotherRoundNeeded
|= !NewVRegs
.empty();
739 return !AnotherRoundNeeded
;
742 void RegAllocPBQP::finalizeAlloc(MachineFunction
&MF
,
744 VirtRegMap
&VRM
) const {
745 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
747 // First allocate registers for the empty intervals.
748 for (RegSet::const_iterator
749 I
= EmptyIntervalVRegs
.begin(), E
= EmptyIntervalVRegs
.end();
751 LiveInterval
&LI
= LIS
.getInterval(*I
);
753 unsigned PReg
= MRI
.getSimpleHint(LI
.reg
);
756 const TargetRegisterClass
&RC
= *MRI
.getRegClass(LI
.reg
);
757 const ArrayRef
<MCPhysReg
> RawPRegOrder
= RC
.getRawAllocationOrder(MF
);
758 for (unsigned CandidateReg
: RawPRegOrder
) {
759 if (!VRM
.getRegInfo().isReserved(CandidateReg
)) {
765 "No un-reserved physical registers in this register class");
768 VRM
.assignVirt2Phys(LI
.reg
, PReg
);
772 void RegAllocPBQP::postOptimization(Spiller
&VRegSpiller
, LiveIntervals
&LIS
) {
773 VRegSpiller
.postOptimization();
774 /// Remove dead defs because of rematerialization.
775 for (auto DeadInst
: DeadRemats
) {
776 LIS
.RemoveMachineInstrFromMaps(*DeadInst
);
777 DeadInst
->eraseFromParent();
782 static inline float normalizePBQPSpillWeight(float UseDefFreq
, unsigned Size
,
784 // All intervals have a spill weight that is mostly proportional to the number
785 // of uses, with uses in loops having a bigger weight.
786 return NumInstr
* normalizeSpillWeight(UseDefFreq
, Size
, 1);
789 bool RegAllocPBQP::runOnMachineFunction(MachineFunction
&MF
) {
790 LiveIntervals
&LIS
= getAnalysis
<LiveIntervals
>();
791 MachineBlockFrequencyInfo
&MBFI
=
792 getAnalysis
<MachineBlockFrequencyInfo
>();
794 VirtRegMap
&VRM
= getAnalysis
<VirtRegMap
>();
796 calculateSpillWeightsAndHints(LIS
, MF
, &VRM
, getAnalysis
<MachineLoopInfo
>(),
797 MBFI
, normalizePBQPSpillWeight
);
799 std::unique_ptr
<Spiller
> VRegSpiller(createInlineSpiller(*this, MF
, VRM
));
801 MF
.getRegInfo().freezeReservedRegs(MF
);
803 LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF
.getName() << "\n");
805 // Allocator main loop:
807 // * Map current regalloc problem to a PBQP problem
808 // * Solve the PBQP problem
809 // * Map the solution back to a register allocation
810 // * Spill if necessary
812 // This process is continued till no more spills are generated.
814 // Find the vreg intervals in need of allocation.
815 findVRegIntervalsToAlloc(MF
, LIS
);
818 const Function
&F
= MF
.getFunction();
819 std::string FullyQualifiedName
=
820 F
.getParent()->getModuleIdentifier() + "." + F
.getName().str();
823 // If there are non-empty intervals allocate them using pbqp.
824 if (!VRegsToAlloc
.empty()) {
825 const TargetSubtargetInfo
&Subtarget
= MF
.getSubtarget();
826 std::unique_ptr
<PBQPRAConstraintList
> ConstraintsRoot
=
827 std::make_unique
<PBQPRAConstraintList
>();
828 ConstraintsRoot
->addConstraint(std::make_unique
<SpillCosts
>());
829 ConstraintsRoot
->addConstraint(std::make_unique
<Interference
>());
831 ConstraintsRoot
->addConstraint(std::make_unique
<Coalescing
>());
832 ConstraintsRoot
->addConstraint(Subtarget
.getCustomPBQPConstraints());
834 bool PBQPAllocComplete
= false;
837 while (!PBQPAllocComplete
) {
838 LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round
<< ":\n");
840 PBQPRAGraph
G(PBQPRAGraph::GraphMetadata(MF
, LIS
, MBFI
));
841 initializeGraph(G
, VRM
, *VRegSpiller
);
842 ConstraintsRoot
->apply(G
);
845 if (PBQPDumpGraphs
) {
846 std::ostringstream RS
;
848 std::string GraphFileName
= FullyQualifiedName
+ "." + RS
.str() +
851 raw_fd_ostream
OS(GraphFileName
, EC
, sys::fs::OF_Text
);
852 LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round
<< " to \""
853 << GraphFileName
<< "\"\n");
858 PBQP::Solution Solution
= PBQP::RegAlloc::solve(G
);
859 PBQPAllocComplete
= mapPBQPToRegAlloc(G
, Solution
, VRM
, *VRegSpiller
);
864 // Finalise allocation, allocate empty ranges.
865 finalizeAlloc(MF
, LIS
, VRM
);
866 postOptimization(*VRegSpiller
, LIS
);
867 VRegsToAlloc
.clear();
868 EmptyIntervalVRegs
.clear();
870 LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM
<< "\n");
875 /// Create Printable object for node and register info.
876 static Printable
PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId
,
877 const PBQP::RegAlloc::PBQPRAGraph
&G
) {
878 return Printable([NId
, &G
](raw_ostream
&OS
) {
879 const MachineRegisterInfo
&MRI
= G
.getMetadata().MF
.getRegInfo();
880 const TargetRegisterInfo
*TRI
= MRI
.getTargetRegisterInfo();
881 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
882 const char *RegClassName
= TRI
->getRegClassName(MRI
.getRegClass(VReg
));
883 OS
<< NId
<< " (" << RegClassName
<< ':' << printReg(VReg
, TRI
) << ')';
887 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
888 LLVM_DUMP_METHOD
void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream
&OS
) const {
889 for (auto NId
: nodeIds()) {
890 const Vector
&Costs
= getNodeCosts(NId
);
891 assert(Costs
.getLength() != 0 && "Empty vector in graph.");
892 OS
<< PrintNodeInfo(NId
, *this) << ": " << Costs
<< '\n';
896 for (auto EId
: edgeIds()) {
897 NodeId N1Id
= getEdgeNode1Id(EId
);
898 NodeId N2Id
= getEdgeNode2Id(EId
);
899 assert(N1Id
!= N2Id
&& "PBQP graphs should not have self-edges.");
900 const Matrix
&M
= getEdgeCosts(EId
);
901 assert(M
.getRows() != 0 && "No rows in matrix.");
902 assert(M
.getCols() != 0 && "No cols in matrix.");
903 OS
<< PrintNodeInfo(N1Id
, *this) << ' ' << M
.getRows() << " rows / ";
904 OS
<< PrintNodeInfo(N2Id
, *this) << ' ' << M
.getCols() << " cols:\n";
909 LLVM_DUMP_METHOD
void PBQP::RegAlloc::PBQPRAGraph::dump() const {
914 void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream
&OS
) const {
916 for (auto NId
: nodeIds()) {
917 OS
<< " node" << NId
<< " [ label=\""
918 << PrintNodeInfo(NId
, *this) << "\\n"
919 << getNodeCosts(NId
) << "\" ]\n";
922 OS
<< " edge [ len=" << nodeIds().size() << " ]\n";
923 for (auto EId
: edgeIds()) {
924 OS
<< " node" << getEdgeNode1Id(EId
)
925 << " -- node" << getEdgeNode2Id(EId
)
927 const Matrix
&EdgeCosts
= getEdgeCosts(EId
);
928 for (unsigned i
= 0; i
< EdgeCosts
.getRows(); ++i
) {
929 OS
<< EdgeCosts
.getRowAsVector(i
) << "\\n";
936 FunctionPass
*llvm::createPBQPRegisterAllocator(char *customPassID
) {
937 return new RegAllocPBQP(customPassID
);
940 FunctionPass
* llvm::createDefaultPBQPRegisterAllocator() {
941 return createPBQPRegisterAllocator();