1 //===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
11 // register allocator for LLVM. This allocator works by constructing a PBQP
12 // problem representing the register allocation problem under consideration,
13 // solving this using a PBQP solver, and mapping the solution back to a
14 // register assignment. If any variables are selected for spilling then spill
15 // code is inserted and the process repeated.
17 // The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
18 // for register allocation. For more information on PBQP for register
19 // allocation, see the following papers:
21 // (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
22 // PBQP. In Proceedings of the 7th Joint Modular Languages Conference
23 // (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
25 // (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
26 // architectures. In Proceedings of the Joint Conference on Languages,
27 // Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
30 //===----------------------------------------------------------------------===//
32 #include "llvm/CodeGen/RegAllocPBQP.h"
33 #include "RegisterCoalescer.h"
35 #include "llvm/ADT/ArrayRef.h"
36 #include "llvm/ADT/BitVector.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/DenseSet.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/ADT/SmallPtrSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/CodeGen/CalcSpillWeights.h"
45 #include "llvm/CodeGen/LiveInterval.h"
46 #include "llvm/CodeGen/LiveIntervals.h"
47 #include "llvm/CodeGen/LiveRangeEdit.h"
48 #include "llvm/CodeGen/LiveStacks.h"
49 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
50 #include "llvm/CodeGen/MachineDominators.h"
51 #include "llvm/CodeGen/MachineFunction.h"
52 #include "llvm/CodeGen/MachineFunctionPass.h"
53 #include "llvm/CodeGen/MachineInstr.h"
54 #include "llvm/CodeGen/MachineLoopInfo.h"
55 #include "llvm/CodeGen/MachineRegisterInfo.h"
56 #include "llvm/CodeGen/PBQP/Graph.h"
57 #include "llvm/CodeGen/PBQP/Math.h"
58 #include "llvm/CodeGen/PBQP/Solution.h"
59 #include "llvm/CodeGen/PBQPRAConstraint.h"
60 #include "llvm/CodeGen/RegAllocRegistry.h"
61 #include "llvm/CodeGen/SlotIndexes.h"
62 #include "llvm/CodeGen/TargetRegisterInfo.h"
63 #include "llvm/CodeGen/TargetSubtargetInfo.h"
64 #include "llvm/CodeGen/VirtRegMap.h"
65 #include "llvm/Config/llvm-config.h"
66 #include "llvm/IR/Function.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/MC/MCRegisterInfo.h"
69 #include "llvm/Pass.h"
70 #include "llvm/Support/CommandLine.h"
71 #include "llvm/Support/Compiler.h"
72 #include "llvm/Support/Debug.h"
73 #include "llvm/Support/FileSystem.h"
74 #include "llvm/Support/Printable.h"
75 #include "llvm/Support/raw_ostream.h"
86 #include <system_error>
93 #define DEBUG_TYPE "regalloc"
95 static RegisterRegAlloc
96 RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
97 createDefaultPBQPRegisterAllocator
);
100 PBQPCoalescing("pbqp-coalescing",
101 cl::desc("Attempt coalescing during PBQP register allocation."),
102 cl::init(false), cl::Hidden
);
106 PBQPDumpGraphs("pbqp-dump-graphs",
107 cl::desc("Dump graphs for each function/round in the compilation unit."),
108 cl::init(false), cl::Hidden
);
114 /// PBQP based allocators solve the register allocation problem by mapping
115 /// register allocation problems to Partitioned Boolean Quadratic
116 /// Programming problems.
117 class RegAllocPBQP
: public MachineFunctionPass
{
121 /// Construct a PBQP register allocator.
122 RegAllocPBQP(char *cPassID
= nullptr)
123 : MachineFunctionPass(ID
), customPassID(cPassID
) {
124 initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
125 initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
126 initializeLiveStacksPass(*PassRegistry::getPassRegistry());
127 initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
130 /// Return the pass name.
131 StringRef
getPassName() const override
{ return "PBQP Register Allocator"; }
133 /// PBQP analysis usage.
134 void getAnalysisUsage(AnalysisUsage
&au
) const override
;
136 /// Perform register allocation
137 bool runOnMachineFunction(MachineFunction
&MF
) override
;
139 MachineFunctionProperties
getRequiredProperties() const override
{
140 return MachineFunctionProperties().set(
141 MachineFunctionProperties::Property::NoPHIs
);
145 using LI2NodeMap
= std::map
<const LiveInterval
*, unsigned>;
146 using Node2LIMap
= std::vector
<const LiveInterval
*>;
147 using AllowedSet
= std::vector
<unsigned>;
148 using AllowedSetMap
= std::vector
<AllowedSet
>;
149 using RegPair
= std::pair
<unsigned, unsigned>;
150 using CoalesceMap
= std::map
<RegPair
, PBQP::PBQPNum
>;
151 using RegSet
= std::set
<unsigned>;
155 RegSet VRegsToAlloc
, EmptyIntervalVRegs
;
157 /// Inst which is a def of an original reg and whose defs are already all
158 /// dead after remat is saved in DeadRemats. The deletion of such inst is
159 /// postponed till all the allocations are done, so its remat expr is
160 /// always available for the remat of all the siblings of the original reg.
161 SmallPtrSet
<MachineInstr
*, 32> DeadRemats
;
163 /// Finds the initial set of vreg intervals to allocate.
164 void findVRegIntervalsToAlloc(const MachineFunction
&MF
, LiveIntervals
&LIS
);
166 /// Constructs an initial graph.
167 void initializeGraph(PBQPRAGraph
&G
, VirtRegMap
&VRM
, Spiller
&VRegSpiller
);
169 /// Spill the given VReg.
170 void spillVReg(unsigned VReg
, SmallVectorImpl
<unsigned> &NewIntervals
,
171 MachineFunction
&MF
, LiveIntervals
&LIS
, VirtRegMap
&VRM
,
172 Spiller
&VRegSpiller
);
174 /// Given a solved PBQP problem maps this solution back to a register
176 bool mapPBQPToRegAlloc(const PBQPRAGraph
&G
,
177 const PBQP::Solution
&Solution
,
179 Spiller
&VRegSpiller
);
181 /// Postprocessing before final spilling. Sets basic block "live in"
183 void finalizeAlloc(MachineFunction
&MF
, LiveIntervals
&LIS
,
184 VirtRegMap
&VRM
) const;
186 void postOptimization(Spiller
&VRegSpiller
, LiveIntervals
&LIS
);
189 char RegAllocPBQP::ID
= 0;
191 /// Set spill costs for each node in the PBQP reg-alloc graph.
192 class SpillCosts
: public PBQPRAConstraint
{
194 void apply(PBQPRAGraph
&G
) override
{
195 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
197 // A minimum spill costs, so that register constraints can can be set
198 // without normalization in the [0.0:MinSpillCost( interval.
199 const PBQP::PBQPNum MinSpillCost
= 10.0;
201 for (auto NId
: G
.nodeIds()) {
202 PBQP::PBQPNum SpillCost
=
203 LIS
.getInterval(G
.getNodeMetadata(NId
).getVReg()).weight
;
204 if (SpillCost
== 0.0)
205 SpillCost
= std::numeric_limits
<PBQP::PBQPNum
>::min();
207 SpillCost
+= MinSpillCost
;
208 PBQPRAGraph::RawVector
NodeCosts(G
.getNodeCosts(NId
));
209 NodeCosts
[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost
;
210 G
.setNodeCosts(NId
, std::move(NodeCosts
));
215 /// Add interference edges between overlapping vregs.
216 class Interference
: public PBQPRAConstraint
{
218 using AllowedRegVecPtr
= const PBQP::RegAlloc::AllowedRegVector
*;
219 using IKey
= std::pair
<AllowedRegVecPtr
, AllowedRegVecPtr
>;
220 using IMatrixCache
= DenseMap
<IKey
, PBQPRAGraph::MatrixPtr
>;
221 using DisjointAllowedRegsCache
= DenseSet
<IKey
>;
222 using IEdgeKey
= std::pair
<PBQP::GraphBase::NodeId
, PBQP::GraphBase::NodeId
>;
223 using IEdgeCache
= DenseSet
<IEdgeKey
>;
225 bool haveDisjointAllowedRegs(const PBQPRAGraph
&G
, PBQPRAGraph::NodeId NId
,
226 PBQPRAGraph::NodeId MId
,
227 const DisjointAllowedRegsCache
&D
) const {
228 const auto *NRegs
= &G
.getNodeMetadata(NId
).getAllowedRegs();
229 const auto *MRegs
= &G
.getNodeMetadata(MId
).getAllowedRegs();
235 return D
.count(IKey(NRegs
, MRegs
)) > 0;
237 return D
.count(IKey(MRegs
, NRegs
)) > 0;
240 void setDisjointAllowedRegs(const PBQPRAGraph
&G
, PBQPRAGraph::NodeId NId
,
241 PBQPRAGraph::NodeId MId
,
242 DisjointAllowedRegsCache
&D
) {
243 const auto *NRegs
= &G
.getNodeMetadata(NId
).getAllowedRegs();
244 const auto *MRegs
= &G
.getNodeMetadata(MId
).getAllowedRegs();
246 assert(NRegs
!= MRegs
&& "AllowedRegs can not be disjoint with itself");
249 D
.insert(IKey(NRegs
, MRegs
));
251 D
.insert(IKey(MRegs
, NRegs
));
254 // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
255 // for the fast interference graph construction algorithm. The last is there
256 // to save us from looking up node ids via the VRegToNode map in the graph
259 std::tuple
<LiveInterval
*, size_t, PBQP::GraphBase::NodeId
>;
261 static SlotIndex
getStartPoint(const IntervalInfo
&I
) {
262 return std::get
<0>(I
)->segments
[std::get
<1>(I
)].start
;
265 static SlotIndex
getEndPoint(const IntervalInfo
&I
) {
266 return std::get
<0>(I
)->segments
[std::get
<1>(I
)].end
;
269 static PBQP::GraphBase::NodeId
getNodeId(const IntervalInfo
&I
) {
270 return std::get
<2>(I
);
273 static bool lowestStartPoint(const IntervalInfo
&I1
,
274 const IntervalInfo
&I2
) {
275 // Condition reversed because priority queue has the *highest* element at
276 // the front, rather than the lowest.
277 return getStartPoint(I1
) > getStartPoint(I2
);
280 static bool lowestEndPoint(const IntervalInfo
&I1
,
281 const IntervalInfo
&I2
) {
282 SlotIndex E1
= getEndPoint(I1
);
283 SlotIndex E2
= getEndPoint(I2
);
291 // If two intervals end at the same point, we need a way to break the tie or
292 // the set will assume they're actually equal and refuse to insert a
293 // "duplicate". Just compare the vregs - fast and guaranteed unique.
294 return std::get
<0>(I1
)->reg
< std::get
<0>(I2
)->reg
;
297 static bool isAtLastSegment(const IntervalInfo
&I
) {
298 return std::get
<1>(I
) == std::get
<0>(I
)->size() - 1;
301 static IntervalInfo
nextSegment(const IntervalInfo
&I
) {
302 return std::make_tuple(std::get
<0>(I
), std::get
<1>(I
) + 1, std::get
<2>(I
));
306 void apply(PBQPRAGraph
&G
) override
{
307 // The following is loosely based on the linear scan algorithm introduced in
308 // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
309 // isn't linear, because the size of the active set isn't bound by the
310 // number of registers, but rather the size of the largest clique in the
311 // graph. Still, we expect this to be better than N^2.
312 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
314 // Interferenc matrices are incredibly regular - they're only a function of
315 // the allowed sets, so we cache them to avoid the overhead of constructing
316 // and uniquing them.
319 // Finding an edge is expensive in the worst case (O(max_clique(G))). So
320 // cache locally edges we have already seen.
323 // Cache known disjoint allowed registers pairs
324 DisjointAllowedRegsCache D
;
326 using IntervalSet
= std::set
<IntervalInfo
, decltype(&lowestEndPoint
)>;
327 using IntervalQueue
=
328 std::priority_queue
<IntervalInfo
, std::vector
<IntervalInfo
>,
329 decltype(&lowestStartPoint
)>;
330 IntervalSet
Active(lowestEndPoint
);
331 IntervalQueue
Inactive(lowestStartPoint
);
333 // Start by building the inactive set.
334 for (auto NId
: G
.nodeIds()) {
335 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
336 LiveInterval
&LI
= LIS
.getInterval(VReg
);
337 assert(!LI
.empty() && "PBQP graph contains node for empty interval");
338 Inactive
.push(std::make_tuple(&LI
, 0, NId
));
341 while (!Inactive
.empty()) {
342 // Tentatively grab the "next" interval - this choice may be overriden
344 IntervalInfo Cur
= Inactive
.top();
346 // Retire any active intervals that end before Cur starts.
347 IntervalSet::iterator RetireItr
= Active
.begin();
348 while (RetireItr
!= Active
.end() &&
349 (getEndPoint(*RetireItr
) <= getStartPoint(Cur
))) {
350 // If this interval has subsequent segments, add the next one to the
352 if (!isAtLastSegment(*RetireItr
))
353 Inactive
.push(nextSegment(*RetireItr
));
357 Active
.erase(Active
.begin(), RetireItr
);
359 // One of the newly retired segments may actually start before the
360 // Cur segment, so re-grab the front of the inactive list.
361 Cur
= Inactive
.top();
364 // At this point we know that Cur overlaps all active intervals. Add the
365 // interference edges.
366 PBQP::GraphBase::NodeId NId
= getNodeId(Cur
);
367 for (const auto &A
: Active
) {
368 PBQP::GraphBase::NodeId MId
= getNodeId(A
);
370 // Do not add an edge when the nodes' allowed registers do not
371 // intersect: there is obviously no interference.
372 if (haveDisjointAllowedRegs(G
, NId
, MId
, D
))
375 // Check that we haven't already added this edge
376 IEdgeKey
EK(std::min(NId
, MId
), std::max(NId
, MId
));
380 // This is a new edge - add it to the graph.
381 if (!createInterferenceEdge(G
, NId
, MId
, C
))
382 setDisjointAllowedRegs(G
, NId
, MId
, D
);
387 // Finally, add Cur to the Active set.
393 // Create an Interference edge and add it to the graph, unless it is
394 // a null matrix, meaning the nodes' allowed registers do not have any
395 // interference. This case occurs frequently between integer and floating
396 // point registers for example.
397 // return true iff both nodes interferes.
398 bool createInterferenceEdge(PBQPRAGraph
&G
,
399 PBQPRAGraph::NodeId NId
, PBQPRAGraph::NodeId MId
,
401 const TargetRegisterInfo
&TRI
=
402 *G
.getMetadata().MF
.getSubtarget().getRegisterInfo();
403 const auto &NRegs
= G
.getNodeMetadata(NId
).getAllowedRegs();
404 const auto &MRegs
= G
.getNodeMetadata(MId
).getAllowedRegs();
406 // Try looking the edge costs up in the IMatrixCache first.
407 IKey
K(&NRegs
, &MRegs
);
408 IMatrixCache::iterator I
= C
.find(K
);
410 G
.addEdgeBypassingCostAllocator(NId
, MId
, I
->second
);
414 PBQPRAGraph::RawMatrix
M(NRegs
.size() + 1, MRegs
.size() + 1, 0);
415 bool NodesInterfere
= false;
416 for (unsigned I
= 0; I
!= NRegs
.size(); ++I
) {
417 unsigned PRegN
= NRegs
[I
];
418 for (unsigned J
= 0; J
!= MRegs
.size(); ++J
) {
419 unsigned PRegM
= MRegs
[J
];
420 if (TRI
.regsOverlap(PRegN
, PRegM
)) {
421 M
[I
+ 1][J
+ 1] = std::numeric_limits
<PBQP::PBQPNum
>::infinity();
422 NodesInterfere
= true;
430 PBQPRAGraph::EdgeId EId
= G
.addEdge(NId
, MId
, std::move(M
));
431 C
[K
] = G
.getEdgeCostsPtr(EId
);
437 class Coalescing
: public PBQPRAConstraint
{
439 void apply(PBQPRAGraph
&G
) override
{
440 MachineFunction
&MF
= G
.getMetadata().MF
;
441 MachineBlockFrequencyInfo
&MBFI
= G
.getMetadata().MBFI
;
442 CoalescerPair
CP(*MF
.getSubtarget().getRegisterInfo());
444 // Scan the machine function and add a coalescing cost whenever CoalescerPair
446 for (const auto &MBB
: MF
) {
447 for (const auto &MI
: MBB
) {
448 // Skip not-coalescable or already coalesced copies.
449 if (!CP
.setRegisters(&MI
) || CP
.getSrcReg() == CP
.getDstReg())
452 unsigned DstReg
= CP
.getDstReg();
453 unsigned SrcReg
= CP
.getSrcReg();
455 const float Scale
= 1.0f
/ MBFI
.getEntryFreq();
456 PBQP::PBQPNum CBenefit
= MBFI
.getBlockFreq(&MBB
).getFrequency() * Scale
;
459 if (!MF
.getRegInfo().isAllocatable(DstReg
))
462 PBQPRAGraph::NodeId NId
= G
.getMetadata().getNodeIdForVReg(SrcReg
);
464 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed
=
465 G
.getNodeMetadata(NId
).getAllowedRegs();
467 unsigned PRegOpt
= 0;
468 while (PRegOpt
< Allowed
.size() && Allowed
[PRegOpt
] != DstReg
)
471 if (PRegOpt
< Allowed
.size()) {
472 PBQPRAGraph::RawVector
NewCosts(G
.getNodeCosts(NId
));
473 NewCosts
[PRegOpt
+ 1] -= CBenefit
;
474 G
.setNodeCosts(NId
, std::move(NewCosts
));
477 PBQPRAGraph::NodeId N1Id
= G
.getMetadata().getNodeIdForVReg(DstReg
);
478 PBQPRAGraph::NodeId N2Id
= G
.getMetadata().getNodeIdForVReg(SrcReg
);
479 const PBQPRAGraph::NodeMetadata::AllowedRegVector
*Allowed1
=
480 &G
.getNodeMetadata(N1Id
).getAllowedRegs();
481 const PBQPRAGraph::NodeMetadata::AllowedRegVector
*Allowed2
=
482 &G
.getNodeMetadata(N2Id
).getAllowedRegs();
484 PBQPRAGraph::EdgeId EId
= G
.findEdge(N1Id
, N2Id
);
485 if (EId
== G
.invalidEdgeId()) {
486 PBQPRAGraph::RawMatrix
Costs(Allowed1
->size() + 1,
487 Allowed2
->size() + 1, 0);
488 addVirtRegCoalesce(Costs
, *Allowed1
, *Allowed2
, CBenefit
);
489 G
.addEdge(N1Id
, N2Id
, std::move(Costs
));
491 if (G
.getEdgeNode1Id(EId
) == N2Id
) {
492 std::swap(N1Id
, N2Id
);
493 std::swap(Allowed1
, Allowed2
);
495 PBQPRAGraph::RawMatrix
Costs(G
.getEdgeCosts(EId
));
496 addVirtRegCoalesce(Costs
, *Allowed1
, *Allowed2
, CBenefit
);
497 G
.updateEdgeCosts(EId
, std::move(Costs
));
505 void addVirtRegCoalesce(
506 PBQPRAGraph::RawMatrix
&CostMat
,
507 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed1
,
508 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed2
,
509 PBQP::PBQPNum Benefit
) {
510 assert(CostMat
.getRows() == Allowed1
.size() + 1 && "Size mismatch.");
511 assert(CostMat
.getCols() == Allowed2
.size() + 1 && "Size mismatch.");
512 for (unsigned I
= 0; I
!= Allowed1
.size(); ++I
) {
513 unsigned PReg1
= Allowed1
[I
];
514 for (unsigned J
= 0; J
!= Allowed2
.size(); ++J
) {
515 unsigned PReg2
= Allowed2
[J
];
517 CostMat
[I
+ 1][J
+ 1] -= Benefit
;
523 } // end anonymous namespace
525 // Out-of-line destructor/anchor for PBQPRAConstraint.
526 PBQPRAConstraint::~PBQPRAConstraint() = default;
528 void PBQPRAConstraint::anchor() {}
530 void PBQPRAConstraintList::anchor() {}
532 void RegAllocPBQP::getAnalysisUsage(AnalysisUsage
&au
) const {
533 au
.setPreservesCFG();
534 au
.addRequired
<AAResultsWrapperPass
>();
535 au
.addPreserved
<AAResultsWrapperPass
>();
536 au
.addRequired
<SlotIndexes
>();
537 au
.addPreserved
<SlotIndexes
>();
538 au
.addRequired
<LiveIntervals
>();
539 au
.addPreserved
<LiveIntervals
>();
540 //au.addRequiredID(SplitCriticalEdgesID);
542 au
.addRequiredID(*customPassID
);
543 au
.addRequired
<LiveStacks
>();
544 au
.addPreserved
<LiveStacks
>();
545 au
.addRequired
<MachineBlockFrequencyInfo
>();
546 au
.addPreserved
<MachineBlockFrequencyInfo
>();
547 au
.addRequired
<MachineLoopInfo
>();
548 au
.addPreserved
<MachineLoopInfo
>();
549 au
.addRequired
<MachineDominatorTree
>();
550 au
.addPreserved
<MachineDominatorTree
>();
551 au
.addRequired
<VirtRegMap
>();
552 au
.addPreserved
<VirtRegMap
>();
553 MachineFunctionPass::getAnalysisUsage(au
);
556 void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction
&MF
,
557 LiveIntervals
&LIS
) {
558 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
560 // Iterate over all live ranges.
561 for (unsigned I
= 0, E
= MRI
.getNumVirtRegs(); I
!= E
; ++I
) {
562 unsigned Reg
= TargetRegisterInfo::index2VirtReg(I
);
563 if (MRI
.reg_nodbg_empty(Reg
))
565 VRegsToAlloc
.insert(Reg
);
569 static bool isACalleeSavedRegister(unsigned reg
, const TargetRegisterInfo
&TRI
,
570 const MachineFunction
&MF
) {
571 const MCPhysReg
*CSR
= MF
.getRegInfo().getCalleeSavedRegs();
572 for (unsigned i
= 0; CSR
[i
] != 0; ++i
)
573 if (TRI
.regsOverlap(reg
, CSR
[i
]))
578 void RegAllocPBQP::initializeGraph(PBQPRAGraph
&G
, VirtRegMap
&VRM
,
579 Spiller
&VRegSpiller
) {
580 MachineFunction
&MF
= G
.getMetadata().MF
;
582 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
583 const MachineRegisterInfo
&MRI
= G
.getMetadata().MF
.getRegInfo();
584 const TargetRegisterInfo
&TRI
=
585 *G
.getMetadata().MF
.getSubtarget().getRegisterInfo();
587 std::vector
<unsigned> Worklist(VRegsToAlloc
.begin(), VRegsToAlloc
.end());
589 std::map
<unsigned, std::vector
<unsigned>> VRegAllowedMap
;
591 while (!Worklist
.empty()) {
592 unsigned VReg
= Worklist
.back();
595 LiveInterval
&VRegLI
= LIS
.getInterval(VReg
);
597 // If this is an empty interval move it to the EmptyIntervalVRegs set then
599 if (VRegLI
.empty()) {
600 EmptyIntervalVRegs
.insert(VRegLI
.reg
);
601 VRegsToAlloc
.erase(VRegLI
.reg
);
605 const TargetRegisterClass
*TRC
= MRI
.getRegClass(VReg
);
607 // Record any overlaps with regmask operands.
608 BitVector RegMaskOverlaps
;
609 LIS
.checkRegMaskInterference(VRegLI
, RegMaskOverlaps
);
611 // Compute an initial allowed set for the current vreg.
612 std::vector
<unsigned> VRegAllowed
;
613 ArrayRef
<MCPhysReg
> RawPRegOrder
= TRC
->getRawAllocationOrder(MF
);
614 for (unsigned I
= 0; I
!= RawPRegOrder
.size(); ++I
) {
615 unsigned PReg
= RawPRegOrder
[I
];
616 if (MRI
.isReserved(PReg
))
619 // vregLI crosses a regmask operand that clobbers preg.
620 if (!RegMaskOverlaps
.empty() && !RegMaskOverlaps
.test(PReg
))
623 // vregLI overlaps fixed regunit interference.
624 bool Interference
= false;
625 for (MCRegUnitIterator
Units(PReg
, &TRI
); Units
.isValid(); ++Units
) {
626 if (VRegLI
.overlaps(LIS
.getRegUnit(*Units
))) {
634 // preg is usable for this virtual register.
635 VRegAllowed
.push_back(PReg
);
638 // Check for vregs that have no allowed registers. These should be
639 // pre-spilled and the new vregs added to the worklist.
640 if (VRegAllowed
.empty()) {
641 SmallVector
<unsigned, 8> NewVRegs
;
642 spillVReg(VReg
, NewVRegs
, MF
, LIS
, VRM
, VRegSpiller
);
643 Worklist
.insert(Worklist
.end(), NewVRegs
.begin(), NewVRegs
.end());
646 VRegAllowedMap
[VReg
] = std::move(VRegAllowed
);
649 for (auto &KV
: VRegAllowedMap
) {
650 auto VReg
= KV
.first
;
652 // Move empty intervals to the EmptyIntervalVReg set.
653 if (LIS
.getInterval(VReg
).empty()) {
654 EmptyIntervalVRegs
.insert(VReg
);
655 VRegsToAlloc
.erase(VReg
);
659 auto &VRegAllowed
= KV
.second
;
661 PBQPRAGraph::RawVector
NodeCosts(VRegAllowed
.size() + 1, 0);
663 // Tweak cost of callee saved registers, as using then force spilling and
664 // restoring them. This would only happen in the prologue / epilogue though.
665 for (unsigned i
= 0; i
!= VRegAllowed
.size(); ++i
)
666 if (isACalleeSavedRegister(VRegAllowed
[i
], TRI
, MF
))
667 NodeCosts
[1 + i
] += 1.0;
669 PBQPRAGraph::NodeId NId
= G
.addNode(std::move(NodeCosts
));
670 G
.getNodeMetadata(NId
).setVReg(VReg
);
671 G
.getNodeMetadata(NId
).setAllowedRegs(
672 G
.getMetadata().getAllowedRegs(std::move(VRegAllowed
)));
673 G
.getMetadata().setNodeIdForVReg(VReg
, NId
);
677 void RegAllocPBQP::spillVReg(unsigned VReg
,
678 SmallVectorImpl
<unsigned> &NewIntervals
,
679 MachineFunction
&MF
, LiveIntervals
&LIS
,
680 VirtRegMap
&VRM
, Spiller
&VRegSpiller
) {
681 VRegsToAlloc
.erase(VReg
);
682 LiveRangeEdit
LRE(&LIS
.getInterval(VReg
), NewIntervals
, MF
, LIS
, &VRM
,
683 nullptr, &DeadRemats
);
684 VRegSpiller
.spill(LRE
);
686 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
688 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg
, &TRI
) << " -> SPILLED (Cost: "
689 << LRE
.getParent().weight
<< ", New vregs: ");
691 // Copy any newly inserted live intervals into the list of regs to
693 for (LiveRangeEdit::iterator I
= LRE
.begin(), E
= LRE
.end();
695 const LiveInterval
&LI
= LIS
.getInterval(*I
);
696 assert(!LI
.empty() && "Empty spill range.");
697 LLVM_DEBUG(dbgs() << printReg(LI
.reg
, &TRI
) << " ");
698 VRegsToAlloc
.insert(LI
.reg
);
701 LLVM_DEBUG(dbgs() << ")\n");
704 bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph
&G
,
705 const PBQP::Solution
&Solution
,
707 Spiller
&VRegSpiller
) {
708 MachineFunction
&MF
= G
.getMetadata().MF
;
709 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
710 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
713 // Set to true if we have any spills
714 bool AnotherRoundNeeded
= false;
716 // Clear the existing allocation.
719 // Iterate over the nodes mapping the PBQP solution to a register
721 for (auto NId
: G
.nodeIds()) {
722 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
723 unsigned AllocOption
= Solution
.getSelection(NId
);
725 if (AllocOption
!= PBQP::RegAlloc::getSpillOptionIdx()) {
726 unsigned PReg
= G
.getNodeMetadata(NId
).getAllowedRegs()[AllocOption
- 1];
727 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg
, &TRI
) << " -> "
728 << TRI
.getName(PReg
) << "\n");
729 assert(PReg
!= 0 && "Invalid preg selected.");
730 VRM
.assignVirt2Phys(VReg
, PReg
);
732 // Spill VReg. If this introduces new intervals we'll need another round
734 SmallVector
<unsigned, 8> NewVRegs
;
735 spillVReg(VReg
, NewVRegs
, MF
, LIS
, VRM
, VRegSpiller
);
736 AnotherRoundNeeded
|= !NewVRegs
.empty();
740 return !AnotherRoundNeeded
;
743 void RegAllocPBQP::finalizeAlloc(MachineFunction
&MF
,
745 VirtRegMap
&VRM
) const {
746 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
748 // First allocate registers for the empty intervals.
749 for (RegSet::const_iterator
750 I
= EmptyIntervalVRegs
.begin(), E
= EmptyIntervalVRegs
.end();
752 LiveInterval
&LI
= LIS
.getInterval(*I
);
754 unsigned PReg
= MRI
.getSimpleHint(LI
.reg
);
757 const TargetRegisterClass
&RC
= *MRI
.getRegClass(LI
.reg
);
758 const ArrayRef
<MCPhysReg
> RawPRegOrder
= RC
.getRawAllocationOrder(MF
);
759 for (unsigned CandidateReg
: RawPRegOrder
) {
760 if (!VRM
.getRegInfo().isReserved(CandidateReg
)) {
766 "No un-reserved physical registers in this register class");
769 VRM
.assignVirt2Phys(LI
.reg
, PReg
);
773 void RegAllocPBQP::postOptimization(Spiller
&VRegSpiller
, LiveIntervals
&LIS
) {
774 VRegSpiller
.postOptimization();
775 /// Remove dead defs because of rematerialization.
776 for (auto DeadInst
: DeadRemats
) {
777 LIS
.RemoveMachineInstrFromMaps(*DeadInst
);
778 DeadInst
->eraseFromParent();
783 static inline float normalizePBQPSpillWeight(float UseDefFreq
, unsigned Size
,
785 // All intervals have a spill weight that is mostly proportional to the number
786 // of uses, with uses in loops having a bigger weight.
787 return NumInstr
* normalizeSpillWeight(UseDefFreq
, Size
, 1);
790 bool RegAllocPBQP::runOnMachineFunction(MachineFunction
&MF
) {
791 LiveIntervals
&LIS
= getAnalysis
<LiveIntervals
>();
792 MachineBlockFrequencyInfo
&MBFI
=
793 getAnalysis
<MachineBlockFrequencyInfo
>();
795 VirtRegMap
&VRM
= getAnalysis
<VirtRegMap
>();
797 calculateSpillWeightsAndHints(LIS
, MF
, &VRM
, getAnalysis
<MachineLoopInfo
>(),
798 MBFI
, normalizePBQPSpillWeight
);
800 std::unique_ptr
<Spiller
> VRegSpiller(createInlineSpiller(*this, MF
, VRM
));
802 MF
.getRegInfo().freezeReservedRegs(MF
);
804 LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF
.getName() << "\n");
806 // Allocator main loop:
808 // * Map current regalloc problem to a PBQP problem
809 // * Solve the PBQP problem
810 // * Map the solution back to a register allocation
811 // * Spill if necessary
813 // This process is continued till no more spills are generated.
815 // Find the vreg intervals in need of allocation.
816 findVRegIntervalsToAlloc(MF
, LIS
);
819 const Function
&F
= MF
.getFunction();
820 std::string FullyQualifiedName
=
821 F
.getParent()->getModuleIdentifier() + "." + F
.getName().str();
824 // If there are non-empty intervals allocate them using pbqp.
825 if (!VRegsToAlloc
.empty()) {
826 const TargetSubtargetInfo
&Subtarget
= MF
.getSubtarget();
827 std::unique_ptr
<PBQPRAConstraintList
> ConstraintsRoot
=
828 llvm::make_unique
<PBQPRAConstraintList
>();
829 ConstraintsRoot
->addConstraint(llvm::make_unique
<SpillCosts
>());
830 ConstraintsRoot
->addConstraint(llvm::make_unique
<Interference
>());
832 ConstraintsRoot
->addConstraint(llvm::make_unique
<Coalescing
>());
833 ConstraintsRoot
->addConstraint(Subtarget
.getCustomPBQPConstraints());
835 bool PBQPAllocComplete
= false;
838 while (!PBQPAllocComplete
) {
839 LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round
<< ":\n");
841 PBQPRAGraph
G(PBQPRAGraph::GraphMetadata(MF
, LIS
, MBFI
));
842 initializeGraph(G
, VRM
, *VRegSpiller
);
843 ConstraintsRoot
->apply(G
);
846 if (PBQPDumpGraphs
) {
847 std::ostringstream RS
;
849 std::string GraphFileName
= FullyQualifiedName
+ "." + RS
.str() +
852 raw_fd_ostream
OS(GraphFileName
, EC
, sys::fs::F_Text
);
853 LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round
<< " to \""
854 << GraphFileName
<< "\"\n");
859 PBQP::Solution Solution
= PBQP::RegAlloc::solve(G
);
860 PBQPAllocComplete
= mapPBQPToRegAlloc(G
, Solution
, VRM
, *VRegSpiller
);
865 // Finalise allocation, allocate empty ranges.
866 finalizeAlloc(MF
, LIS
, VRM
);
867 postOptimization(*VRegSpiller
, LIS
);
868 VRegsToAlloc
.clear();
869 EmptyIntervalVRegs
.clear();
871 LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM
<< "\n");
876 /// Create Printable object for node and register info.
877 static Printable
PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId
,
878 const PBQP::RegAlloc::PBQPRAGraph
&G
) {
879 return Printable([NId
, &G
](raw_ostream
&OS
) {
880 const MachineRegisterInfo
&MRI
= G
.getMetadata().MF
.getRegInfo();
881 const TargetRegisterInfo
*TRI
= MRI
.getTargetRegisterInfo();
882 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
883 const char *RegClassName
= TRI
->getRegClassName(MRI
.getRegClass(VReg
));
884 OS
<< NId
<< " (" << RegClassName
<< ':' << printReg(VReg
, TRI
) << ')';
888 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
889 LLVM_DUMP_METHOD
void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream
&OS
) const {
890 for (auto NId
: nodeIds()) {
891 const Vector
&Costs
= getNodeCosts(NId
);
892 assert(Costs
.getLength() != 0 && "Empty vector in graph.");
893 OS
<< PrintNodeInfo(NId
, *this) << ": " << Costs
<< '\n';
897 for (auto EId
: edgeIds()) {
898 NodeId N1Id
= getEdgeNode1Id(EId
);
899 NodeId N2Id
= getEdgeNode2Id(EId
);
900 assert(N1Id
!= N2Id
&& "PBQP graphs should not have self-edges.");
901 const Matrix
&M
= getEdgeCosts(EId
);
902 assert(M
.getRows() != 0 && "No rows in matrix.");
903 assert(M
.getCols() != 0 && "No cols in matrix.");
904 OS
<< PrintNodeInfo(N1Id
, *this) << ' ' << M
.getRows() << " rows / ";
905 OS
<< PrintNodeInfo(N2Id
, *this) << ' ' << M
.getCols() << " cols:\n";
910 LLVM_DUMP_METHOD
void PBQP::RegAlloc::PBQPRAGraph::dump() const {
915 void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream
&OS
) const {
917 for (auto NId
: nodeIds()) {
918 OS
<< " node" << NId
<< " [ label=\""
919 << PrintNodeInfo(NId
, *this) << "\\n"
920 << getNodeCosts(NId
) << "\" ]\n";
923 OS
<< " edge [ len=" << nodeIds().size() << " ]\n";
924 for (auto EId
: edgeIds()) {
925 OS
<< " node" << getEdgeNode1Id(EId
)
926 << " -- node" << getEdgeNode2Id(EId
)
928 const Matrix
&EdgeCosts
= getEdgeCosts(EId
);
929 for (unsigned i
= 0; i
< EdgeCosts
.getRows(); ++i
) {
930 OS
<< EdgeCosts
.getRowAsVector(i
) << "\\n";
937 FunctionPass
*llvm::createPBQPRegisterAllocator(char *customPassID
) {
938 return new RegAllocPBQP(customPassID
);
941 FunctionPass
* llvm::createDefaultPBQPRegisterAllocator() {
942 return createPBQPRegisterAllocator();