1 //===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
11 // register allocator for LLVM. This allocator works by constructing a PBQP
12 // problem representing the register allocation problem under consideration,
13 // solving this using a PBQP solver, and mapping the solution back to a
14 // register assignment. If any variables are selected for spilling then spill
15 // code is inserted and the process repeated.
17 // The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
18 // for register allocation. For more information on PBQP for register
19 // allocation, see the following papers:
21 // (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
22 // PBQP. In Proceedings of the 7th Joint Modular Languages Conference
23 // (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
25 // (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
26 // architectures. In Proceedings of the Joint Conference on Languages,
27 // Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
30 //===----------------------------------------------------------------------===//
32 #include "llvm/CodeGen/RegAllocPBQP.h"
33 #include "RegisterCoalescer.h"
35 #include "llvm/ADT/ArrayRef.h"
36 #include "llvm/ADT/BitVector.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/DenseSet.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/ADT/SmallPtrSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/CodeGen/CalcSpillWeights.h"
45 #include "llvm/CodeGen/LiveInterval.h"
46 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
47 #include "llvm/CodeGen/LiveRangeEdit.h"
48 #include "llvm/CodeGen/LiveStackAnalysis.h"
49 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
50 #include "llvm/CodeGen/MachineDominators.h"
51 #include "llvm/CodeGen/MachineFunction.h"
52 #include "llvm/CodeGen/MachineFunctionPass.h"
53 #include "llvm/CodeGen/MachineInstr.h"
54 #include "llvm/CodeGen/MachineLoopInfo.h"
55 #include "llvm/CodeGen/MachineRegisterInfo.h"
56 #include "llvm/CodeGen/PBQP/Graph.h"
57 #include "llvm/CodeGen/PBQP/Math.h"
58 #include "llvm/CodeGen/PBQP/Solution.h"
59 #include "llvm/CodeGen/PBQPRAConstraint.h"
60 #include "llvm/CodeGen/RegAllocRegistry.h"
61 #include "llvm/CodeGen/SlotIndexes.h"
62 #include "llvm/CodeGen/VirtRegMap.h"
63 #include "llvm/IR/Function.h"
64 #include "llvm/IR/Module.h"
65 #include "llvm/MC/MCRegisterInfo.h"
66 #include "llvm/Pass.h"
67 #include "llvm/Support/CommandLine.h"
68 #include "llvm/Support/Compiler.h"
69 #include "llvm/Support/Debug.h"
70 #include "llvm/Support/FileSystem.h"
71 #include "llvm/Support/Printable.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include "llvm/Target/TargetRegisterInfo.h"
74 #include "llvm/Target/TargetSubtargetInfo.h"
85 #include <system_error>
92 #define DEBUG_TYPE "regalloc"
94 static RegisterRegAlloc
95 RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
96 createDefaultPBQPRegisterAllocator
);
99 PBQPCoalescing("pbqp-coalescing",
100 cl::desc("Attempt coalescing during PBQP register allocation."),
101 cl::init(false), cl::Hidden
);
105 PBQPDumpGraphs("pbqp-dump-graphs",
106 cl::desc("Dump graphs for each function/round in the compilation unit."),
107 cl::init(false), cl::Hidden
);
113 /// PBQP based allocators solve the register allocation problem by mapping
114 /// register allocation problems to Partitioned Boolean Quadratic
115 /// Programming problems.
116 class RegAllocPBQP
: public MachineFunctionPass
{
120 /// Construct a PBQP register allocator.
121 RegAllocPBQP(char *cPassID
= nullptr)
122 : MachineFunctionPass(ID
), customPassID(cPassID
) {
123 initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
124 initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
125 initializeLiveStacksPass(*PassRegistry::getPassRegistry());
126 initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
129 /// Return the pass name.
130 StringRef
getPassName() const override
{ return "PBQP Register Allocator"; }
132 /// PBQP analysis usage.
133 void getAnalysisUsage(AnalysisUsage
&au
) const override
;
135 /// Perform register allocation
136 bool runOnMachineFunction(MachineFunction
&MF
) override
;
138 MachineFunctionProperties
getRequiredProperties() const override
{
139 return MachineFunctionProperties().set(
140 MachineFunctionProperties::Property::NoPHIs
);
144 using LI2NodeMap
= std::map
<const LiveInterval
*, unsigned>;
145 using Node2LIMap
= std::vector
<const LiveInterval
*>;
146 using AllowedSet
= std::vector
<unsigned>;
147 using AllowedSetMap
= std::vector
<AllowedSet
>;
148 using RegPair
= std::pair
<unsigned, unsigned>;
149 using CoalesceMap
= std::map
<RegPair
, PBQP::PBQPNum
>;
150 using RegSet
= std::set
<unsigned>;
154 RegSet VRegsToAlloc
, EmptyIntervalVRegs
;
156 /// Inst which is a def of an original reg and whose defs are already all
157 /// dead after remat is saved in DeadRemats. The deletion of such inst is
158 /// postponed till all the allocations are done, so its remat expr is
159 /// always available for the remat of all the siblings of the original reg.
160 SmallPtrSet
<MachineInstr
*, 32> DeadRemats
;
162 /// \brief Finds the initial set of vreg intervals to allocate.
163 void findVRegIntervalsToAlloc(const MachineFunction
&MF
, LiveIntervals
&LIS
);
165 /// \brief Constructs an initial graph.
166 void initializeGraph(PBQPRAGraph
&G
, VirtRegMap
&VRM
, Spiller
&VRegSpiller
);
168 /// \brief Spill the given VReg.
169 void spillVReg(unsigned VReg
, SmallVectorImpl
<unsigned> &NewIntervals
,
170 MachineFunction
&MF
, LiveIntervals
&LIS
, VirtRegMap
&VRM
,
171 Spiller
&VRegSpiller
);
173 /// \brief Given a solved PBQP problem maps this solution back to a register
175 bool mapPBQPToRegAlloc(const PBQPRAGraph
&G
,
176 const PBQP::Solution
&Solution
,
178 Spiller
&VRegSpiller
);
180 /// \brief Postprocessing before final spilling. Sets basic block "live in"
182 void finalizeAlloc(MachineFunction
&MF
, LiveIntervals
&LIS
,
183 VirtRegMap
&VRM
) const;
185 void postOptimization(Spiller
&VRegSpiller
, LiveIntervals
&LIS
);
188 char RegAllocPBQP::ID
= 0;
190 /// @brief Set spill costs for each node in the PBQP reg-alloc graph.
191 class SpillCosts
: public PBQPRAConstraint
{
193 void apply(PBQPRAGraph
&G
) override
{
194 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
196 // A minimum spill costs, so that register constraints can can be set
197 // without normalization in the [0.0:MinSpillCost( interval.
198 const PBQP::PBQPNum MinSpillCost
= 10.0;
200 for (auto NId
: G
.nodeIds()) {
201 PBQP::PBQPNum SpillCost
=
202 LIS
.getInterval(G
.getNodeMetadata(NId
).getVReg()).weight
;
203 if (SpillCost
== 0.0)
204 SpillCost
= std::numeric_limits
<PBQP::PBQPNum
>::min();
206 SpillCost
+= MinSpillCost
;
207 PBQPRAGraph::RawVector
NodeCosts(G
.getNodeCosts(NId
));
208 NodeCosts
[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost
;
209 G
.setNodeCosts(NId
, std::move(NodeCosts
));
214 /// @brief Add interference edges between overlapping vregs.
215 class Interference
: public PBQPRAConstraint
{
217 using AllowedRegVecPtr
= const PBQP::RegAlloc::AllowedRegVector
*;
218 using IKey
= std::pair
<AllowedRegVecPtr
, AllowedRegVecPtr
>;
219 using IMatrixCache
= DenseMap
<IKey
, PBQPRAGraph::MatrixPtr
>;
220 using DisjointAllowedRegsCache
= DenseSet
<IKey
>;
221 using IEdgeKey
= std::pair
<PBQP::GraphBase::NodeId
, PBQP::GraphBase::NodeId
>;
222 using IEdgeCache
= DenseSet
<IEdgeKey
>;
224 bool haveDisjointAllowedRegs(const PBQPRAGraph
&G
, PBQPRAGraph::NodeId NId
,
225 PBQPRAGraph::NodeId MId
,
226 const DisjointAllowedRegsCache
&D
) const {
227 const auto *NRegs
= &G
.getNodeMetadata(NId
).getAllowedRegs();
228 const auto *MRegs
= &G
.getNodeMetadata(MId
).getAllowedRegs();
234 return D
.count(IKey(NRegs
, MRegs
)) > 0;
236 return D
.count(IKey(MRegs
, NRegs
)) > 0;
239 void setDisjointAllowedRegs(const PBQPRAGraph
&G
, PBQPRAGraph::NodeId NId
,
240 PBQPRAGraph::NodeId MId
,
241 DisjointAllowedRegsCache
&D
) {
242 const auto *NRegs
= &G
.getNodeMetadata(NId
).getAllowedRegs();
243 const auto *MRegs
= &G
.getNodeMetadata(MId
).getAllowedRegs();
245 assert(NRegs
!= MRegs
&& "AllowedRegs can not be disjoint with itself");
248 D
.insert(IKey(NRegs
, MRegs
));
250 D
.insert(IKey(MRegs
, NRegs
));
253 // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
254 // for the fast interference graph construction algorithm. The last is there
255 // to save us from looking up node ids via the VRegToNode map in the graph
258 std::tuple
<LiveInterval
*, size_t, PBQP::GraphBase::NodeId
>;
260 static SlotIndex
getStartPoint(const IntervalInfo
&I
) {
261 return std::get
<0>(I
)->segments
[std::get
<1>(I
)].start
;
264 static SlotIndex
getEndPoint(const IntervalInfo
&I
) {
265 return std::get
<0>(I
)->segments
[std::get
<1>(I
)].end
;
268 static PBQP::GraphBase::NodeId
getNodeId(const IntervalInfo
&I
) {
269 return std::get
<2>(I
);
272 static bool lowestStartPoint(const IntervalInfo
&I1
,
273 const IntervalInfo
&I2
) {
274 // Condition reversed because priority queue has the *highest* element at
275 // the front, rather than the lowest.
276 return getStartPoint(I1
) > getStartPoint(I2
);
279 static bool lowestEndPoint(const IntervalInfo
&I1
,
280 const IntervalInfo
&I2
) {
281 SlotIndex E1
= getEndPoint(I1
);
282 SlotIndex E2
= getEndPoint(I2
);
290 // If two intervals end at the same point, we need a way to break the tie or
291 // the set will assume they're actually equal and refuse to insert a
292 // "duplicate". Just compare the vregs - fast and guaranteed unique.
293 return std::get
<0>(I1
)->reg
< std::get
<0>(I2
)->reg
;
296 static bool isAtLastSegment(const IntervalInfo
&I
) {
297 return std::get
<1>(I
) == std::get
<0>(I
)->size() - 1;
300 static IntervalInfo
nextSegment(const IntervalInfo
&I
) {
301 return std::make_tuple(std::get
<0>(I
), std::get
<1>(I
) + 1, std::get
<2>(I
));
305 void apply(PBQPRAGraph
&G
) override
{
306 // The following is loosely based on the linear scan algorithm introduced in
307 // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
308 // isn't linear, because the size of the active set isn't bound by the
309 // number of registers, but rather the size of the largest clique in the
310 // graph. Still, we expect this to be better than N^2.
311 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
313 // Interferenc matrices are incredibly regular - they're only a function of
314 // the allowed sets, so we cache them to avoid the overhead of constructing
315 // and uniquing them.
318 // Finding an edge is expensive in the worst case (O(max_clique(G))). So
319 // cache locally edges we have already seen.
322 // Cache known disjoint allowed registers pairs
323 DisjointAllowedRegsCache D
;
325 using IntervalSet
= std::set
<IntervalInfo
, decltype(&lowestEndPoint
)>;
326 using IntervalQueue
=
327 std::priority_queue
<IntervalInfo
, std::vector
<IntervalInfo
>,
328 decltype(&lowestStartPoint
)>;
329 IntervalSet
Active(lowestEndPoint
);
330 IntervalQueue
Inactive(lowestStartPoint
);
332 // Start by building the inactive set.
333 for (auto NId
: G
.nodeIds()) {
334 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
335 LiveInterval
&LI
= LIS
.getInterval(VReg
);
336 assert(!LI
.empty() && "PBQP graph contains node for empty interval");
337 Inactive
.push(std::make_tuple(&LI
, 0, NId
));
340 while (!Inactive
.empty()) {
341 // Tentatively grab the "next" interval - this choice may be overriden
343 IntervalInfo Cur
= Inactive
.top();
345 // Retire any active intervals that end before Cur starts.
346 IntervalSet::iterator RetireItr
= Active
.begin();
347 while (RetireItr
!= Active
.end() &&
348 (getEndPoint(*RetireItr
) <= getStartPoint(Cur
))) {
349 // If this interval has subsequent segments, add the next one to the
351 if (!isAtLastSegment(*RetireItr
))
352 Inactive
.push(nextSegment(*RetireItr
));
356 Active
.erase(Active
.begin(), RetireItr
);
358 // One of the newly retired segments may actually start before the
359 // Cur segment, so re-grab the front of the inactive list.
360 Cur
= Inactive
.top();
363 // At this point we know that Cur overlaps all active intervals. Add the
364 // interference edges.
365 PBQP::GraphBase::NodeId NId
= getNodeId(Cur
);
366 for (const auto &A
: Active
) {
367 PBQP::GraphBase::NodeId MId
= getNodeId(A
);
369 // Do not add an edge when the nodes' allowed registers do not
370 // intersect: there is obviously no interference.
371 if (haveDisjointAllowedRegs(G
, NId
, MId
, D
))
374 // Check that we haven't already added this edge
375 IEdgeKey
EK(std::min(NId
, MId
), std::max(NId
, MId
));
379 // This is a new edge - add it to the graph.
380 if (!createInterferenceEdge(G
, NId
, MId
, C
))
381 setDisjointAllowedRegs(G
, NId
, MId
, D
);
386 // Finally, add Cur to the Active set.
392 // Create an Interference edge and add it to the graph, unless it is
393 // a null matrix, meaning the nodes' allowed registers do not have any
394 // interference. This case occurs frequently between integer and floating
395 // point registers for example.
396 // return true iff both nodes interferes.
397 bool createInterferenceEdge(PBQPRAGraph
&G
,
398 PBQPRAGraph::NodeId NId
, PBQPRAGraph::NodeId MId
,
400 const TargetRegisterInfo
&TRI
=
401 *G
.getMetadata().MF
.getSubtarget().getRegisterInfo();
402 const auto &NRegs
= G
.getNodeMetadata(NId
).getAllowedRegs();
403 const auto &MRegs
= G
.getNodeMetadata(MId
).getAllowedRegs();
405 // Try looking the edge costs up in the IMatrixCache first.
406 IKey
K(&NRegs
, &MRegs
);
407 IMatrixCache::iterator I
= C
.find(K
);
409 G
.addEdgeBypassingCostAllocator(NId
, MId
, I
->second
);
413 PBQPRAGraph::RawMatrix
M(NRegs
.size() + 1, MRegs
.size() + 1, 0);
414 bool NodesInterfere
= false;
415 for (unsigned I
= 0; I
!= NRegs
.size(); ++I
) {
416 unsigned PRegN
= NRegs
[I
];
417 for (unsigned J
= 0; J
!= MRegs
.size(); ++J
) {
418 unsigned PRegM
= MRegs
[J
];
419 if (TRI
.regsOverlap(PRegN
, PRegM
)) {
420 M
[I
+ 1][J
+ 1] = std::numeric_limits
<PBQP::PBQPNum
>::infinity();
421 NodesInterfere
= true;
429 PBQPRAGraph::EdgeId EId
= G
.addEdge(NId
, MId
, std::move(M
));
430 C
[K
] = G
.getEdgeCostsPtr(EId
);
436 class Coalescing
: public PBQPRAConstraint
{
438 void apply(PBQPRAGraph
&G
) override
{
439 MachineFunction
&MF
= G
.getMetadata().MF
;
440 MachineBlockFrequencyInfo
&MBFI
= G
.getMetadata().MBFI
;
441 CoalescerPair
CP(*MF
.getSubtarget().getRegisterInfo());
443 // Scan the machine function and add a coalescing cost whenever CoalescerPair
445 for (const auto &MBB
: MF
) {
446 for (const auto &MI
: MBB
) {
447 // Skip not-coalescable or already coalesced copies.
448 if (!CP
.setRegisters(&MI
) || CP
.getSrcReg() == CP
.getDstReg())
451 unsigned DstReg
= CP
.getDstReg();
452 unsigned SrcReg
= CP
.getSrcReg();
454 const float Scale
= 1.0f
/ MBFI
.getEntryFreq();
455 PBQP::PBQPNum CBenefit
= MBFI
.getBlockFreq(&MBB
).getFrequency() * Scale
;
458 if (!MF
.getRegInfo().isAllocatable(DstReg
))
461 PBQPRAGraph::NodeId NId
= G
.getMetadata().getNodeIdForVReg(SrcReg
);
463 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed
=
464 G
.getNodeMetadata(NId
).getAllowedRegs();
466 unsigned PRegOpt
= 0;
467 while (PRegOpt
< Allowed
.size() && Allowed
[PRegOpt
] != DstReg
)
470 if (PRegOpt
< Allowed
.size()) {
471 PBQPRAGraph::RawVector
NewCosts(G
.getNodeCosts(NId
));
472 NewCosts
[PRegOpt
+ 1] -= CBenefit
;
473 G
.setNodeCosts(NId
, std::move(NewCosts
));
476 PBQPRAGraph::NodeId N1Id
= G
.getMetadata().getNodeIdForVReg(DstReg
);
477 PBQPRAGraph::NodeId N2Id
= G
.getMetadata().getNodeIdForVReg(SrcReg
);
478 const PBQPRAGraph::NodeMetadata::AllowedRegVector
*Allowed1
=
479 &G
.getNodeMetadata(N1Id
).getAllowedRegs();
480 const PBQPRAGraph::NodeMetadata::AllowedRegVector
*Allowed2
=
481 &G
.getNodeMetadata(N2Id
).getAllowedRegs();
483 PBQPRAGraph::EdgeId EId
= G
.findEdge(N1Id
, N2Id
);
484 if (EId
== G
.invalidEdgeId()) {
485 PBQPRAGraph::RawMatrix
Costs(Allowed1
->size() + 1,
486 Allowed2
->size() + 1, 0);
487 addVirtRegCoalesce(Costs
, *Allowed1
, *Allowed2
, CBenefit
);
488 G
.addEdge(N1Id
, N2Id
, std::move(Costs
));
490 if (G
.getEdgeNode1Id(EId
) == N2Id
) {
491 std::swap(N1Id
, N2Id
);
492 std::swap(Allowed1
, Allowed2
);
494 PBQPRAGraph::RawMatrix
Costs(G
.getEdgeCosts(EId
));
495 addVirtRegCoalesce(Costs
, *Allowed1
, *Allowed2
, CBenefit
);
496 G
.updateEdgeCosts(EId
, std::move(Costs
));
504 void addVirtRegCoalesce(
505 PBQPRAGraph::RawMatrix
&CostMat
,
506 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed1
,
507 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed2
,
508 PBQP::PBQPNum Benefit
) {
509 assert(CostMat
.getRows() == Allowed1
.size() + 1 && "Size mismatch.");
510 assert(CostMat
.getCols() == Allowed2
.size() + 1 && "Size mismatch.");
511 for (unsigned I
= 0; I
!= Allowed1
.size(); ++I
) {
512 unsigned PReg1
= Allowed1
[I
];
513 for (unsigned J
= 0; J
!= Allowed2
.size(); ++J
) {
514 unsigned PReg2
= Allowed2
[J
];
516 CostMat
[I
+ 1][J
+ 1] -= Benefit
;
522 } // end anonymous namespace
524 // Out-of-line destructor/anchor for PBQPRAConstraint.
525 PBQPRAConstraint::~PBQPRAConstraint() = default;
527 void PBQPRAConstraint::anchor() {}
529 void PBQPRAConstraintList::anchor() {}
531 void RegAllocPBQP::getAnalysisUsage(AnalysisUsage
&au
) const {
532 au
.setPreservesCFG();
533 au
.addRequired
<AAResultsWrapperPass
>();
534 au
.addPreserved
<AAResultsWrapperPass
>();
535 au
.addRequired
<SlotIndexes
>();
536 au
.addPreserved
<SlotIndexes
>();
537 au
.addRequired
<LiveIntervals
>();
538 au
.addPreserved
<LiveIntervals
>();
539 //au.addRequiredID(SplitCriticalEdgesID);
541 au
.addRequiredID(*customPassID
);
542 au
.addRequired
<LiveStacks
>();
543 au
.addPreserved
<LiveStacks
>();
544 au
.addRequired
<MachineBlockFrequencyInfo
>();
545 au
.addPreserved
<MachineBlockFrequencyInfo
>();
546 au
.addRequired
<MachineLoopInfo
>();
547 au
.addPreserved
<MachineLoopInfo
>();
548 au
.addRequired
<MachineDominatorTree
>();
549 au
.addPreserved
<MachineDominatorTree
>();
550 au
.addRequired
<VirtRegMap
>();
551 au
.addPreserved
<VirtRegMap
>();
552 MachineFunctionPass::getAnalysisUsage(au
);
555 void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction
&MF
,
556 LiveIntervals
&LIS
) {
557 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
559 // Iterate over all live ranges.
560 for (unsigned I
= 0, E
= MRI
.getNumVirtRegs(); I
!= E
; ++I
) {
561 unsigned Reg
= TargetRegisterInfo::index2VirtReg(I
);
562 if (MRI
.reg_nodbg_empty(Reg
))
564 LiveInterval
&LI
= LIS
.getInterval(Reg
);
566 // If this live interval is non-empty we will use pbqp to allocate it.
567 // Empty intervals we allocate in a simple post-processing stage in
570 VRegsToAlloc
.insert(LI
.reg
);
572 EmptyIntervalVRegs
.insert(LI
.reg
);
577 static bool isACalleeSavedRegister(unsigned reg
, const TargetRegisterInfo
&TRI
,
578 const MachineFunction
&MF
) {
579 const MCPhysReg
*CSR
= MF
.getRegInfo().getCalleeSavedRegs();
580 for (unsigned i
= 0; CSR
[i
] != 0; ++i
)
581 if (TRI
.regsOverlap(reg
, CSR
[i
]))
586 void RegAllocPBQP::initializeGraph(PBQPRAGraph
&G
, VirtRegMap
&VRM
,
587 Spiller
&VRegSpiller
) {
588 MachineFunction
&MF
= G
.getMetadata().MF
;
590 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
591 const MachineRegisterInfo
&MRI
= G
.getMetadata().MF
.getRegInfo();
592 const TargetRegisterInfo
&TRI
=
593 *G
.getMetadata().MF
.getSubtarget().getRegisterInfo();
595 std::vector
<unsigned> Worklist(VRegsToAlloc
.begin(), VRegsToAlloc
.end());
597 while (!Worklist
.empty()) {
598 unsigned VReg
= Worklist
.back();
601 const TargetRegisterClass
*TRC
= MRI
.getRegClass(VReg
);
602 LiveInterval
&VRegLI
= LIS
.getInterval(VReg
);
604 // Record any overlaps with regmask operands.
605 BitVector RegMaskOverlaps
;
606 LIS
.checkRegMaskInterference(VRegLI
, RegMaskOverlaps
);
608 // Compute an initial allowed set for the current vreg.
609 std::vector
<unsigned> VRegAllowed
;
610 ArrayRef
<MCPhysReg
> RawPRegOrder
= TRC
->getRawAllocationOrder(MF
);
611 for (unsigned I
= 0; I
!= RawPRegOrder
.size(); ++I
) {
612 unsigned PReg
= RawPRegOrder
[I
];
613 if (MRI
.isReserved(PReg
))
616 // vregLI crosses a regmask operand that clobbers preg.
617 if (!RegMaskOverlaps
.empty() && !RegMaskOverlaps
.test(PReg
))
620 // vregLI overlaps fixed regunit interference.
621 bool Interference
= false;
622 for (MCRegUnitIterator
Units(PReg
, &TRI
); Units
.isValid(); ++Units
) {
623 if (VRegLI
.overlaps(LIS
.getRegUnit(*Units
))) {
631 // preg is usable for this virtual register.
632 VRegAllowed
.push_back(PReg
);
635 // Check for vregs that have no allowed registers. These should be
636 // pre-spilled and the new vregs added to the worklist.
637 if (VRegAllowed
.empty()) {
638 SmallVector
<unsigned, 8> NewVRegs
;
639 spillVReg(VReg
, NewVRegs
, MF
, LIS
, VRM
, VRegSpiller
);
640 Worklist
.insert(Worklist
.end(), NewVRegs
.begin(), NewVRegs
.end());
644 PBQPRAGraph::RawVector
NodeCosts(VRegAllowed
.size() + 1, 0);
646 // Tweak cost of callee saved registers, as using then force spilling and
647 // restoring them. This would only happen in the prologue / epilogue though.
648 for (unsigned i
= 0; i
!= VRegAllowed
.size(); ++i
)
649 if (isACalleeSavedRegister(VRegAllowed
[i
], TRI
, MF
))
650 NodeCosts
[1 + i
] += 1.0;
652 PBQPRAGraph::NodeId NId
= G
.addNode(std::move(NodeCosts
));
653 G
.getNodeMetadata(NId
).setVReg(VReg
);
654 G
.getNodeMetadata(NId
).setAllowedRegs(
655 G
.getMetadata().getAllowedRegs(std::move(VRegAllowed
)));
656 G
.getMetadata().setNodeIdForVReg(VReg
, NId
);
660 void RegAllocPBQP::spillVReg(unsigned VReg
,
661 SmallVectorImpl
<unsigned> &NewIntervals
,
662 MachineFunction
&MF
, LiveIntervals
&LIS
,
663 VirtRegMap
&VRM
, Spiller
&VRegSpiller
) {
664 VRegsToAlloc
.erase(VReg
);
665 LiveRangeEdit
LRE(&LIS
.getInterval(VReg
), NewIntervals
, MF
, LIS
, &VRM
,
666 nullptr, &DeadRemats
);
667 VRegSpiller
.spill(LRE
);
669 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
671 DEBUG(dbgs() << "VREG " << PrintReg(VReg
, &TRI
) << " -> SPILLED (Cost: "
672 << LRE
.getParent().weight
<< ", New vregs: ");
674 // Copy any newly inserted live intervals into the list of regs to
676 for (LiveRangeEdit::iterator I
= LRE
.begin(), E
= LRE
.end();
678 const LiveInterval
&LI
= LIS
.getInterval(*I
);
679 assert(!LI
.empty() && "Empty spill range.");
680 DEBUG(dbgs() << PrintReg(LI
.reg
, &TRI
) << " ");
681 VRegsToAlloc
.insert(LI
.reg
);
684 DEBUG(dbgs() << ")\n");
687 bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph
&G
,
688 const PBQP::Solution
&Solution
,
690 Spiller
&VRegSpiller
) {
691 MachineFunction
&MF
= G
.getMetadata().MF
;
692 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
693 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
696 // Set to true if we have any spills
697 bool AnotherRoundNeeded
= false;
699 // Clear the existing allocation.
702 // Iterate over the nodes mapping the PBQP solution to a register
704 for (auto NId
: G
.nodeIds()) {
705 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
706 unsigned AllocOption
= Solution
.getSelection(NId
);
708 if (AllocOption
!= PBQP::RegAlloc::getSpillOptionIdx()) {
709 unsigned PReg
= G
.getNodeMetadata(NId
).getAllowedRegs()[AllocOption
- 1];
710 DEBUG(dbgs() << "VREG " << PrintReg(VReg
, &TRI
) << " -> "
711 << TRI
.getName(PReg
) << "\n");
712 assert(PReg
!= 0 && "Invalid preg selected.");
713 VRM
.assignVirt2Phys(VReg
, PReg
);
715 // Spill VReg. If this introduces new intervals we'll need another round
717 SmallVector
<unsigned, 8> NewVRegs
;
718 spillVReg(VReg
, NewVRegs
, MF
, LIS
, VRM
, VRegSpiller
);
719 AnotherRoundNeeded
|= !NewVRegs
.empty();
723 return !AnotherRoundNeeded
;
726 void RegAllocPBQP::finalizeAlloc(MachineFunction
&MF
,
728 VirtRegMap
&VRM
) const {
729 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
731 // First allocate registers for the empty intervals.
732 for (RegSet::const_iterator
733 I
= EmptyIntervalVRegs
.begin(), E
= EmptyIntervalVRegs
.end();
735 LiveInterval
&LI
= LIS
.getInterval(*I
);
737 unsigned PReg
= MRI
.getSimpleHint(LI
.reg
);
740 const TargetRegisterClass
&RC
= *MRI
.getRegClass(LI
.reg
);
741 const ArrayRef
<MCPhysReg
> RawPRegOrder
= RC
.getRawAllocationOrder(MF
);
742 for (unsigned CandidateReg
: RawPRegOrder
) {
743 if (!VRM
.getRegInfo().isReserved(CandidateReg
)) {
749 "No un-reserved physical registers in this register class");
752 VRM
.assignVirt2Phys(LI
.reg
, PReg
);
756 void RegAllocPBQP::postOptimization(Spiller
&VRegSpiller
, LiveIntervals
&LIS
) {
757 VRegSpiller
.postOptimization();
758 /// Remove dead defs because of rematerialization.
759 for (auto DeadInst
: DeadRemats
) {
760 LIS
.RemoveMachineInstrFromMaps(*DeadInst
);
761 DeadInst
->eraseFromParent();
766 static inline float normalizePBQPSpillWeight(float UseDefFreq
, unsigned Size
,
768 // All intervals have a spill weight that is mostly proportional to the number
769 // of uses, with uses in loops having a bigger weight.
770 return NumInstr
* normalizeSpillWeight(UseDefFreq
, Size
, 1);
773 bool RegAllocPBQP::runOnMachineFunction(MachineFunction
&MF
) {
774 LiveIntervals
&LIS
= getAnalysis
<LiveIntervals
>();
775 MachineBlockFrequencyInfo
&MBFI
=
776 getAnalysis
<MachineBlockFrequencyInfo
>();
778 VirtRegMap
&VRM
= getAnalysis
<VirtRegMap
>();
780 calculateSpillWeightsAndHints(LIS
, MF
, &VRM
, getAnalysis
<MachineLoopInfo
>(),
781 MBFI
, normalizePBQPSpillWeight
);
783 std::unique_ptr
<Spiller
> VRegSpiller(createInlineSpiller(*this, MF
, VRM
));
785 MF
.getRegInfo().freezeReservedRegs(MF
);
787 DEBUG(dbgs() << "PBQP Register Allocating for " << MF
.getName() << "\n");
789 // Allocator main loop:
791 // * Map current regalloc problem to a PBQP problem
792 // * Solve the PBQP problem
793 // * Map the solution back to a register allocation
794 // * Spill if necessary
796 // This process is continued till no more spills are generated.
798 // Find the vreg intervals in need of allocation.
799 findVRegIntervalsToAlloc(MF
, LIS
);
802 const Function
&F
= *MF
.getFunction();
803 std::string FullyQualifiedName
=
804 F
.getParent()->getModuleIdentifier() + "." + F
.getName().str();
807 // If there are non-empty intervals allocate them using pbqp.
808 if (!VRegsToAlloc
.empty()) {
809 const TargetSubtargetInfo
&Subtarget
= MF
.getSubtarget();
810 std::unique_ptr
<PBQPRAConstraintList
> ConstraintsRoot
=
811 llvm::make_unique
<PBQPRAConstraintList
>();
812 ConstraintsRoot
->addConstraint(llvm::make_unique
<SpillCosts
>());
813 ConstraintsRoot
->addConstraint(llvm::make_unique
<Interference
>());
815 ConstraintsRoot
->addConstraint(llvm::make_unique
<Coalescing
>());
816 ConstraintsRoot
->addConstraint(Subtarget
.getCustomPBQPConstraints());
818 bool PBQPAllocComplete
= false;
821 while (!PBQPAllocComplete
) {
822 DEBUG(dbgs() << " PBQP Regalloc round " << Round
<< ":\n");
824 PBQPRAGraph
G(PBQPRAGraph::GraphMetadata(MF
, LIS
, MBFI
));
825 initializeGraph(G
, VRM
, *VRegSpiller
);
826 ConstraintsRoot
->apply(G
);
829 if (PBQPDumpGraphs
) {
830 std::ostringstream RS
;
832 std::string GraphFileName
= FullyQualifiedName
+ "." + RS
.str() +
835 raw_fd_ostream
OS(GraphFileName
, EC
, sys::fs::F_Text
);
836 DEBUG(dbgs() << "Dumping graph for round " << Round
<< " to \""
837 << GraphFileName
<< "\"\n");
842 PBQP::Solution Solution
= PBQP::RegAlloc::solve(G
);
843 PBQPAllocComplete
= mapPBQPToRegAlloc(G
, Solution
, VRM
, *VRegSpiller
);
848 // Finalise allocation, allocate empty ranges.
849 finalizeAlloc(MF
, LIS
, VRM
);
850 postOptimization(*VRegSpiller
, LIS
);
851 VRegsToAlloc
.clear();
852 EmptyIntervalVRegs
.clear();
854 DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM
<< "\n");
859 /// Create Printable object for node and register info.
860 static Printable
PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId
,
861 const PBQP::RegAlloc::PBQPRAGraph
&G
) {
862 return Printable([NId
, &G
](raw_ostream
&OS
) {
863 const MachineRegisterInfo
&MRI
= G
.getMetadata().MF
.getRegInfo();
864 const TargetRegisterInfo
*TRI
= MRI
.getTargetRegisterInfo();
865 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
866 const char *RegClassName
= TRI
->getRegClassName(MRI
.getRegClass(VReg
));
867 OS
<< NId
<< " (" << RegClassName
<< ':' << PrintReg(VReg
, TRI
) << ')';
871 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
872 LLVM_DUMP_METHOD
void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream
&OS
) const {
873 for (auto NId
: nodeIds()) {
874 const Vector
&Costs
= getNodeCosts(NId
);
875 assert(Costs
.getLength() != 0 && "Empty vector in graph.");
876 OS
<< PrintNodeInfo(NId
, *this) << ": " << Costs
<< '\n';
880 for (auto EId
: edgeIds()) {
881 NodeId N1Id
= getEdgeNode1Id(EId
);
882 NodeId N2Id
= getEdgeNode2Id(EId
);
883 assert(N1Id
!= N2Id
&& "PBQP graphs should not have self-edges.");
884 const Matrix
&M
= getEdgeCosts(EId
);
885 assert(M
.getRows() != 0 && "No rows in matrix.");
886 assert(M
.getCols() != 0 && "No cols in matrix.");
887 OS
<< PrintNodeInfo(N1Id
, *this) << ' ' << M
.getRows() << " rows / ";
888 OS
<< PrintNodeInfo(N2Id
, *this) << ' ' << M
.getCols() << " cols:\n";
893 LLVM_DUMP_METHOD
void PBQP::RegAlloc::PBQPRAGraph::dump() const {
898 void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream
&OS
) const {
900 for (auto NId
: nodeIds()) {
901 OS
<< " node" << NId
<< " [ label=\""
902 << PrintNodeInfo(NId
, *this) << "\\n"
903 << getNodeCosts(NId
) << "\" ]\n";
906 OS
<< " edge [ len=" << nodeIds().size() << " ]\n";
907 for (auto EId
: edgeIds()) {
908 OS
<< " node" << getEdgeNode1Id(EId
)
909 << " -- node" << getEdgeNode2Id(EId
)
911 const Matrix
&EdgeCosts
= getEdgeCosts(EId
);
912 for (unsigned i
= 0; i
< EdgeCosts
.getRows(); ++i
) {
913 OS
<< EdgeCosts
.getRowAsVector(i
) << "\\n";
920 FunctionPass
*llvm::createPBQPRegisterAllocator(char *customPassID
) {
921 return new RegAllocPBQP(customPassID
);
924 FunctionPass
* llvm::createDefaultPBQPRegisterAllocator() {
925 return createPBQPRegisterAllocator();