1 //===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
10 // register allocator for LLVM. This allocator works by constructing a PBQP
11 // problem representing the register allocation problem under consideration,
12 // solving this using a PBQP solver, and mapping the solution back to a
13 // register assignment. If any variables are selected for spilling then spill
14 // code is inserted and the process repeated.
16 // The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
17 // for register allocation. For more information on PBQP for register
18 // allocation, see the following papers:
20 // (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
21 // PBQP. In Proceedings of the 7th Joint Modular Languages Conference
22 // (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
24 // (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
25 // architectures. In Proceedings of the Joint Conference on Languages,
26 // Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
29 //===----------------------------------------------------------------------===//
31 #include "llvm/CodeGen/RegAllocPBQP.h"
32 #include "RegisterCoalescer.h"
33 #include "llvm/ADT/ArrayRef.h"
34 #include "llvm/ADT/BitVector.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/DenseSet.h"
37 #include "llvm/ADT/STLExtras.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/StringRef.h"
41 #include "llvm/Analysis/AliasAnalysis.h"
42 #include "llvm/CodeGen/CalcSpillWeights.h"
43 #include "llvm/CodeGen/LiveInterval.h"
44 #include "llvm/CodeGen/LiveIntervals.h"
45 #include "llvm/CodeGen/LiveRangeEdit.h"
46 #include "llvm/CodeGen/LiveStacks.h"
47 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
48 #include "llvm/CodeGen/MachineDominators.h"
49 #include "llvm/CodeGen/MachineFunction.h"
50 #include "llvm/CodeGen/MachineFunctionPass.h"
51 #include "llvm/CodeGen/MachineInstr.h"
52 #include "llvm/CodeGen/MachineLoopInfo.h"
53 #include "llvm/CodeGen/MachineRegisterInfo.h"
54 #include "llvm/CodeGen/PBQP/Graph.h"
55 #include "llvm/CodeGen/PBQP/Math.h"
56 #include "llvm/CodeGen/PBQP/Solution.h"
57 #include "llvm/CodeGen/PBQPRAConstraint.h"
58 #include "llvm/CodeGen/RegAllocRegistry.h"
59 #include "llvm/CodeGen/SlotIndexes.h"
60 #include "llvm/CodeGen/Spiller.h"
61 #include "llvm/CodeGen/TargetRegisterInfo.h"
62 #include "llvm/CodeGen/TargetSubtargetInfo.h"
63 #include "llvm/CodeGen/VirtRegMap.h"
64 #include "llvm/Config/llvm-config.h"
65 #include "llvm/IR/Function.h"
66 #include "llvm/IR/Module.h"
67 #include "llvm/Pass.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Compiler.h"
70 #include "llvm/Support/Debug.h"
71 #include "llvm/Support/FileSystem.h"
72 #include "llvm/Support/Printable.h"
73 #include "llvm/Support/raw_ostream.h"
84 #include <system_error>
91 #define DEBUG_TYPE "regalloc"
93 static RegisterRegAlloc
94 RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
95 createDefaultPBQPRegisterAllocator
);
98 PBQPCoalescing("pbqp-coalescing",
99 cl::desc("Attempt coalescing during PBQP register allocation."),
100 cl::init(false), cl::Hidden
);
104 PBQPDumpGraphs("pbqp-dump-graphs",
105 cl::desc("Dump graphs for each function/round in the compilation unit."),
106 cl::init(false), cl::Hidden
);
112 /// PBQP based allocators solve the register allocation problem by mapping
113 /// register allocation problems to Partitioned Boolean Quadratic
114 /// Programming problems.
115 class RegAllocPBQP
: public MachineFunctionPass
{
119 /// Construct a PBQP register allocator.
120 RegAllocPBQP(char *cPassID
= nullptr)
121 : MachineFunctionPass(ID
), customPassID(cPassID
) {
122 initializeSlotIndexesWrapperPassPass(*PassRegistry::getPassRegistry());
123 initializeLiveIntervalsWrapperPassPass(*PassRegistry::getPassRegistry());
124 initializeLiveStacksWrapperLegacyPass(*PassRegistry::getPassRegistry());
125 initializeVirtRegMapWrapperLegacyPass(*PassRegistry::getPassRegistry());
128 /// Return the pass name.
129 StringRef
getPassName() const override
{ return "PBQP Register Allocator"; }
131 /// PBQP analysis usage.
132 void getAnalysisUsage(AnalysisUsage
&au
) const override
;
134 /// Perform register allocation
135 bool runOnMachineFunction(MachineFunction
&MF
) override
;
137 MachineFunctionProperties
getRequiredProperties() const override
{
138 return MachineFunctionProperties().set(
139 MachineFunctionProperties::Property::NoPHIs
);
142 MachineFunctionProperties
getClearedProperties() const override
{
143 return MachineFunctionProperties().set(
144 MachineFunctionProperties::Property::IsSSA
);
148 using RegSet
= std::set
<Register
>;
152 RegSet VRegsToAlloc
, EmptyIntervalVRegs
;
154 /// Inst which is a def of an original reg and whose defs are already all
155 /// dead after remat is saved in DeadRemats. The deletion of such inst is
156 /// postponed till all the allocations are done, so its remat expr is
157 /// always available for the remat of all the siblings of the original reg.
158 SmallPtrSet
<MachineInstr
*, 32> DeadRemats
;
160 /// Finds the initial set of vreg intervals to allocate.
161 void findVRegIntervalsToAlloc(const MachineFunction
&MF
, LiveIntervals
&LIS
);
163 /// Constructs an initial graph.
164 void initializeGraph(PBQPRAGraph
&G
, VirtRegMap
&VRM
, Spiller
&VRegSpiller
);
166 /// Spill the given VReg.
167 void spillVReg(Register VReg
, SmallVectorImpl
<Register
> &NewIntervals
,
168 MachineFunction
&MF
, LiveIntervals
&LIS
, VirtRegMap
&VRM
,
169 Spiller
&VRegSpiller
);
171 /// Given a solved PBQP problem maps this solution back to a register
173 bool mapPBQPToRegAlloc(const PBQPRAGraph
&G
,
174 const PBQP::Solution
&Solution
,
176 Spiller
&VRegSpiller
);
178 /// Postprocessing before final spilling. Sets basic block "live in"
180 void finalizeAlloc(MachineFunction
&MF
, LiveIntervals
&LIS
,
181 VirtRegMap
&VRM
) const;
183 void postOptimization(Spiller
&VRegSpiller
, LiveIntervals
&LIS
);
186 char RegAllocPBQP::ID
= 0;
188 /// Set spill costs for each node in the PBQP reg-alloc graph.
189 class SpillCosts
: public PBQPRAConstraint
{
191 void apply(PBQPRAGraph
&G
) override
{
192 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
194 // A minimum spill costs, so that register constraints can be set
195 // without normalization in the [0.0:MinSpillCost( interval.
196 const PBQP::PBQPNum MinSpillCost
= 10.0;
198 for (auto NId
: G
.nodeIds()) {
199 PBQP::PBQPNum SpillCost
=
200 LIS
.getInterval(G
.getNodeMetadata(NId
).getVReg()).weight();
201 if (SpillCost
== 0.0)
202 SpillCost
= std::numeric_limits
<PBQP::PBQPNum
>::min();
204 SpillCost
+= MinSpillCost
;
205 PBQPRAGraph::RawVector
NodeCosts(G
.getNodeCosts(NId
));
206 NodeCosts
[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost
;
207 G
.setNodeCosts(NId
, std::move(NodeCosts
));
212 /// Add interference edges between overlapping vregs.
213 class Interference
: public PBQPRAConstraint
{
215 using AllowedRegVecPtr
= const PBQP::RegAlloc::AllowedRegVector
*;
216 using IKey
= std::pair
<AllowedRegVecPtr
, AllowedRegVecPtr
>;
217 using IMatrixCache
= DenseMap
<IKey
, PBQPRAGraph::MatrixPtr
>;
218 using DisjointAllowedRegsCache
= DenseSet
<IKey
>;
219 using IEdgeKey
= std::pair
<PBQP::GraphBase::NodeId
, PBQP::GraphBase::NodeId
>;
220 using IEdgeCache
= DenseSet
<IEdgeKey
>;
222 bool haveDisjointAllowedRegs(const PBQPRAGraph
&G
, PBQPRAGraph::NodeId NId
,
223 PBQPRAGraph::NodeId MId
,
224 const DisjointAllowedRegsCache
&D
) const {
225 const auto *NRegs
= &G
.getNodeMetadata(NId
).getAllowedRegs();
226 const auto *MRegs
= &G
.getNodeMetadata(MId
).getAllowedRegs();
232 return D
.contains(IKey(NRegs
, MRegs
));
234 return D
.contains(IKey(MRegs
, NRegs
));
237 void setDisjointAllowedRegs(const PBQPRAGraph
&G
, PBQPRAGraph::NodeId NId
,
238 PBQPRAGraph::NodeId MId
,
239 DisjointAllowedRegsCache
&D
) {
240 const auto *NRegs
= &G
.getNodeMetadata(NId
).getAllowedRegs();
241 const auto *MRegs
= &G
.getNodeMetadata(MId
).getAllowedRegs();
243 assert(NRegs
!= MRegs
&& "AllowedRegs can not be disjoint with itself");
246 D
.insert(IKey(NRegs
, MRegs
));
248 D
.insert(IKey(MRegs
, NRegs
));
251 // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
252 // for the fast interference graph construction algorithm. The last is there
253 // to save us from looking up node ids via the VRegToNode map in the graph
256 std::tuple
<LiveInterval
*, size_t, PBQP::GraphBase::NodeId
>;
258 static SlotIndex
getStartPoint(const IntervalInfo
&I
) {
259 return std::get
<0>(I
)->segments
[std::get
<1>(I
)].start
;
262 static SlotIndex
getEndPoint(const IntervalInfo
&I
) {
263 return std::get
<0>(I
)->segments
[std::get
<1>(I
)].end
;
266 static PBQP::GraphBase::NodeId
getNodeId(const IntervalInfo
&I
) {
267 return std::get
<2>(I
);
270 static bool lowestStartPoint(const IntervalInfo
&I1
,
271 const IntervalInfo
&I2
) {
272 // Condition reversed because priority queue has the *highest* element at
273 // the front, rather than the lowest.
274 return getStartPoint(I1
) > getStartPoint(I2
);
277 static bool lowestEndPoint(const IntervalInfo
&I1
,
278 const IntervalInfo
&I2
) {
279 SlotIndex E1
= getEndPoint(I1
);
280 SlotIndex E2
= getEndPoint(I2
);
288 // If two intervals end at the same point, we need a way to break the tie or
289 // the set will assume they're actually equal and refuse to insert a
290 // "duplicate". Just compare the vregs - fast and guaranteed unique.
291 return std::get
<0>(I1
)->reg() < std::get
<0>(I2
)->reg();
294 static bool isAtLastSegment(const IntervalInfo
&I
) {
295 return std::get
<1>(I
) == std::get
<0>(I
)->size() - 1;
298 static IntervalInfo
nextSegment(const IntervalInfo
&I
) {
299 return std::make_tuple(std::get
<0>(I
), std::get
<1>(I
) + 1, std::get
<2>(I
));
303 void apply(PBQPRAGraph
&G
) override
{
304 // The following is loosely based on the linear scan algorithm introduced in
305 // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
306 // isn't linear, because the size of the active set isn't bound by the
307 // number of registers, but rather the size of the largest clique in the
308 // graph. Still, we expect this to be better than N^2.
309 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
311 // Interferenc matrices are incredibly regular - they're only a function of
312 // the allowed sets, so we cache them to avoid the overhead of constructing
313 // and uniquing them.
316 // Finding an edge is expensive in the worst case (O(max_clique(G))). So
317 // cache locally edges we have already seen.
320 // Cache known disjoint allowed registers pairs
321 DisjointAllowedRegsCache D
;
323 using IntervalSet
= std::set
<IntervalInfo
, decltype(&lowestEndPoint
)>;
324 using IntervalQueue
=
325 std::priority_queue
<IntervalInfo
, std::vector
<IntervalInfo
>,
326 decltype(&lowestStartPoint
)>;
327 IntervalSet
Active(lowestEndPoint
);
328 IntervalQueue
Inactive(lowestStartPoint
);
330 // Start by building the inactive set.
331 for (auto NId
: G
.nodeIds()) {
332 Register VReg
= G
.getNodeMetadata(NId
).getVReg();
333 LiveInterval
&LI
= LIS
.getInterval(VReg
);
334 assert(!LI
.empty() && "PBQP graph contains node for empty interval");
335 Inactive
.push(std::make_tuple(&LI
, 0, NId
));
338 while (!Inactive
.empty()) {
339 // Tentatively grab the "next" interval - this choice may be overriden
341 IntervalInfo Cur
= Inactive
.top();
343 // Retire any active intervals that end before Cur starts.
344 IntervalSet::iterator RetireItr
= Active
.begin();
345 while (RetireItr
!= Active
.end() &&
346 (getEndPoint(*RetireItr
) <= getStartPoint(Cur
))) {
347 // If this interval has subsequent segments, add the next one to the
349 if (!isAtLastSegment(*RetireItr
))
350 Inactive
.push(nextSegment(*RetireItr
));
354 Active
.erase(Active
.begin(), RetireItr
);
356 // One of the newly retired segments may actually start before the
357 // Cur segment, so re-grab the front of the inactive list.
358 Cur
= Inactive
.top();
361 // At this point we know that Cur overlaps all active intervals. Add the
362 // interference edges.
363 PBQP::GraphBase::NodeId NId
= getNodeId(Cur
);
364 for (const auto &A
: Active
) {
365 PBQP::GraphBase::NodeId MId
= getNodeId(A
);
367 // Do not add an edge when the nodes' allowed registers do not
368 // intersect: there is obviously no interference.
369 if (haveDisjointAllowedRegs(G
, NId
, MId
, D
))
372 // Check that we haven't already added this edge
373 IEdgeKey
EK(std::min(NId
, MId
), std::max(NId
, MId
));
377 // This is a new edge - add it to the graph.
378 if (!createInterferenceEdge(G
, NId
, MId
, C
))
379 setDisjointAllowedRegs(G
, NId
, MId
, D
);
384 // Finally, add Cur to the Active set.
390 // Create an Interference edge and add it to the graph, unless it is
391 // a null matrix, meaning the nodes' allowed registers do not have any
392 // interference. This case occurs frequently between integer and floating
393 // point registers for example.
394 // return true iff both nodes interferes.
395 bool createInterferenceEdge(PBQPRAGraph
&G
,
396 PBQPRAGraph::NodeId NId
, PBQPRAGraph::NodeId MId
,
398 const TargetRegisterInfo
&TRI
=
399 *G
.getMetadata().MF
.getSubtarget().getRegisterInfo();
400 const auto &NRegs
= G
.getNodeMetadata(NId
).getAllowedRegs();
401 const auto &MRegs
= G
.getNodeMetadata(MId
).getAllowedRegs();
403 // Try looking the edge costs up in the IMatrixCache first.
404 IKey
K(&NRegs
, &MRegs
);
405 IMatrixCache::iterator I
= C
.find(K
);
407 G
.addEdgeBypassingCostAllocator(NId
, MId
, I
->second
);
411 PBQPRAGraph::RawMatrix
M(NRegs
.size() + 1, MRegs
.size() + 1, 0);
412 bool NodesInterfere
= false;
413 for (unsigned I
= 0; I
!= NRegs
.size(); ++I
) {
414 MCRegister PRegN
= NRegs
[I
];
415 for (unsigned J
= 0; J
!= MRegs
.size(); ++J
) {
416 MCRegister PRegM
= MRegs
[J
];
417 if (TRI
.regsOverlap(PRegN
, PRegM
)) {
418 M
[I
+ 1][J
+ 1] = std::numeric_limits
<PBQP::PBQPNum
>::infinity();
419 NodesInterfere
= true;
427 PBQPRAGraph::EdgeId EId
= G
.addEdge(NId
, MId
, std::move(M
));
428 C
[K
] = G
.getEdgeCostsPtr(EId
);
434 class Coalescing
: public PBQPRAConstraint
{
436 void apply(PBQPRAGraph
&G
) override
{
437 MachineFunction
&MF
= G
.getMetadata().MF
;
438 MachineBlockFrequencyInfo
&MBFI
= G
.getMetadata().MBFI
;
439 CoalescerPair
CP(*MF
.getSubtarget().getRegisterInfo());
441 // Scan the machine function and add a coalescing cost whenever CoalescerPair
443 for (const auto &MBB
: MF
) {
444 for (const auto &MI
: MBB
) {
445 // Skip not-coalescable or already coalesced copies.
446 if (!CP
.setRegisters(&MI
) || CP
.getSrcReg() == CP
.getDstReg())
449 Register DstReg
= CP
.getDstReg();
450 Register SrcReg
= CP
.getSrcReg();
452 PBQP::PBQPNum CBenefit
= MBFI
.getBlockFreqRelativeToEntryBlock(&MBB
);
455 if (!MF
.getRegInfo().isAllocatable(DstReg
))
458 PBQPRAGraph::NodeId NId
= G
.getMetadata().getNodeIdForVReg(SrcReg
);
460 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed
=
461 G
.getNodeMetadata(NId
).getAllowedRegs();
463 unsigned PRegOpt
= 0;
464 while (PRegOpt
< Allowed
.size() && Allowed
[PRegOpt
].id() != DstReg
)
467 if (PRegOpt
< Allowed
.size()) {
468 PBQPRAGraph::RawVector
NewCosts(G
.getNodeCosts(NId
));
469 NewCosts
[PRegOpt
+ 1] -= CBenefit
;
470 G
.setNodeCosts(NId
, std::move(NewCosts
));
473 PBQPRAGraph::NodeId N1Id
= G
.getMetadata().getNodeIdForVReg(DstReg
);
474 PBQPRAGraph::NodeId N2Id
= G
.getMetadata().getNodeIdForVReg(SrcReg
);
475 const PBQPRAGraph::NodeMetadata::AllowedRegVector
*Allowed1
=
476 &G
.getNodeMetadata(N1Id
).getAllowedRegs();
477 const PBQPRAGraph::NodeMetadata::AllowedRegVector
*Allowed2
=
478 &G
.getNodeMetadata(N2Id
).getAllowedRegs();
480 PBQPRAGraph::EdgeId EId
= G
.findEdge(N1Id
, N2Id
);
481 if (EId
== G
.invalidEdgeId()) {
482 PBQPRAGraph::RawMatrix
Costs(Allowed1
->size() + 1,
483 Allowed2
->size() + 1, 0);
484 addVirtRegCoalesce(Costs
, *Allowed1
, *Allowed2
, CBenefit
);
485 G
.addEdge(N1Id
, N2Id
, std::move(Costs
));
487 if (G
.getEdgeNode1Id(EId
) == N2Id
) {
488 std::swap(N1Id
, N2Id
);
489 std::swap(Allowed1
, Allowed2
);
491 PBQPRAGraph::RawMatrix
Costs(G
.getEdgeCosts(EId
));
492 addVirtRegCoalesce(Costs
, *Allowed1
, *Allowed2
, CBenefit
);
493 G
.updateEdgeCosts(EId
, std::move(Costs
));
501 void addVirtRegCoalesce(
502 PBQPRAGraph::RawMatrix
&CostMat
,
503 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed1
,
504 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed2
,
505 PBQP::PBQPNum Benefit
) {
506 assert(CostMat
.getRows() == Allowed1
.size() + 1 && "Size mismatch.");
507 assert(CostMat
.getCols() == Allowed2
.size() + 1 && "Size mismatch.");
508 for (unsigned I
= 0; I
!= Allowed1
.size(); ++I
) {
509 MCRegister PReg1
= Allowed1
[I
];
510 for (unsigned J
= 0; J
!= Allowed2
.size(); ++J
) {
511 MCRegister PReg2
= Allowed2
[J
];
513 CostMat
[I
+ 1][J
+ 1] -= Benefit
;
519 /// PBQP-specific implementation of weight normalization.
520 class PBQPVirtRegAuxInfo final
: public VirtRegAuxInfo
{
521 float normalize(float UseDefFreq
, unsigned Size
, unsigned NumInstr
) override
{
522 // All intervals have a spill weight that is mostly proportional to the
523 // number of uses, with uses in loops having a bigger weight.
524 return NumInstr
* VirtRegAuxInfo::normalize(UseDefFreq
, Size
, 1);
528 PBQPVirtRegAuxInfo(MachineFunction
&MF
, LiveIntervals
&LIS
, VirtRegMap
&VRM
,
529 const MachineLoopInfo
&Loops
,
530 const MachineBlockFrequencyInfo
&MBFI
)
531 : VirtRegAuxInfo(MF
, LIS
, VRM
, Loops
, MBFI
) {}
533 } // end anonymous namespace
535 // Out-of-line destructor/anchor for PBQPRAConstraint.
536 PBQPRAConstraint::~PBQPRAConstraint() = default;
538 void PBQPRAConstraint::anchor() {}
540 void PBQPRAConstraintList::anchor() {}
542 void RegAllocPBQP::getAnalysisUsage(AnalysisUsage
&au
) const {
543 au
.setPreservesCFG();
544 au
.addRequired
<AAResultsWrapperPass
>();
545 au
.addPreserved
<AAResultsWrapperPass
>();
546 au
.addRequired
<SlotIndexesWrapperPass
>();
547 au
.addPreserved
<SlotIndexesWrapperPass
>();
548 au
.addRequired
<LiveIntervalsWrapperPass
>();
549 au
.addPreserved
<LiveIntervalsWrapperPass
>();
550 //au.addRequiredID(SplitCriticalEdgesID);
552 au
.addRequiredID(*customPassID
);
553 au
.addRequired
<LiveStacksWrapperLegacy
>();
554 au
.addPreserved
<LiveStacksWrapperLegacy
>();
555 au
.addRequired
<MachineBlockFrequencyInfoWrapperPass
>();
556 au
.addPreserved
<MachineBlockFrequencyInfoWrapperPass
>();
557 au
.addRequired
<MachineLoopInfoWrapperPass
>();
558 au
.addPreserved
<MachineLoopInfoWrapperPass
>();
559 au
.addRequired
<MachineDominatorTreeWrapperPass
>();
560 au
.addPreserved
<MachineDominatorTreeWrapperPass
>();
561 au
.addRequired
<VirtRegMapWrapperLegacy
>();
562 au
.addPreserved
<VirtRegMapWrapperLegacy
>();
563 MachineFunctionPass::getAnalysisUsage(au
);
566 void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction
&MF
,
567 LiveIntervals
&LIS
) {
568 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
570 // Iterate over all live ranges.
571 for (unsigned I
= 0, E
= MRI
.getNumVirtRegs(); I
!= E
; ++I
) {
572 Register Reg
= Register::index2VirtReg(I
);
573 if (MRI
.reg_nodbg_empty(Reg
))
575 VRegsToAlloc
.insert(Reg
);
579 static bool isACalleeSavedRegister(MCRegister Reg
,
580 const TargetRegisterInfo
&TRI
,
581 const MachineFunction
&MF
) {
582 const MCPhysReg
*CSR
= MF
.getRegInfo().getCalleeSavedRegs();
583 for (unsigned i
= 0; CSR
[i
] != 0; ++i
)
584 if (TRI
.regsOverlap(Reg
, CSR
[i
]))
589 void RegAllocPBQP::initializeGraph(PBQPRAGraph
&G
, VirtRegMap
&VRM
,
590 Spiller
&VRegSpiller
) {
591 MachineFunction
&MF
= G
.getMetadata().MF
;
593 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
594 const MachineRegisterInfo
&MRI
= G
.getMetadata().MF
.getRegInfo();
595 const TargetRegisterInfo
&TRI
=
596 *G
.getMetadata().MF
.getSubtarget().getRegisterInfo();
598 std::vector
<Register
> Worklist(VRegsToAlloc
.begin(), VRegsToAlloc
.end());
600 std::map
<Register
, std::vector
<MCRegister
>> VRegAllowedMap
;
602 while (!Worklist
.empty()) {
603 Register VReg
= Worklist
.back();
606 LiveInterval
&VRegLI
= LIS
.getInterval(VReg
);
608 // If this is an empty interval move it to the EmptyIntervalVRegs set then
610 if (VRegLI
.empty()) {
611 EmptyIntervalVRegs
.insert(VRegLI
.reg());
612 VRegsToAlloc
.erase(VRegLI
.reg());
616 const TargetRegisterClass
*TRC
= MRI
.getRegClass(VReg
);
618 // Record any overlaps with regmask operands.
619 BitVector RegMaskOverlaps
;
620 LIS
.checkRegMaskInterference(VRegLI
, RegMaskOverlaps
);
622 // Compute an initial allowed set for the current vreg.
623 std::vector
<MCRegister
> VRegAllowed
;
624 ArrayRef
<MCPhysReg
> RawPRegOrder
= TRC
->getRawAllocationOrder(MF
);
625 for (MCPhysReg R
: RawPRegOrder
) {
627 if (MRI
.isReserved(PReg
))
630 // vregLI crosses a regmask operand that clobbers preg.
631 if (!RegMaskOverlaps
.empty() && !RegMaskOverlaps
.test(PReg
))
634 // vregLI overlaps fixed regunit interference.
635 bool Interference
= false;
636 for (MCRegUnit Unit
: TRI
.regunits(PReg
)) {
637 if (VRegLI
.overlaps(LIS
.getRegUnit(Unit
))) {
645 // preg is usable for this virtual register.
646 VRegAllowed
.push_back(PReg
);
649 // Check for vregs that have no allowed registers. These should be
650 // pre-spilled and the new vregs added to the worklist.
651 if (VRegAllowed
.empty()) {
652 SmallVector
<Register
, 8> NewVRegs
;
653 spillVReg(VReg
, NewVRegs
, MF
, LIS
, VRM
, VRegSpiller
);
654 llvm::append_range(Worklist
, NewVRegs
);
658 VRegAllowedMap
[VReg
.id()] = std::move(VRegAllowed
);
661 for (auto &KV
: VRegAllowedMap
) {
662 auto VReg
= KV
.first
;
664 // Move empty intervals to the EmptyIntervalVReg set.
665 if (LIS
.getInterval(VReg
).empty()) {
666 EmptyIntervalVRegs
.insert(VReg
);
667 VRegsToAlloc
.erase(VReg
);
671 auto &VRegAllowed
= KV
.second
;
673 PBQPRAGraph::RawVector
NodeCosts(VRegAllowed
.size() + 1, 0);
675 // Tweak cost of callee saved registers, as using then force spilling and
676 // restoring them. This would only happen in the prologue / epilogue though.
677 for (unsigned i
= 0; i
!= VRegAllowed
.size(); ++i
)
678 if (isACalleeSavedRegister(VRegAllowed
[i
], TRI
, MF
))
679 NodeCosts
[1 + i
] += 1.0;
681 PBQPRAGraph::NodeId NId
= G
.addNode(std::move(NodeCosts
));
682 G
.getNodeMetadata(NId
).setVReg(VReg
);
683 G
.getNodeMetadata(NId
).setAllowedRegs(
684 G
.getMetadata().getAllowedRegs(std::move(VRegAllowed
)));
685 G
.getMetadata().setNodeIdForVReg(VReg
, NId
);
689 void RegAllocPBQP::spillVReg(Register VReg
,
690 SmallVectorImpl
<Register
> &NewIntervals
,
691 MachineFunction
&MF
, LiveIntervals
&LIS
,
692 VirtRegMap
&VRM
, Spiller
&VRegSpiller
) {
693 VRegsToAlloc
.erase(VReg
);
694 LiveRangeEdit
LRE(&LIS
.getInterval(VReg
), NewIntervals
, MF
, LIS
, &VRM
,
695 nullptr, &DeadRemats
);
696 VRegSpiller
.spill(LRE
);
698 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
700 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg
, &TRI
) << " -> SPILLED (Cost: "
701 << LRE
.getParent().weight() << ", New vregs: ");
703 // Copy any newly inserted live intervals into the list of regs to
705 for (const Register
&R
: LRE
) {
706 const LiveInterval
&LI
= LIS
.getInterval(R
);
707 assert(!LI
.empty() && "Empty spill range.");
708 LLVM_DEBUG(dbgs() << printReg(LI
.reg(), &TRI
) << " ");
709 VRegsToAlloc
.insert(LI
.reg());
712 LLVM_DEBUG(dbgs() << ")\n");
715 bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph
&G
,
716 const PBQP::Solution
&Solution
,
718 Spiller
&VRegSpiller
) {
719 MachineFunction
&MF
= G
.getMetadata().MF
;
720 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
721 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
724 // Set to true if we have any spills
725 bool AnotherRoundNeeded
= false;
727 // Clear the existing allocation.
730 // Iterate over the nodes mapping the PBQP solution to a register
732 for (auto NId
: G
.nodeIds()) {
733 Register VReg
= G
.getNodeMetadata(NId
).getVReg();
734 unsigned AllocOpt
= Solution
.getSelection(NId
);
736 if (AllocOpt
!= PBQP::RegAlloc::getSpillOptionIdx()) {
737 MCRegister PReg
= G
.getNodeMetadata(NId
).getAllowedRegs()[AllocOpt
- 1];
738 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg
, &TRI
) << " -> "
739 << TRI
.getName(PReg
) << "\n");
740 assert(PReg
!= 0 && "Invalid preg selected.");
741 VRM
.assignVirt2Phys(VReg
, PReg
);
743 // Spill VReg. If this introduces new intervals we'll need another round
745 SmallVector
<Register
, 8> NewVRegs
;
746 spillVReg(VReg
, NewVRegs
, MF
, LIS
, VRM
, VRegSpiller
);
747 AnotherRoundNeeded
|= !NewVRegs
.empty();
751 return !AnotherRoundNeeded
;
754 void RegAllocPBQP::finalizeAlloc(MachineFunction
&MF
,
756 VirtRegMap
&VRM
) const {
757 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
759 // First allocate registers for the empty intervals.
760 for (const Register
&R
: EmptyIntervalVRegs
) {
761 LiveInterval
&LI
= LIS
.getInterval(R
);
763 Register PReg
= MRI
.getSimpleHint(LI
.reg());
766 const TargetRegisterClass
&RC
= *MRI
.getRegClass(LI
.reg());
767 const ArrayRef
<MCPhysReg
> RawPRegOrder
= RC
.getRawAllocationOrder(MF
);
768 for (MCRegister CandidateReg
: RawPRegOrder
) {
769 if (!VRM
.getRegInfo().isReserved(CandidateReg
)) {
775 "No un-reserved physical registers in this register class");
778 VRM
.assignVirt2Phys(LI
.reg(), PReg
);
782 void RegAllocPBQP::postOptimization(Spiller
&VRegSpiller
, LiveIntervals
&LIS
) {
783 VRegSpiller
.postOptimization();
784 /// Remove dead defs because of rematerialization.
785 for (auto *DeadInst
: DeadRemats
) {
786 LIS
.RemoveMachineInstrFromMaps(*DeadInst
);
787 DeadInst
->eraseFromParent();
792 bool RegAllocPBQP::runOnMachineFunction(MachineFunction
&MF
) {
793 LiveIntervals
&LIS
= getAnalysis
<LiveIntervalsWrapperPass
>().getLIS();
794 MachineBlockFrequencyInfo
&MBFI
=
795 getAnalysis
<MachineBlockFrequencyInfoWrapperPass
>().getMBFI();
797 auto &LiveStks
= getAnalysis
<LiveStacksWrapperLegacy
>().getLS();
798 auto &MDT
= getAnalysis
<MachineDominatorTreeWrapperPass
>().getDomTree();
800 VirtRegMap
&VRM
= getAnalysis
<VirtRegMapWrapperLegacy
>().getVRM();
802 PBQPVirtRegAuxInfo
VRAI(
803 MF
, LIS
, VRM
, getAnalysis
<MachineLoopInfoWrapperPass
>().getLI(), MBFI
);
804 VRAI
.calculateSpillWeightsAndHints();
806 // FIXME: we create DefaultVRAI here to match existing behavior pre-passing
807 // the VRAI through the spiller to the live range editor. However, it probably
808 // makes more sense to pass the PBQP VRAI. The existing behavior had
809 // LiveRangeEdit make its own VirtRegAuxInfo object.
810 VirtRegAuxInfo
DefaultVRAI(
811 MF
, LIS
, VRM
, getAnalysis
<MachineLoopInfoWrapperPass
>().getLI(), MBFI
);
812 std::unique_ptr
<Spiller
> VRegSpiller(
813 createInlineSpiller({LIS
, LiveStks
, MDT
, MBFI
}, MF
, VRM
, DefaultVRAI
));
815 MF
.getRegInfo().freezeReservedRegs();
817 LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF
.getName() << "\n");
819 // Allocator main loop:
821 // * Map current regalloc problem to a PBQP problem
822 // * Solve the PBQP problem
823 // * Map the solution back to a register allocation
824 // * Spill if necessary
826 // This process is continued till no more spills are generated.
828 // Find the vreg intervals in need of allocation.
829 findVRegIntervalsToAlloc(MF
, LIS
);
832 const Function
&F
= MF
.getFunction();
833 std::string FullyQualifiedName
=
834 F
.getParent()->getModuleIdentifier() + "." + F
.getName().str();
837 // If there are non-empty intervals allocate them using pbqp.
838 if (!VRegsToAlloc
.empty()) {
839 const TargetSubtargetInfo
&Subtarget
= MF
.getSubtarget();
840 std::unique_ptr
<PBQPRAConstraintList
> ConstraintsRoot
=
841 std::make_unique
<PBQPRAConstraintList
>();
842 ConstraintsRoot
->addConstraint(std::make_unique
<SpillCosts
>());
843 ConstraintsRoot
->addConstraint(std::make_unique
<Interference
>());
845 ConstraintsRoot
->addConstraint(std::make_unique
<Coalescing
>());
846 ConstraintsRoot
->addConstraint(Subtarget
.getCustomPBQPConstraints());
848 bool PBQPAllocComplete
= false;
851 while (!PBQPAllocComplete
) {
852 LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round
<< ":\n");
855 PBQPRAGraph
G(PBQPRAGraph::GraphMetadata(MF
, LIS
, MBFI
));
856 initializeGraph(G
, VRM
, *VRegSpiller
);
857 ConstraintsRoot
->apply(G
);
860 if (PBQPDumpGraphs
) {
861 std::ostringstream RS
;
863 std::string GraphFileName
= FullyQualifiedName
+ "." + RS
.str() +
866 raw_fd_ostream
OS(GraphFileName
, EC
, sys::fs::OF_TextWithCRLF
);
867 LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round
<< " to \""
868 << GraphFileName
<< "\"\n");
873 PBQP::Solution Solution
= PBQP::RegAlloc::solve(G
);
874 PBQPAllocComplete
= mapPBQPToRegAlloc(G
, Solution
, VRM
, *VRegSpiller
);
879 // Finalise allocation, allocate empty ranges.
880 finalizeAlloc(MF
, LIS
, VRM
);
881 postOptimization(*VRegSpiller
, LIS
);
882 VRegsToAlloc
.clear();
883 EmptyIntervalVRegs
.clear();
885 LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM
<< "\n");
890 /// Create Printable object for node and register info.
891 static Printable
PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId
,
892 const PBQP::RegAlloc::PBQPRAGraph
&G
) {
893 return Printable([NId
, &G
](raw_ostream
&OS
) {
894 const MachineRegisterInfo
&MRI
= G
.getMetadata().MF
.getRegInfo();
895 const TargetRegisterInfo
*TRI
= MRI
.getTargetRegisterInfo();
896 Register VReg
= G
.getNodeMetadata(NId
).getVReg();
897 const char *RegClassName
= TRI
->getRegClassName(MRI
.getRegClass(VReg
));
898 OS
<< NId
<< " (" << RegClassName
<< ':' << printReg(VReg
, TRI
) << ')';
902 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
903 LLVM_DUMP_METHOD
void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream
&OS
) const {
904 for (auto NId
: nodeIds()) {
905 const Vector
&Costs
= getNodeCosts(NId
);
906 assert(Costs
.getLength() != 0 && "Empty vector in graph.");
907 OS
<< PrintNodeInfo(NId
, *this) << ": " << Costs
<< '\n';
911 for (auto EId
: edgeIds()) {
912 NodeId N1Id
= getEdgeNode1Id(EId
);
913 NodeId N2Id
= getEdgeNode2Id(EId
);
914 assert(N1Id
!= N2Id
&& "PBQP graphs should not have self-edges.");
915 const Matrix
&M
= getEdgeCosts(EId
);
916 assert(M
.getRows() != 0 && "No rows in matrix.");
917 assert(M
.getCols() != 0 && "No cols in matrix.");
918 OS
<< PrintNodeInfo(N1Id
, *this) << ' ' << M
.getRows() << " rows / ";
919 OS
<< PrintNodeInfo(N2Id
, *this) << ' ' << M
.getCols() << " cols:\n";
924 LLVM_DUMP_METHOD
void PBQP::RegAlloc::PBQPRAGraph::dump() const {
929 void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream
&OS
) const {
931 for (auto NId
: nodeIds()) {
932 OS
<< " node" << NId
<< " [ label=\""
933 << PrintNodeInfo(NId
, *this) << "\\n"
934 << getNodeCosts(NId
) << "\" ]\n";
937 OS
<< " edge [ len=" << nodeIds().size() << " ]\n";
938 for (auto EId
: edgeIds()) {
939 OS
<< " node" << getEdgeNode1Id(EId
)
940 << " -- node" << getEdgeNode2Id(EId
)
942 const Matrix
&EdgeCosts
= getEdgeCosts(EId
);
943 for (unsigned i
= 0; i
< EdgeCosts
.getRows(); ++i
) {
944 OS
<< EdgeCosts
.getRowAsVector(i
) << "\\n";
951 FunctionPass
*llvm::createPBQPRegisterAllocator(char *customPassID
) {
952 return new RegAllocPBQP(customPassID
);
955 FunctionPass
* llvm::createDefaultPBQPRegisterAllocator() {
956 return createPBQPRegisterAllocator();