//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");
static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);

static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);
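// Note (hedged): these registry entries are what make the schedulers above
// selectable by name through the pre-RA scheduling option, e.g. something
// like `llc -pre-RA-sched=list-ilp foo.ll`. The flag itself is defined
// elsewhere (SchedulerRegistry / SelectionDAGISel), so treat the exact
// spelling as an assumption rather than something this file establishes.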
static cl::opt<bool> DisableSchedCycles(
    "disable-sched-cycles", cl::Hidden, cl::init(false),
    cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
    "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
    cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
    "disable-sched-live-uses", cl::Hidden, cl::init(true),
    cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
    "disable-sched-vrcycle", cl::Hidden, cl::init(false),
    cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
    "disable-sched-physreg-join", cl::Hidden, cl::init(false),
    cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
    "disable-sched-stalls", cl::Hidden, cl::init(true),
    cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
    "disable-sched-critical-path", cl::Hidden, cl::init(false),
    cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
    "disable-sched-height", cl::Hidden, cl::init(false),
    cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
    "disable-2addr-hack", cl::Hidden, cl::init(true),
    cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
    "max-sched-reorder", cl::Hidden, cl::init(6),
    cl::desc("Number of instructions to allow ahead of the critical path "
             "in sched=list-ilp"));
static cl::opt<unsigned> AvgIPC(
    "sched-avg-ipc", cl::Hidden, cl::init(1),
    cl::desc("Average inst/cycle when no target itinerary exists."));
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
namespace {

class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;
  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit *> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle = ~0u;

  /// IssueCount - Count of instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount = 0u;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other nodes that modify
  /// the registers can be scheduled.
  unsigned NumLiveRegs = 0u;
  std::unique_ptr<SUnit *[]> LiveRegDefs;
  std::unique_ptr<SUnit *[]> LiveRegGens;

  // Collect interferences between physical register use/defs.
  // Each interference is an SUnit and set of physical registers.
  SmallVector<SUnit *, 4> Interferences;

  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
  LRegsMapT LRegsMap;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  SmallDenseMap<SUnit *, SUnit *, 16> CallSeqEndForStart;
public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOptLevel OptLevel)
      : ScheduleDAGSDNodes(mf), NeedLatency(needlatency),
        AvailableQueue(availqueue), Topo(SUnits, nullptr) {
    const TargetSubtargetInfo &STI = mf.getSubtarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
  }

  ~ScheduleDAGRRList() override {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule() override;

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
  /// This returns true if this is a new predecessor.
  /// Does *NOT* update the topological ordering! It just queues an update.
  void AddPredQueued(SUnit *SU, const SDep &D) {
    Topo.AddPredQueued(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// This returns true if this is a new predecessor.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// This returns true if an edge was removed.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
           AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *TryUnfoldSU(SUnit *);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);

  void releaseInterferences(unsigned Reg = 0);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();
  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.AddSUnitWithoutPredecessors(NewNode);
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.AddSUnitWithoutPredecessors(NewNode);
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const override {
    return !NeedLatency;
  }
};

} // end anonymous namespace
static constexpr unsigned RegSequenceCost = 1;

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost,
                          const MachineFunction &MF) {
  MVT VT = RegDefPos.GetValue();

  // Special handling for untyped values. These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();

    // Special handling for CopyFromReg of untyped values.
    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Opcode = Node->getMachineOpcode();
    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = Node->getConstantOperandVal(0);
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = RegSequenceCost;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc &Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
    assert(RC && "Not a valid register class");
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
                    << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle =
      DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
  CallSeqEndForStart.clear();
  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");

  // Build the scheduling graph.
  BuildSchedGraph(nullptr);

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue, avoid
    // adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}
/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  while (true) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (const SDValue &Op : N->op_values())
        if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}
/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  while (true) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = nullptr;
      unsigned BestMaxNest = MaxNest;
      for (const SDValue &Op : N->op_values()) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(Op.getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return nullptr;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return nullptr;
  }
}
/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SDep &Pred : SU->Preds) {
    ReleasePred(SU, &Pred);
    if (Pred.isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
      if (!LiveRegGens[Pred.getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[Pred.getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
        assert(N && "Must find call sequence start");

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}
/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}
/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  } else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}
/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}
/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    LLVM_DEBUG(dbgs() << "   Height [" << SU->getHeight()
                      << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SDep &Succ : SU->Succs) {
    // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
    if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[Succ.getReg()] = nullptr;
      LiveRegGens[Succ.getReg()] = nullptr;
      releaseInterferences(Succ.getReg());
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
         "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}
/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

  for (SDep &Pred : SU->Preds) {
    CapturePred(&Pred);
    if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[Pred.getReg()] = nullptr;
      LiveRegGens[Pred.getReg()] = nullptr;
      releaseInterferences(Pred.getReg());
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
      SUnit *SeqEnd = CallSeqEndForStart[SU];
      assert(SeqEnd && "Call sequence start/end must be known");
      assert(!LiveRegDefs[CallResource]);
      assert(!LiveRegGens[CallResource]);
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = SeqEnd;
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[CallResource]);
        assert(LiveRegGens[CallResource]);
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  for (auto &Succ : SU->Succs) {
    if (Succ.isAssignedRegDep()) {
      auto Reg = Succ.getReg();
      if (!LiveRegDefs[Reg])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[Reg] = SU;

      // Update LiveRegGen only if it was empty before this unscheduling.
      // This avoids incorrectly updating the LiveRegGen set from a previous run.
      if (!LiveRegGens[Reg]) {
        // Find the successor with the lowest height.
        LiveRegGens[Reg] = Succ.getSUnit();
        for (auto &Succ2 : SU->Succs) {
          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
            LiveRegGens[Reg] = Succ2.getSUnit();
        }
      }
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  } else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}
/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (auto E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BTCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}
static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}
/// TryUnfold - Attempt to unfold
SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
  SDNode *N = SU->getNode();
  // Use while over if to ease fall through.
  SmallVector<SDNode *, 2> NewNodes;
  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
    return nullptr;

  assert(NewNodes.size() == 2 && "Expected a load folding node!");

  N = NewNodes[1];
  SDNode *LoadNode = NewNodes[0];
  unsigned NumVals = N->getNumValues();
  unsigned OldNumVals = SU->getNode()->getNumValues();

  // LoadNode may already exist. This can happen when there is another
  // load from the same location and producing the same type of value
  // but it has different alignment or volatileness.
  bool isNewLoad = true;
  SUnit *LoadSU;
  if (LoadNode->getNodeId() != -1) {
    LoadSU = &SUnits[LoadNode->getNodeId()];
    // If LoadSU has already been scheduled, we should clone it but
    // this would negate the benefit to unfolding so just return SU.
    if (LoadSU->isScheduled)
      return SU;
    isNewLoad = false;
  } else {
    LoadSU = CreateNewSUnit(LoadNode);
    LoadNode->setNodeId(LoadSU->NodeNum);

    InitNumRegDefsLeft(LoadSU);
    computeLatency(LoadSU);
  }

  bool isNewN = true;
  SUnit *NewSU;
  // This can only happen when isNewLoad is false.
  if (N->getNodeId() != -1) {
    NewSU = &SUnits[N->getNodeId()];
    // If NewSU has already been scheduled, we need to clone it, but this
    // negates the benefit to unfolding so just return SU.
    if (NewSU->isScheduled) {
      return SU;
    }
    isNewN = false;
  } else {
    NewSU = CreateNewSUnit(N);
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    computeLatency(NewSU);
  }

  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");

  // Now that we are committed to unfolding replace DAG Uses.
  for (unsigned i = 0; i != NumVals; ++i)
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
                                 SDValue(LoadNode, 1));

  // Record all the edges to and from the old SU, by category.
  SmallVector<SDep, 4> ChainPreds;
  SmallVector<SDep, 4> ChainSuccs;
  SmallVector<SDep, 4> LoadPreds;
  SmallVector<SDep, 4> NodePreds;
  SmallVector<SDep, 4> NodeSuccs;
  for (SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      ChainPreds.push_back(Pred);
    else if (isOperandOf(Pred.getSUnit(), LoadNode))
      LoadPreds.push_back(Pred);
    else
      NodePreds.push_back(Pred);
  }
  for (SDep &Succ : SU->Succs) {
    if (Succ.isCtrl())
      ChainSuccs.push_back(Succ);
    else
      NodeSuccs.push_back(Succ);
  }

  // Now assign edges to the newly-created nodes.
  for (const SDep &Pred : ChainPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPredQueued(LoadSU, Pred);
  }
  for (const SDep &Pred : LoadPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPredQueued(LoadSU, Pred);
  }
  for (const SDep &Pred : NodePreds) {
    RemovePred(SU, Pred);
    AddPredQueued(NewSU, Pred);
  }
  for (SDep &D : NodeSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    D.setSUnit(NewSU);
    AddPredQueued(SuccDep, D);
    // Balance register pressure.
    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
        !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
      --NewSU->NumRegDefsLeft;
  }
  for (SDep &D : ChainSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    if (isNewLoad) {
      D.setSUnit(LoadSU);
      AddPredQueued(SuccDep, D);
    }
  }

  // Add a data dependency to reflect that NewSU reads the value defined
  // by LoadSU.
  SDep D(LoadSU, SDep::Data, 0);
  D.setLatency(LoadSU->Latency);
  AddPredQueued(NewSU, D);

  if (isNewLoad)
    AvailableQueue->addNode(LoadSU);
  if (isNewN)
    AvailableQueue->addNode(NewSU);

  ++NumUnfolds;

  if (NewSU->NumSuccsLeft == 0)
    NewSU->isAvailable = true;

  return NewSU;
}
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
  LLVM_DEBUG(dumpNode(*SU));

  if (N->getGluedNode() &&
      !TII->canCopyGluedNodeDuringSchedule(N)) {
    LLVM_DEBUG(
        dbgs()
        << "Giving up because it has incoming glue and the target does not "
           "want to copy it\n");
    return nullptr;
  }

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (VT == MVT::Glue) {
      LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
      return nullptr;
    } else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (const SDValue &Op : N->op_values()) {
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
      LLVM_DEBUG(
          dbgs() << "Giving up because one of the operands is glue and "
                    "the target does not want to copy it\n");
      return nullptr;
    }
  }

  // If possible unfold instruction.
  if (TryUnfold) {
    SUnit *UnfoldSU = TryUnfoldSU(SU);
    if (!UnfoldSU)
      return nullptr;
    SU = UnfoldSU;
    N = SU->getNode();
    // If this can be scheduled don't bother duplicating and just return
    if (SU->NumSuccsLeft == 0)
      return SU;
  }

  LLVM_DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPredQueued(NewSU, Pred);

  // Make sure the clone comes after the original. (InstrEmitter assumes
  // new SUnits are scheduled last.)
  AddPredQueued(NewSU, SDep(SU, SDep::Artificial));

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
      AddPredQueued(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.emplace_back(SuccSU, D);
    }
  }
  for (const auto &[DelSU, DelD] : DelDeps)
    RemovePred(DelSU, DelD);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}
/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVectorImpl<SUnit*> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(nullptr);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(CopyToSU);
      AddPredQueued(SuccSU, D);
      DelDeps.emplace_back(SuccSU, Succ);
    } else {
      // Avoid scheduling the def-side copy before other successors. Otherwise,
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (const auto &[DelSU, DelD] : DelDeps)
    RemovePred(DelSU, DelD);

  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPredQueued(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPredQueued(CopyToSU, ToDep);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  unsigned NumRes;
  if (N->getOpcode() == ISD::CopyFromReg) {
    // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
    NumRes = 1;
  } else {
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    assert(!MCID.implicit_defs().empty() &&
           "Physical reg def must be in implicit def list!");
    NumRes = MCID.getNumDefs();
    for (MCPhysReg ImpDef : MCID.implicit_defs()) {
      if (Reg == ImpDef)
        break;
      ++NumRes;
    }
  }
  return N->getSimpleValueType(NumRes);
}
/// CheckForLiveRegDef - Update the LRegs vector if the specified register def
/// of the specified SUnit clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVectorImpl<unsigned> &LRegs,
                               const TargetRegisterInfo *TRI,
                               const SDNode *Node = nullptr) {
  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
    // Check if Ref is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Allow multiple uses of same def
    if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
      continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI).second) {
      LRegs.push_back(*AliasI);
    }
  }
}
/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     ArrayRef<SUnit*> LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVectorImpl<unsigned> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i).second)
      LRegs.push_back(i);
  }
}
/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (const SDValue &Op : N->op_values())
    if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
      return RegOp->getRegMask();
  return nullptr;
}
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SDep &Pred : SU->Preds) {
    if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM ||
        Node->getOpcode() == ISD::INLINEASM_BR) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags = Node->getConstantOperandVal(i);
        const InlineAsm::Flag F(Flags);
        unsigned NumVals = F.getNumOperandRegisters();

        ++i; // Skip the ID value.
        if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
            F.isClobberKind()) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (Reg.isPhysical())
              CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (Node->getOpcode() == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      if (Reg.isPhysical()) {
        SDNode *SrcNode = Node->getOperand(2).getNode();
        CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
                           SrcNode);
      }
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) &&
            RegAdded.insert(CallResource).second)
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask,
                               ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
                               RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (MCID.hasOptionalDef()) {
      // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
      // This operand can be either a def of CPSR, if the S bit is set; or a use
      // of %noreg. When the OptionalDef is set to a valid register, we need to
      // handle it in the same way as an ImplicitDef.
      for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
        if (MCID.operands()[i].isOptionalDef()) {
          const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
          Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
          CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
        }
    }
    for (MCPhysReg Reg : MCID.implicit_defs())
      CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}
void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = Interferences.size(); i > 0; --i) {
    SUnit *SU = Interferences[i-1];
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    if (Reg) {
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (!is_contained(LRegs, Reg))
        continue;
    }
    SU->isPending = false;
    // The interfering node may no longer be available due to backtracking.
    // Furthermore, it may have been made available again, in which case it is
    // now already in the AvailableQueue.
    if (SU->isAvailable && !SU->NodeQueueId) {
      LLVM_DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
      AvailableQueue->push(SU);
    }
    if (i < Interferences.size())
      Interferences[i-1] = Interferences.back();
    Interferences.pop_back();
    LRegsMap.erase(LRegsPos);
  }
}
/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
  auto FindAvailableNode = [&]() {
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      LLVM_DEBUG(dbgs() << "    Interfering reg ";
                 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
                 else dbgs() << printReg(LRegs[0], TRI);
                 dbgs() << " SU #" << CurSU->NodeNum << '\n');
      auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
      if (LRegsInserted) {
        CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
        Interferences.push_back(CurSU);
      } else {
        assert(CurSU->isPending && "Interferences are pending");
        // Update the interference with current live regs.
        LRegsIter->second = LRegs;
      }
      CurSU = AvailableQueue->pop();
    }
  };
  FindAvailableNode();

  // We query the topological order in the loop body, so make sure outstanding
  // updates are applied before entering it (we only enter the loop if there
  // are some interferences). If we make changes to the ordering, we exit
  // the loop.

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to delay.
  while (!CurSU && !Interferences.empty()) {
    for (SUnit *TrySU : Interferences) {
      SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];

      // Try unscheduling up to the point where it's safe to schedule
      // this node.
      SUnit *BtSU = nullptr;
      unsigned LiveCycle = std::numeric_limits<unsigned>::max();
      for (unsigned Reg : LRegs) {
        if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
          BtSU = LiveRegGens[Reg];
          LiveCycle = BtSU->getHeight();
        }
      }
      if (!WillCreateCycle(TrySU, BtSU)) {
        // BacktrackBottomUp mutates Interferences!
        BacktrackBottomUp(TrySU, BtSU);

        // Force the current node to be scheduled before the node that
        // requires the physical reg dep.
        if (BtSU->isAvailable) {
          BtSU->isAvailable = false;
          if (!BtSU->isPending)
            AvailableQueue->remove(BtSU);
        }
        LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
                          << ") to SU(" << TrySU->NodeNum << ")\n");
        AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));

        // If one or more successors has been unscheduled, then the current
        // node is no longer available.
        if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
          LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
          CurSU = AvailableQueue->pop();
        } else {
          LLVM_DEBUG(dbgs() << "TrySU available\n");
          // Available and in AvailableQueue
          AvailableQueue->remove(TrySU);
          CurSU = TrySU;
        }
        FindAvailableNode();
        // Interferences has been mutated. We must break.
        break;
      }
    }

    if (!CurSU) {
      // Can't backtrack. If it's too expensive to copy the value, then try to
      // duplicate the nodes that produce these "too expensive to copy"
      // values to break the dependency. In case even that doesn't work,
      // insert cross class copies.
      // If it's not too expensive, i.e. cost != -1, issue copies.
      SUnit *TrySU = Interferences[0];
      SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
      assert(LRegs.size() == 1 && "Can't handle this yet!");
      unsigned Reg = LRegs[0];
      SUnit *LRDef = LiveRegDefs[Reg];
      MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
      const TargetRegisterClass *RC =
          TRI->getMinimalPhysRegClass(Reg, VT);
      const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

      // If cross copy register class is the same as RC, then it must be possible
      // to copy the value directly. Do not try to duplicate the def.
      // If cross copy register class is not the same as RC, then it's possible to
      // copy the value but it requires cross register class copies and it is
      // expensive.
      // If cross copy register class is null, then it's not possible to copy
      // the value at all.
      SUnit *NewDef = nullptr;
      if (DestRC != RC) {
        NewDef = CopyAndMoveSuccessors(LRDef);
        if (!DestRC && !NewDef)
          report_fatal_error("Can't handle live physical register dependency!");
      }
      if (!NewDef) {
        // Issue copies, these can be expensive cross register class copies.
        SmallVector<SUnit*, 2> Copies;
        InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
        LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                          << " to SU #" << Copies.front()->NodeNum << "\n");
        AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
        NewDef = Copies.back();
      }

      LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                        << " to SU #" << TrySU->NodeNum << "\n");
      LiveRegDefs[Reg] = NewDef;
      AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
      TrySU->isAvailable = false;
      CurSU = NewDef;
    }
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");
  return CurSU;
}
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty() || !Interferences.empty()) {
    LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
               AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
             "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
namespace {

class RegReductionPQBase;

struct queue_sort {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};

#ifndef NDEBUG
template<class SF>
struct reverse_sort : public queue_sort {
  SF &SortFunc;

  reverse_sort(SF &sf) : SortFunc(sf) {}

  bool operator()(SUnit* left, SUnit* right) const {
    // reverse left/right rather than simply !SortFunc(left, right)
    // to expose different paths in the comparison logic.
    return SortFunc(right, left);
  }
};
#endif // NDEBUG

/// bu_ls_rr_sort - Priority function for bottom up register pressure
// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;

  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;

  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;

  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;

  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};
class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
  std::vector<SUnit *> Queue;
  unsigned CurQueueId = 0;
  bool TracksRegPressure;
  bool SrcOrder;

  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits = nullptr;

  MachineFunction &MF;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  const TargetLowering *TLI = nullptr;
  ScheduleDAGRRList *scheduleDAG = nullptr;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     bool srcorder,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
      SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (const TargetRegisterClass *RC : TRI->regclasses())
        RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
    }
  }
  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer *getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits) override;

  void addNode(const SUnit *SU) override;

  void updateNode(const SUnit *SU) override;

  void releaseState() override {
    SUnits = nullptr;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return SU->getNode()->getIROrder();
  }

  bool empty() const override { return Queue.empty(); }

  void push(SUnit *U) override {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) override {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
    if (I != std::prev(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }
  bool tracksRegPressure() const override { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;

  bool MayReduceRegPressure(SUnit *SU) const;

  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;

  void scheduledNode(SUnit *SU) override;

  void unscheduledNode(SUnit *SU) override;

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};
template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
  unsigned BestIdx = 0;
  // Only compute the cost for the first 1000 items in the queue, to avoid
  // excessive compile-times for very large queues.
  for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
       ++I)
    if (Picker(Q[BestIdx], Q[I]))
      BestIdx = I;

  SUnit *V = Q[BestIdx];
  if (BestIdx + 1 != Q.size())
    std::swap(Q[BestIdx], Q.back());
  Q.pop_back();
  return V;
}

template<class SF>
SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
  if (DAG->StressSched) {
    reverse_sort<SF> RPicker(Picker);
    return popFromQueueImpl(Q, RPicker);
  }

  return popFromQueueImpl(Q, Picker);
}
//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
  SF Picker;

public:
  RegReductionPriorityQueue(MachineFunction &mf,
                            bool tracksrp,
                            bool srcorder,
                            const TargetInstrInfo *tii,
                            const TargetRegisterInfo *tri,
                            const TargetLowering *tli)
    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
                         tii, tri, tli),
      Picker(this) {}

  bool isBottomUp() const override { return SF::IsBottomUp; }

  bool isReady(SUnit *U) const override {
    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
  }

  SUnit *pop() override {
    if (Queue.empty()) return nullptr;

    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
    V->NodeQueueId = 0;
    return V;
  }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
    // Emulate pop() without clobbering NodeQueueIds.
    std::vector<SUnit *> DumpQueue = Queue;
    SF DumpPicker = Picker;
    while (!DumpQueue.empty()) {
      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
      dbgs() << "Height " << SU->getHeight() << ": ";
      DAG->dumpNode(*SU);
    }
  }
#endif
};

using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;

using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;

using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;

using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;

} // end anonymous namespace
//===----------------------------------------------------------------------===//
//           Static Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

// Check for special nodes that bypass scheduling heuristics.
// Currently this pushes TokenFactor nodes down, but may be used for other
// pseudo-ops as well.
//
// Return -1 to schedule right above left, 1 for left above right.
// Return 0 if no bias exists.
static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
  bool LSchedLow = left->isScheduleLow;
  bool RSchedLow = right->isScheduleLow;
  if (LSchedLow != RSchedLow)
    return LSchedLow < RSchedLow ? 1 : -1;
  return 0;
}
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  if (SUNumbers[SU->NodeNum] != 0)
    return SUNumbers[SU->NodeNum];

  // Use WorkList to avoid stack overflow on excessively large IRs.
  struct WorkState {
    WorkState(const SUnit *SU) : SU(SU) {}
    const SUnit *SU;
    unsigned PredsProcessed = 0;
  };

  SmallVector<WorkState, 16> WorkList;
  WorkList.push_back(SU);
  while (!WorkList.empty()) {
    auto &Temp = WorkList.back();
    auto *TempSU = Temp.SU;
    bool AllPredsKnown = true;
    // Try to find a non-evaluated pred and push it into the processing stack.
    for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
      auto &Pred = TempSU->Preds[P];
      if (Pred.isCtrl()) continue;  // ignore chain preds
      SUnit *PredSU = Pred.getSUnit();
      if (SUNumbers[PredSU->NodeNum] == 0) {
#ifndef NDEBUG
        // In debug mode, check that we don't have such element in the stack.
        for (auto It : WorkList)
          assert(It.SU != PredSU && "Trying to push an element twice?");
#endif
        // Next time start processing this one starting from the next pred.
        Temp.PredsProcessed = P + 1;
        WorkList.push_back(PredSU);
        AllPredsKnown = false;
        break;
      }
    }

    if (!AllPredsKnown)
      continue;

    // Once all preds are known, we can calculate the answer for this one.
    unsigned SethiUllmanNumber = 0;
    unsigned Extra = 0;
    for (const SDep &Pred : TempSU->Preds) {
      if (Pred.isCtrl()) continue;  // ignore chain preds
      SUnit *PredSU = Pred.getSUnit();
      unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
      assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
      if (PredSethiUllman > SethiUllmanNumber) {
        SethiUllmanNumber = PredSethiUllman;
        Extra = 0;
      } else if (PredSethiUllman == SethiUllmanNumber)
        ++Extra;
    }

    SethiUllmanNumber += Extra;
    if (SethiUllmanNumber == 0)
      SethiUllmanNumber = 1;
    SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
    WorkList.pop_back();
  }

  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
  return SUNumbers[SU->NodeNum];
}
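
// For illustration of the recurrence above (not part of the algorithm): a
// node with no data predecessors gets number 1; a node whose two operands
// both carry number 1 gets 1 + 1 = 2, since one operand's result must stay
// live while the other is computed; a node whose operands carry 2 and 1
// keeps 2, because the larger subtree can be evaluated first.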
/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
void RegReductionPQBase::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (const SUnit &SU : *SUnits)
    CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
}
void RegReductionPQBase::addNode(const SUnit *SU) {
  unsigned SUSize = SethiUllmanNumbers.size();
  if (SUnits->size() > SUSize)
    SethiUllmanNumbers.resize(SUSize*2, 0);
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

void RegReductionPQBase::updateNode(const SUnit *SU) {
  SethiUllmanNumbers[SU->NodeNum] = 0;
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}
// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SethiUllmanNumbers.size());
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return 0;
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return 0;
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large SethiUllman number so it will be
    // scheduled right before its predecessors so that it doesn't lengthen
    // their live ranges.
    return 0xffff;
  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;
#if 1
  return SethiUllmanNumbers[SU->NodeNum];
#else
  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
  if (SU->isCallOp) {
    // FIXME: This assumes all of the defs are used as call operands.
    int NP = (int)Priority - SU->getNode()->getNumValues();
    return (NP > 0) ? NP : 0;
  }
  return Priority;
#endif
}
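
// In bottom-up order this means, for example, that a store (no data
// successors, at least one predecessor) reports 0xffff and is popped before
// the computation feeding it, while CopyToReg and subreg nodes report 0 so
// they stay next to their uses.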
//===----------------------------------------------------------------------===//
//                     Register Pressure Tracking
//===----------------------------------------------------------------------===//

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
  for (const TargetRegisterClass *RC : TRI->regclasses()) {
    unsigned Id = RC->getID();
    unsigned RP = RegPressure[Id];
    if (!RP) continue;
    LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
                      << RegLimit[Id] << '\n');
  }
}
#endif
bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
  if (!TLI)
    return false;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;
    SUnit *PredSU = Pred.getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);

      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
        return true;
    }
  }
  return false;
}
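
// Example reading of the check above (values hypothetical): with
// RegLimit[RC] == 6 and RegPressure[RC] == 5, a predecessor def of cost 1 in
// RC makes 5 + 1 >= 6, so scheduling SU now is treated as high pressure.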
bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
  const SDNode *N = SU->getNode();

  if (!N->isMachineOpcode() || !SU->NumSuccs)
    return false;

  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      return true;
  }
  return false;
}
// Compute the register pressure contribution by this instruction, counting up
// for uses that are not live and down for defs. Only count register classes
// that are already under high pressure. As a side effect, compute the number of
// uses of registers that are already live.
//
// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
// so could probably be factored.
int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
  LiveUses = 0;
  int PDiff = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;
    SUnit *PredSU = Pred.getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      if (PredSU->getNode()->isMachineOpcode())
        ++LiveUses;
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      MVT VT = RegDefPos.GetValue();
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] >= RegLimit[RCId])
        ++PDiff;
    }
  }
  const SDNode *N = SU->getNode();

  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
    return PDiff;

  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      --PDiff;
  }
  return PDiff;
}
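
// Sign convention, roughly: remaining defs of data predecessors in classes
// already at or over their limit count +1 (they become live when SU is
// scheduled bottom-up), while SU's own used defs in such classes count -1
// (they die). A positive result means scheduling SU now is expected to worsen
// pressure in the already-hot classes.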
void RegReductionPQBase::scheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  if (!SU->getNode())
    return;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;
    SUnit *PredSU = Pred.getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    // FIXME: The ScheduleDAG currently loses information about which of a
    // node's values is consumed by each dependence. Consequently, if the node
    // defines multiple register classes, we don't know which to pressurize
    // here. Instead the following loop consumes the register defs in an
    // arbitrary order. At least it handles the common case of clustered loads
    // to the same class. For precise liveness, each SDep needs to indicate the
    // result number. But that tightly couples the ScheduleDAG with the
    // SelectionDAG making updates tricky. A simpler hack would be to attach a
    // value type or register class to SDep.
    //
    // The most important aspect of register tracking is balancing the increase
    // here with the reduction further below. Note that this SU may use multiple
    // defs in PredSU. They can't be determined here, but we've already
    // compensated by reducing NumRegDefsLeft in PredSU during
    // ScheduleDAGSDNodes::AddSchedEdges.
    --PredSU->NumRegDefsLeft;
    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
      if (SkipRegDefs)
        continue;
      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
      RegPressure[RCId] += Cost;
      break;
    }
  }

  // We should have this assert, but there may be dead SDNodes that never
  // materialize as SUnits, so they don't appear to generate liveness.
  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
  int SkipRegDefs = (int)SU->NumRegDefsLeft;
  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    if (SkipRegDefs > 0)
      continue;
    unsigned RCId, Cost;
    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    if (RegPressure[RCId] < Cost) {
      // Register pressure tracking is imprecise. This can happen. But we try
      // hard not to let it happen because it likely results in poor scheduling.
      LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum
                        << ") has too many regdefs\n");
      RegPressure[RCId] = 0;
    }
    else {
      RegPressure[RCId] -= Cost;
    }
  }
  LLVM_DEBUG(dumpRegPressure());
}
void RegReductionPQBase::unscheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  const SDNode *N = SU->getNode();
  if (!N) return;

  if (!N->isMachineOpcode()) {
    if (N->getOpcode() != ISD::CopyToReg)
      return;
  } else {
    unsigned Opc = N->getMachineOpcode();
    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
        Opc == TargetOpcode::INSERT_SUBREG ||
        Opc == TargetOpcode::SUBREG_TO_REG ||
        Opc == TargetOpcode::REG_SEQUENCE ||
        Opc == TargetOpcode::IMPLICIT_DEF)
      return;
  }

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;
    SUnit *PredSU = Pred.getSUnit();
    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
    // counts data deps.
    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
      continue;
    const SDNode *PN = PredSU->getNode();
    if (!PN->isMachineOpcode()) {
      if (PN->getOpcode() == ISD::CopyFromReg) {
        MVT VT = PN->getSimpleValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
      continue;
    }
    unsigned POpc = PN->getMachineOpcode();
    if (POpc == TargetOpcode::IMPLICIT_DEF)
      continue;
    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
        POpc == TargetOpcode::INSERT_SUBREG ||
        POpc == TargetOpcode::SUBREG_TO_REG) {
      MVT VT = PN->getSimpleValueType(0);
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      continue;
    }
    if (POpc == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = PN->getConstantOperandVal(0);
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      unsigned RCId = RC->getID();
      // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used
      // here. Instead use the same constant as in GetCostForDef.
      RegPressure[RCId] += RegSequenceCost;
      continue;
    }
    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
    for (unsigned i = 0; i != NumDefs; ++i) {
      MVT VT = PN->getSimpleValueType(i);
      if (!PN->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
        // Register pressure tracking is imprecise. This can happen.
        RegPressure[RCId] = 0;
      else
        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
    }
  }

  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
  // may transfer data dependencies to CopyToReg.
  if (SU->NumSuccs && N->isMachineOpcode()) {
    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      MVT VT = N->getSimpleValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    }
  }

  LLVM_DEBUG(dumpRegPressure());
}
//===----------------------------------------------------------------------===//
//           Dynamic Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl()) continue;  // ignore chain succs
    unsigned Height = Succ.getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (Succ.getSUnit()->getNode() &&
        Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(Succ.getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}
/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}
/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
/// CopyFromReg from a virtual register.
static bool hasOnlyLiveInOpers(const SUnit *SU) {
  bool RetVal = false;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;
    const SUnit *PredSU = Pred.getSUnit();
    if (PredSU->getNode() &&
        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
      Register Reg =
          cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
      if (Reg.isVirtual()) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}
/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
  bool RetVal = false;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl()) continue;
    const SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
      Register Reg =
          cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
      if (Reg.isVirtual()) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}
// Set isVRegCycle for a node with only live in opers and live out uses. Also
// set isVRegCycle for its CopyFromReg operands.
//
// This is only relevant for single-block loops, in which case the VRegCycle
// node is likely an induction variable in which the operand and target virtual
// registers should be coalesced (e.g. pre/post increment values). Setting the
// isVRegCycle flag helps the scheduler prioritize other uses of the same
// CopyFromReg so that this node becomes the virtual register "kill". This
// avoids interference between the values live in and out of the block and
// eliminates a copy inside the loop.
static void initVRegCycle(SUnit *SU) {
  if (DisableSchedVRegCycle)
    return;

  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
    return;

  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");

  SU->isVRegCycle = true;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;
    Pred.getSUnit()->isVRegCycle = true;
  }
}
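
// A typical match is the increment of a loop counter in a single-block loop:
// its only value operand is a CopyFromReg of a virtual register and its only
// value use is a CopyToReg, so the flag marks it (and its CopyFromReg
// operand) as part of a cross-iteration register cycle.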
// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
// CopyFromReg operands. We should no longer penalize other uses of this VReg.
static void resetVRegCycle(SUnit *SU) {
  if (!SU->isVRegCycle)
    return;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = Pred.getSUnit();
    if (PredSU->isVRegCycle) {
      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
             "VRegCycle def must be CopyFromReg");
      Pred.getSUnit()->isVRegCycle = false;
    }
  }
}
// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
// means a node that defines the VRegCycle has not been scheduled yet.
static bool hasVRegCycleUse(const SUnit *SU) {
  // If this SU also defines the VReg, don't hoist it as a "use".
  if (SU->isVRegCycle)
    return false;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;  // ignore chain preds
    if (Pred.getSUnit()->isVRegCycle &&
        Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
      LLVM_DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
      return true;
    }
  }
  return false;
}
// Check for either a dependence (latency) or resource (hazard) stall.
//
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
  if ((int)SPQ->getCurCycle() < Height) return true;
  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return true;
  return false;
}
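
// For example, bottom-up with cycle-level scheduling enabled, an SU whose
// height (its latency distance from the exit) exceeds the current cycle
// cannot issue yet, so it is reported as a stall even before consulting the
// hazard recognizer.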
// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
                            RegReductionPQBase *SPQ) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
  int LHeight = (int)left->getHeight() + LPenalty;
  int RHeight = (int)right->getHeight() + RPenalty;

  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
    BUHasStall(left, LHeight, SPQ);
  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
    BUHasStall(right, RHeight, SPQ);

  // If scheduling only one of the nodes will cause a pipeline stall, delay it.
  // If both will stall, sort them according to their height.
  if (LStall) {
    if (!RStall)
      return 1;
    if (LHeight != RHeight)
      return LHeight > RHeight ? 1 : -1;
  } else if (RStall)
    return -1;

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.
  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
                     right->SchedulingPref == Sched::ILP)) {
    // If neither instruction stalls (!LStall && !RStall) and the
    // HazardRecognizer is enabled, grouping instructions by cycle, then height
    // is already covered so only depth matters. We also reach this point if
    // both stall but have the same height.
    if (!SPQ->getHazardRec()->isEnabled()) {
      if (LHeight != RHeight)
        return LHeight > RHeight ? 1 : -1;
    }
    int LDepth = left->getDepth() - LPenalty;
    int RDepth = right->getDepth() - RPenalty;
    if (LDepth != RDepth) {
      LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
                        << ") depth " << LDepth << " vs SU (" << right->NodeNum
                        << ") depth " << RDepth << "\n");
      return LDepth < RDepth ? 1 : -1;
    }
    if (left->Latency != right->Latency)
      return left->Latency > right->Latency ? 1 : -1;
  }
  return 0;
}
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
  // Schedule physical register definitions close to their use. This is
  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
  // long as shortening physreg live ranges is generally good, we can defer
  // creating a subtarget hook.
  if (!DisableSchedPhysRegJoin) {
    bool LHasPhysReg = left->hasPhysRegDefs;
    bool RHasPhysReg = right->hasPhysRegDefs;
    if (LHasPhysReg != RHasPhysReg) {
      #ifndef NDEBUG
      static const char *const PhysRegMsg[] = { " has no physreg",
                                                " defines a physreg" };
      #endif
      LLVM_DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
                        << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
                        << ") " << PhysRegMsg[RHasPhysReg] << "\n");
      return LHasPhysReg < RHasPhysReg;
    }
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);

  // Be really careful about hoisting call operands above previous calls.
  // Only allow it if it would reduce register pressure.
  if (left->isCall && right->isCallOp) {
    unsigned RNumVals = right->getNode()->getNumValues();
    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
  }
  if (right->isCall && left->isCallOp) {
    unsigned LNumVals = left->getNode()->getNumValues();
    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
  }

  if (LPriority != RPriority)
    return LPriority > RPriority;

  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
  // the same, then keep source order.
  if (left->isCall || right->isCall) {
    unsigned LOrder = SPQ->getNodeOrdering(left);
    unsigned ROrder = SPQ->getNodeOrdering(right);

    // Prefer an ordering where the lower the non-zero order number, the higher
    // the priority.
    if ((LOrder || ROrder) && LOrder != ROrder)
      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
  }

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  // Comparing latency against a call makes little sense unless the node
  // is register pressure-neutral.
  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
    return (left->NodeQueueId > right->NodeQueueId);

  // Do not compare latencies when one or both of the nodes are calls.
  if (!DisableSchedCycles &&
      !(left->isCall || right->isCall)) {
    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  }
  else {
    if (left->getHeight() != right->getHeight())
      return left->getHeight() > right->getHeight();

    if (left->getDepth() != right->getDepth())
      return left->getDepth() < right->getDepth();
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
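
// Roughly, the tie-breakers above are applied in this order: keep physreg
// defs close to their uses, then the Sethi-Ullman priority (adjusted for call
// operands), source order when calls are involved, distance to the closest
// successor, the scratch-register estimate, latency (when neither node is a
// call) or height/depth otherwise, and finally the queue id as a last resort.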
bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer an ordering where the lower the non-zero order number, the higher
  // the priority.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}
// If the time between now and when the instruction will be ready can cover
// the spill code, then avoid adding it to the ready queue. This gives long
// stalls highest priority and allows hoisting across calls. It should also
// speed up processing the available queue.
bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  static const unsigned ReadyDelay = 3;

  if (SPQ->MayReduceRegPressure(SU)) return true;

  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}
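
// E.g. with ReadyDelay == 3, an SU that may reduce register pressure is made
// available immediately, while one whose height is more than three cycles
// beyond the current cycle is kept in the pending queue for now.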
// Return true if right should be scheduled with higher priority than left.
bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh) {
    LLVM_DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
                      << right->NodeNum << ")\n");
    return true;
  }
  else if (!LHigh && RHigh) {
    LLVM_DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
                      << left->NodeNum << ")\n");
    return false;
  }
  if (!LHigh && !RHigh) {
    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  }
  return BURRSort(left, right, SPQ);
}
// Schedule as many instructions in each cycle as possible. So don't make an
// instruction available unless it is ready in the current cycle.
bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  if (SU->getHeight() > CurCycle) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}

static bool canEnableCoalescing(SUnit *SU) {
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return true;

  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return true;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return true;

  return false;
}
// list-ilp is currently an experimental scheduler that allows various
// heuristics to be enabled prior to the normal register reduction logic.
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  unsigned LLiveUses = 0, RLiveUses = 0;
  int LPDiff = 0, RPDiff = 0;
  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
  }
  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
    LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
                      << "): " << LPDiff << " != SU(" << right->NodeNum
                      << "): " << RPDiff << "\n");
    return LPDiff > RPDiff;
  }

  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
    bool LReduce = canEnableCoalescing(left);
    bool RReduce = canEnableCoalescing(right);
    if (LReduce && !RReduce) return false;
    if (RReduce && !LReduce) return true;
  }

  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
    LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
                      << " != SU(" << right->NodeNum << "): " << RLiveUses
                      << "\n");
    return LLiveUses < RLiveUses;
  }

  if (!DisableSchedStalls) {
    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
    if (LStall != RStall)
      return left->getHeight() > right->getHeight();
  }

  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                        << left->getDepth() << " != SU(" << right->NodeNum
                        << "): " << right->getDepth() << "\n");
      return left->getDepth() < right->getDepth();
    }
  }

  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow)
      return left->getHeight() > right->getHeight();
  }

  return BURRSort(left, right, SPQ);
}
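
// The heuristics above are individually gated by the -disable-sched-* flags:
// register pressure difference first, then coalescing opportunity, live uses,
// stalls, critical path (depth), and height, before falling back to the
// common BURRSort ordering.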
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
  SUnits = &sunits;
  // Add pseudo dependency edges for two-address nodes.
  if (!Disable2AddrHack)
    AddPseudoTwoAddrDeps();
  // Reroute edges to nodes with multiple uses.
  if (!TracksRegPressure && !SrcOrder)
    PrescheduleNodesWithMultipleUses();
  // Calculate node priorities.
  CalculateSethiUllmanNumbers();

  // For single block loops, mark nodes that look like canonical IV increments.
  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
    for (SUnit &SU : sunits)
      initVRegCycle(&SU);
}
//===----------------------------------------------------------------------===//
//                    Preschedule for Register Pressure
//===----------------------------------------------------------------------===//

bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}
/// canClobberReachingPhysRegUse - True if SU would clobber one of its
/// successor's explicit physregs whose definition can reach DepSU.
/// i.e. DepSU should not be scheduled above SU.
static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
                                         ScheduleDAGRRList *scheduleDAG,
                                         const TargetInstrInfo *TII,
                                         const TargetRegisterInfo *TRI) {
  ArrayRef<MCPhysReg> ImpDefs =
      TII->get(SU->getNode()->getMachineOpcode()).implicit_defs();
  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
  if (ImpDefs.empty() && !RegMask)
    return false;

  for (const SDep &Succ : SU->Succs) {
    SUnit *SuccSU = Succ.getSUnit();
    for (const SDep &SuccPred : SuccSU->Preds) {
      if (!SuccPred.isAssignedRegDep())
        continue;

      if (RegMask &&
          MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
          scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
        return true;

      for (MCPhysReg ImpDef : ImpDefs) {
        // Return true if SU clobbers this physical register use and the
        // definition of the register reaches from DepSU. IsReachable queries
        // a topological forward sort of the DAG (following the successors).
        if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) &&
            scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
          return true;
      }
    }
  }
  return false;
}
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs();
  assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    ArrayRef<MCPhysReg> SUImpDefs =
        TII->get(SUNode->getMachineOpcode()).implicit_defs();
    const uint32_t *SURegMask = getNodeRegMask(SUNode);
    if (SUImpDefs.empty() && !SURegMask)
      continue;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      MVT VT = N->getSimpleValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      MCPhysReg Reg = ImpDefs[i - NumDefs];
      if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
        return true;
      for (MCPhysReg SUReg : SUImpDefs) {
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}
/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When a node N feeds both a store and another use U,
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, so that the store
/// is scheduled immediately after N. This shortens the U->N live
/// range, reducing register pressure.
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (SUnit &SU : *SUnits) {
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU.NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU.NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU.getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
        continue;

    SDNode *PredFrameSetup = nullptr;
    for (const SDep &Pred : SU.Preds)
      if (Pred.isCtrl() && Pred.getSUnit()) {
        // Find the predecessor which is not data dependence.
        SDNode *PredND = Pred.getSUnit()->getNode();

        // If PredND is FrameSetup, we should not pre-schedule the node,
        // or else, when bottom-up scheduling, ADJCALLSTACKDOWN and
        // ADJCALLSTACKUP may hold CallResource too long and prevent other
        // calls from being scheduled. If there's no other available node
        // to schedule, the scheduler will try to rename the register by
        // creating a copy to avoid the conflict, which will fail because
        // CallResource is not a real physical register.
        if (PredND && PredND->isMachineOpcode() &&
            (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
          PredFrameSetup = PredND;
          break;
        }
      }
    // Skip nodes that have a FrameSetup parent.
    if (PredFrameSetup != nullptr)
      continue;

    // Locate the single data predecessor.
    SUnit *PredSU = nullptr;
    for (const SDep &Pred : SU.Preds)
      if (!Pred.isCtrl()) {
        PredSU = Pred.getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU.getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
        continue;

    // Perform checks on the successors of PredSU.
    for (const SDep &PredSucc : PredSU->Succs) {
      SUnit *PredSuccSU = PredSucc.getSUnit();
      if (PredSuccSU == &SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(&SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    LLVM_DEBUG(
        dbgs() << "    Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
               << PredSU->NodeNum
               << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != &SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPredQueued(&SU, Edge);
        Edge.setSUnit(&SU);
        scheduleDAG->AddPredQueued(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
  for (SUnit &SU : *SUnits) {
    if (!SU.isTwoAddress)
      continue;

    SDNode *Node = SU.getNode();
    if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(&SU);
    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU.getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU)
        continue;
      for (const SDep &Succ : DUSU->Succs) {
        if (Succ.isCtrl())
          continue;
        SUnit *SuccSU = Succ.getSUnit();
        if (SuccSU == &SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU.getHeight() &&
            (SU.getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
            (!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU.isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, &SU)) {
          LLVM_DEBUG(dbgs()
                     << "    Adding a pseudo-two-addr edge from SU #"
                     << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
        }
      }
    }
  }
}
//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

ScheduleDAGSDNodes *llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                                     CodeGenOptLevel OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOptLevel OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOptLevel OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const TargetLowering *TLI = IS->TLI;

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
ScheduleDAGSDNodes *llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
                                                    CodeGenOptLevel OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const TargetLowering *TLI = IS->TLI;

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
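
// Usage note: each factory builds the priority queue matching its heuristic,
// creates the ScheduleDAGRRList that will drive it, and links the queue back
// to the scheduler via setScheduleDAG() so the queue can reach the hazard
// recognizer and the DAG during comparisons.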