//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>

using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);
static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);
static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);
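
// Note added for exposition (not from the original source): RegisterScheduler
// hooks each of the names above into the SelectionDAG scheduler registry, so a
// particular variant can be selected from the llc command line, for example
// "llc -pre-RA-sched=list-burr" or "llc -pre-RA-sched=source".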

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));
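
// Illustrative usage (assumption, not from the original file): the hidden
// flags above are intended to be combined with the list-ilp scheduler when
// experimenting, e.g. "llc -pre-RA-sched=list-ilp -disable-sched-height
// -max-sched-reorder=4"; the specific values shown are arbitrary examples.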

// For sched=list-ilp, count the number of times each factor comes into play.
enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
       FactStatic, FactOther, NumFactors };
static const char *FactorName[NumFactors] =
  {"PressureDiff", "RegUses", "Stall", "Height", "Depth", "Static", "Other"};
static int FactorCount[NumFactors];
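
// Exposition only: these counters are assumed to be bumped by the priority
// comparison heuristics elsewhere in this file (not shown in this excerpt);
// Schedule() below prints one "FactorName <tab> count" line per factor when
// debug output is enabled.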

//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
namespace {
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
  /// if it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits) {

    const TargetMachine &tm = mf.getTarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// This returns true if this is a new predecessor.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// This returns true if an edge was removed.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  void ScheduleNodeTopDown(SUnit*);
  void ListScheduleTopDown();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
} // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost) {
  EVT VT = RegDefPos.GetValue();

  // Special handling for untyped values. These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::untyped) {
    const SDNode *Node = RegDefPos.GetNode();
    unsigned Opcode = Node->getMachineOpcode();

    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  for (int i = 0; i < NumFactors; ++i) {
    FactorCount[i] = 0;
  }

  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegGens.resize(TRI->getNumRegs(), NULL);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  for (int i = 0; i < NumFactors; ++i) {
    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
  }

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue, avoid
    // adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle =
      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      if (isBottomUp)
        HazardRec->RecedeCycle();
      else
        HazardRec->AdvanceCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this node.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (isBottomUp && SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::CopyFromReg:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (isBottomUp && SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);

  if (!isBottomUp && SU->isCall) {
    HazardRec->Reset();
  }
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << " Height [" << SU->getHeight()
          << "] pipeline stall!\n");

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
      }
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->UnscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BTCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();

  if (SU->getNode()->getGluedNode())
    return NULL;

  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatileness.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
  SUnit *NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit*, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit*, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
             /*Reg=*/0, /*isNormalMemory=*/false,
             /*isMustAlias=*/false, /*isArtificial=*/true);
      AddPred(SuccSU, D);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = MCID.getNumDefs();
  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Ref is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to break the dependency.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try
    // duplicating the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If cross copy register class is the same as RC, then it must be possible
    // copy the value directly. Do not try duplicating the def.
    // If cross copy register class is not the same as RC, then it's possible to
    // copy the value but it requires cross register class copies and it is
    // expensive.
    // If cross copy register class is null, then it's not possible to copy
    // the value at all.
    SUnit *NewDef = 0;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\nExamining Available:\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

  VerifySchedule(isBottomUp);
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  VerifySchedule(isBottomUp);
}

//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
class RegReductionPQBase;

struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};

template<class SF>
struct reverse_sort : public queue_sort {
  SF &SortFunc;
  reverse_sort(SF &sf) : SortFunc(sf) {}
  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}

  bool operator()(SUnit* left, SUnit* right) const {
    // reverse left/right rather than simply !SortFunc(left, right)
    // to expose different paths in the comparison logic.
    return SortFunc(right, left);
  }
};
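
// Exposition only: popFromQueue() further below wraps the real picker in
// reverse_sort when the ScheduleDAG's StressSched flag is set, so both
// orderings of each comparison get exercised during stress testing.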

/// bu_ls_rr_sort - Priority function for bottom up register pressure
// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// td_ls_rr_sort - Priority function for top down register pressure reduction
// scheduler.
struct td_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = false,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  src_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  src_ls_rr_sort(const src_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  hybrid_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  ilp_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
  std::vector<SUnit*> Queue;
  unsigned CurQueueId;
  bool TracksRegPressure;

  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits;

  MachineFunction &MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetLowering *TLI;
  ScheduleDAGRRList *scheduleDAG;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter),
      CurQueueId(0), TracksRegPressure(tracksrp),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits);

  void addNode(const SUnit *SU);

  void updateNode(const SUnit *SU);

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }

  bool empty() const { return Queue.empty(); }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit*>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                SU);
    if (I != prior(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }

  bool tracksRegPressure() const { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;

  bool MayReduceRegPressure(SUnit *SU) const;

  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;

  void ScheduledNode(SUnit *SU);

  void UnscheduledNode(SUnit *SU);

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};

template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
  std::vector<SUnit*>::iterator Best = Q.begin();
  for (std::vector<SUnit*>::iterator I = llvm::next(Q.begin()),
         E = Q.end(); I != E; ++I)
    if (Picker(*Best, *I))
      Best = I;
  SUnit *V = *Best;
  if (Best != prior(Q.end()))
    std::swap(*Best, Q.back());
  Q.pop_back();
  return V;
}

template<class SF>
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
  if (DAG->StressSched) {
    reverse_sort<SF> RPicker(Picker);
    return popFromQueueImpl(Q, RPicker);
  }
  return popFromQueueImpl(Q, Picker);
}

template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
  SF Picker;

public:
  RegReductionPriorityQueue(MachineFunction &mf,
                            bool tracksrp,
                            const TargetInstrInfo *tii,
                            const TargetRegisterInfo *tri,
                            const TargetLowering *tli)
    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
      Picker(this) {}

  bool isBottomUp() const { return SF::IsBottomUp; }

  bool isReady(SUnit *U) const {
    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
  }

  SUnit *pop() {
    if (Queue.empty()) return NULL;

    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
    V->NodeQueueId = 0;
    return V;
  }

  void dump(ScheduleDAG *DAG) const {
    // Emulate pop() without clobbering NodeQueueIds.
    std::vector<SUnit*> DumpQueue = Queue;
    SF DumpPicker = Picker;
    while (!DumpQueue.empty()) {
      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
      if (isBottomUp())
        dbgs() << "Height " << SU->getHeight() << ": ";
      else
        dbgs() << "Depth " << SU->getDepth() << ": ";
      SU->dump(DAG);
    }
  }
};

typedef RegReductionPriorityQueue<bu_ls_rr_sort>
BURegReductionPriorityQueue;

typedef RegReductionPriorityQueue<td_ls_rr_sort>
TDRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
HybridBURRPriorityQueue;

typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
ILPBURRPriorityQueue;
} // end anonymous namespace

//===----------------------------------------------------------------------===//
//           Static Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

// Check for special nodes that bypass scheduling heuristics.
// Currently this pushes TokenFactor nodes down, but may be used for other
// pseudo-ops as well.
//
// Return -1 to schedule right above left, 1 for left above right.
// Return 0 if no bias exists.
static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
  bool LSchedLow = left->isScheduleLow;
  bool RSchedLow = right->isScheduleLow;
  if (LSchedLow != RSchedLow)
    return LSchedLow < RSchedLow ? 1 : -1;
  return 0;
}

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
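
// Worked example (added for exposition): a node with no non-chain predecessors
// ends the loop with SethiUllmanNumber == 0 and is bumped to 1. A node whose
// two predecessors both carry the number 1 keeps the maximum (1) and records
// one tie in Extra, ending at 1 + 1 = 2. Larger numbers therefore mark
// subtrees that need more values live at once while they are evaluated.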

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
void RegReductionPQBase::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

void RegReductionPQBase::addNode(const SUnit *SU) {
  unsigned SUSize = SethiUllmanNumbers.size();
  if (SUnits->size() > SUSize)
    SethiUllmanNumbers.resize(SUSize*2, 0);
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

void RegReductionPQBase::updateNode(const SUnit *SU) {
  SethiUllmanNumbers[SU->NodeNum] = 0;
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SethiUllmanNumbers.size());
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return 0;
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return 0;
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large SethiUllman number so it will be
    // scheduled right before its predecessors so that it doesn't lengthen
    // their live ranges.
    return 0xffff;
  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;
#if 1
  return SethiUllmanNumbers[SU->NodeNum];
#else
  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
  if (SU->isCallOp) {
    // FIXME: This assumes all of the defs are used as call operands.
    int NP = (int)Priority - SU->getNode()->getNumValues();
    return (NP > 0) ? NP : 0;
  }
  return Priority;
#endif
}

//===----------------------------------------------------------------------===//
//                     Register Pressure Tracking
//===----------------------------------------------------------------------===//

void RegReductionPQBase::dumpRegPressure() const {
  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
         E = TRI->regclass_end(); I != E; ++I) {
    const TargetRegisterClass *RC = *I;
    unsigned Id = RC->getID();
    unsigned RP = RegPressure[Id];
    if (!RP) continue;
    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
          << '\n');
  }
}
bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
  if (!TLI)
    return false;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);

      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
        return true;
    }
  }
  return false;
}
bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
  const SDNode *N = SU->getNode();

  if (!N->isMachineOpcode() || !SU->NumSuccs)
    return false;

  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    EVT VT = N->getValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      return true;
  }
  return false;
}
// Compute the register pressure contribution of this instruction by counting
// up for uses that are not live and down for defs. Only count register classes
// that are already under high pressure. As a side effect, compute the number of
// uses of registers that are already live.
//
// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
// so could probably be factored.
int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
  LiveUses = 0;
  int PDiff = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      if (PredSU->getNode()->isMachineOpcode())
        ++LiveUses;
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      EVT VT = RegDefPos.GetValue();
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] >= RegLimit[RCId])
        ++PDiff;
    }
  }
  const SDNode *N = SU->getNode();

  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
    return PDiff;

  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    EVT VT = N->getValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      --PDiff;
  }
  return PDiff;
}
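
// NOTE (editorial example, not original scheduler logic): PDiff counts up
// once for each register def of a data predecessor that is not yet fully
// consumed and whose class is at or over its limit, and down once for each
// used def of SU itself in such a class. A positive difference therefore
// flags a node that is expected to add pressure in already-saturated classes
// when it is scheduled, while LiveUses counts operands whose defining node is
// already fully live.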
void RegReductionPQBase::ScheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  if (!SU->getNode())
    return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    // FIXME: The ScheduleDAG currently loses information about which of a
    // node's values is consumed by each dependence. Consequently, if the node
    // defines multiple register classes, we don't know which to pressurize
    // here. Instead the following loop consumes the register defs in an
    // arbitrary order. At least it handles the common case of clustered loads
    // to the same class. For precise liveness, each SDep needs to indicate the
    // result number. But that tightly couples the ScheduleDAG with the
    // SelectionDAG making updates tricky. A simpler hack would be to attach a
    // value type or register class to SDep.
    //
    // The most important aspect of register tracking is balancing the increase
    // here with the reduction further below. Note that this SU may use multiple
    // defs in PredSU. They can't be determined here, but we've already
    // compensated by reducing NumRegDefsLeft in PredSU during
    // ScheduleDAGSDNodes::AddSchedEdges.
    --PredSU->NumRegDefsLeft;
    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
      if (SkipRegDefs)
        continue;
      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
      RegPressure[RCId] += Cost;
      break;
    }
  }

  // We should have this assert, but there may be dead SDNodes that never
  // materialize as SUnits, so they don't appear to generate liveness.
  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
  int SkipRegDefs = (int)SU->NumRegDefsLeft;
  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    if (SkipRegDefs > 0)
      continue;
    unsigned RCId, Cost;
    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
    if (RegPressure[RCId] < Cost) {
      // Register pressure tracking is imprecise. This can happen. But we try
      // hard not to let it happen because it likely results in poor scheduling.
      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
      RegPressure[RCId] = 0;
    }
    else {
      RegPressure[RCId] -= Cost;
    }
  }
  dumpRegPressure();
}
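
// NOTE (editorial example, not original scheduler logic): the two loops above
// are the two halves of the bookkeeping. Scheduling SU bottom-up makes one
// def of each data predecessor live, so the first loop charges that def's
// cost to RegPressure; SU's own defs that earlier-scheduled uses had already
// made live are now defined, so the second loop credits their cost back.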
void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  const SDNode *N = SU->getNode();
  if (!N) return;

  if (!N->isMachineOpcode()) {
    if (N->getOpcode() != ISD::CopyToReg)
      return;
  }
  else {
    unsigned Opc = N->getMachineOpcode();
    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
        Opc == TargetOpcode::INSERT_SUBREG ||
        Opc == TargetOpcode::SUBREG_TO_REG ||
        Opc == TargetOpcode::REG_SEQUENCE ||
        Opc == TargetOpcode::IMPLICIT_DEF)
      return;
  }

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
    // counts data deps.
    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
      continue;
    const SDNode *PN = PredSU->getNode();
    if (!PN->isMachineOpcode()) {
      if (PN->getOpcode() == ISD::CopyFromReg) {
        EVT VT = PN->getValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
      continue;
    }
    unsigned POpc = PN->getMachineOpcode();
    if (POpc == TargetOpcode::IMPLICIT_DEF)
      continue;
    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
        POpc == TargetOpcode::INSERT_SUBREG ||
        POpc == TargetOpcode::SUBREG_TO_REG) {
      EVT VT = PN->getValueType(0);
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      continue;
    }
    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
    for (unsigned i = 0; i != NumDefs; ++i) {
      EVT VT = PN->getValueType(i);
      if (!PN->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
        // Register pressure tracking is imprecise. This can happen.
        RegPressure[RCId] = 0;
      else
        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
    }
  }

  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
  // may transfer data dependencies to CopyToReg.
  if (SU->NumSuccs && N->isMachineOpcode()) {
    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    }
  }

  dumpRegPressure();
}
//===----------------------------------------------------------------------===//
//           Dynamic Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight) MaxHeight = Height;
  }
  return MaxHeight;
}
/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}
/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
/// CopyFromReg from a virtual register.
static bool hasOnlyLiveInOpers(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *PredSU = I->getSUnit();
    if (PredSU->getNode() &&
        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg =
        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}
/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
      unsigned Reg =
        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}
// Set isVRegCycle for a node with only live in opers and live out uses. Also
// set isVRegCycle for its CopyFromReg operands.
//
// This is only relevant for single-block loops, in which case the VRegCycle
// node is likely an induction variable in which the operand and target virtual
// registers should be coalesced (e.g. pre/post increment values). Setting the
// isVRegCycle flag helps the scheduler prioritize other uses of the same
// CopyFromReg so that this node becomes the virtual register "kill". This
// avoids interference between the values live in and out of the block and
// eliminates a copy inside the loop.
static void initVRegCycle(SUnit *SU) {
  if (DisableSchedVRegCycle)
    return;

  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
    return;

  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");

  SU->isVRegCycle = true;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    I->getSUnit()->isVRegCycle = true;
  }
}
// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
// CopyFromReg operands. We should no longer penalize other uses of this VReg.
static void resetVRegCycle(SUnit *SU) {
  if (!SU->isVRegCycle)
    return;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    if (PredSU->isVRegCycle) {
      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
             "VRegCycle def must be CopyFromReg");
      I->getSUnit()->isVRegCycle = 0;
    }
  }
}
// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
// means a node that defines the VRegCycle has not been scheduled yet.
static bool hasVRegCycleUse(const SUnit *SU) {
  // If this SU also defines the VReg, don't hoist it as a "use".
  if (SU->isVRegCycle)
    return false;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (I->getSUnit()->isVRegCycle &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
      return true;
    }
  }
  return false;
}
// Check for either a dependence (latency) or resource (hazard) stall.
//
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
  if ((int)SPQ->getCurCycle() < Height) return true;
  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return true;
  return false;
}
// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
                            RegReductionPQBase *SPQ) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
  int LHeight = (int)left->getHeight() + LPenalty;
  int RHeight = (int)right->getHeight() + RPenalty;

  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
    BUHasStall(left, LHeight, SPQ);
  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
    BUHasStall(right, RHeight, SPQ);

  // If scheduling one of the nodes will cause a pipeline stall, delay it.
  // If scheduling both of the nodes will cause a pipeline stall, sort them
  // according to their height.
  if (LStall) {
    if (!RStall) {
      DEBUG(++FactorCount[FactStall]);
      return 1;
    }
    if (LHeight != RHeight) {
      DEBUG(++FactorCount[FactStall]);
      return LHeight > RHeight ? 1 : -1;
    }
  } else if (RStall) {
    DEBUG(++FactorCount[FactStall]);
    return -1;
  }

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.
  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
                     right->SchedulingPref == Sched::Latency)) {
    if (DisableSchedCycles) {
      if (LHeight != RHeight) {
        DEBUG(++FactorCount[FactHeight]);
        return LHeight > RHeight ? 1 : -1;
      }
    }
    else {
      // If neither instruction stalls (!LStall && !RStall) then
      // its height is already covered so only its depth matters. We also reach
      // this if both stall but have the same height.
      int LDepth = left->getDepth() - LPenalty;
      int RDepth = right->getDepth() - RPenalty;
      if (LDepth != RDepth) {
        DEBUG(++FactorCount[FactDepth]);
        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
              << ") depth " << LDepth << " vs SU (" << right->NodeNum
              << ") depth " << RDepth << "\n");
        return LDepth < RDepth ? 1 : -1;
      }
    }
    if (left->Latency != right->Latency) {
      DEBUG(++FactorCount[FactOther]);
      return left->Latency > right->Latency ? 1 : -1;
    }
  }
  return 0;
}
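
// NOTE (editorial example, not original scheduler logic): the VRegCycle
// penalty simply makes a node that still uses an unscheduled VRegCycle
// register look one cycle taller and one cycle shallower than it really is,
// so the stall, height, and depth comparisons above all see the copy-inducing
// node as if it carried one extra cycle of latency.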
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
  // Schedule physical register definitions close to their use. This is
  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
  // long as shortening physreg live ranges is generally good, we can defer
  // creating a subtarget hook.
  if (!DisableSchedPhysRegJoin) {
    bool LHasPhysReg = left->hasPhysRegDefs;
    bool RHasPhysReg = right->hasPhysRegDefs;
    if (LHasPhysReg != RHasPhysReg) {
      DEBUG(++FactorCount[FactRegUses]);
    #ifndef NDEBUG
      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
    #endif
      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
            << PhysRegMsg[RHasPhysReg] << "\n");
      return LHasPhysReg < RHasPhysReg;
    }
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);

  // Be really careful about hoisting call operands above previous calls.
  // Only allow it if it would reduce register pressure.
  if (left->isCall && right->isCallOp) {
    unsigned RNumVals = right->getNode()->getNumValues();
    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
  }
  if (right->isCall && left->isCallOp) {
    unsigned LNumVals = left->getNode()->getNumValues();
    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
  }

  if (LPriority != RPriority) {
    DEBUG(++FactorCount[FactStatic]);
    return LPriority > RPriority;
  }

  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
  // the same, then keep source order.
  if (left->isCall || right->isCall) {
    unsigned LOrder = SPQ->getNodeOrdering(left);
    unsigned ROrder = SPQ->getNodeOrdering(right);

    // Prefer an ordering where the lower the non-zero order number, the higher
    // the priority.
    if ((LOrder || ROrder) && LOrder != ROrder)
      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
  }

  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist) {
    DEBUG(++FactorCount[FactOther]);
    return LDist < RDist;
  }

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch) {
    DEBUG(++FactorCount[FactOther]);
    return LScratch > RScratch;
  }

  // Comparing latency against a call makes little sense unless the node
  // is register pressure-neutral.
  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
    return (left->NodeQueueId > right->NodeQueueId);

  // Do not compare latencies when one or both of the nodes are calls.
  if (!DisableSchedCycles &&
      !(left->isCall || right->isCall)) {
    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  }
  else {
    if (left->getHeight() != right->getHeight()) {
      DEBUG(++FactorCount[FactHeight]);
      return left->getHeight() > right->getHeight();
    }

    if (left->getDepth() != right->getDepth()) {
      DEBUG(++FactorCount[FactDepth]);
      return left->getDepth() < right->getDepth();
    }
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  DEBUG(++FactorCount[FactOther]);
  return (left->NodeQueueId > right->NodeQueueId);
}
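
// NOTE (editorial summary, not original scheduler logic): the tie-breakers in
// BURRSort fire in order: physreg-def affinity, Sethi-Ullman priority
// (adjusted for call operands), source order for calls, closest-successor
// distance, scratch register count, latency (or height/depth when cycle-level
// scheduling is disabled), and finally queue insertion order.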
// Bottom up
bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  return BURRSort(left, right, SPQ);
}
// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer an ordering where the lower the non-zero order number, the higher
  // the priority.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}
// If the time between now and when the instruction will be ready can cover
// the spill code, then avoid adding it to the ready queue. This gives long
// stalls highest priority and allows hoisting across calls. It should also
// speed up processing the available queue.
bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  static const unsigned ReadyDelay = 3;

  if (SPQ->MayReduceRegPressure(SU)) return true;

  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}
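
// NOTE (editorial example, not original scheduler logic): with ReadyDelay = 3,
// a node whose height is more than three cycles beyond the current cycle is
// held out of the available queue (unless it may reduce register pressure),
// so the queue only contains work that can plausibly issue soon.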
// Return true if right should be scheduled with higher priority than left.
bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh) {
    DEBUG(++FactorCount[FactPressureDiff]);
    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
          << right->NodeNum << ")\n");
    return true;
  }
  else if (!LHigh && RHigh) {
    DEBUG(++FactorCount[FactPressureDiff]);
    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
          << left->NodeNum << ")\n");
    return false;
  }
  if (!LHigh && !RHigh) {
    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  }
  return BURRSort(left, right, SPQ);
}
// Schedule as many instructions in each cycle as possible. So don't make an
// instruction available unless it is ready in the current cycle.
bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  if (SU->getHeight() > CurCycle) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}
static bool canEnableCoalescing(SUnit *SU) {
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return true;

  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return true;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return true;

  return false;
}
// list-ilp is currently an experimental scheduler that allows various
// heuristics to be enabled prior to the normal register reduction logic.
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  unsigned LLiveUses = 0, RLiveUses = 0;
  int LPDiff = 0, RPDiff = 0;
  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
  }
  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
    DEBUG(++FactorCount[FactPressureDiff]);
    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
    return LPDiff > RPDiff;
  }

  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
    bool LReduce = canEnableCoalescing(left);
    bool RReduce = canEnableCoalescing(right);
    DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
    if (LReduce && !RReduce) return false;
    if (RReduce && !LReduce) return true;
  }

  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
    DEBUG(++FactorCount[FactRegUses]);
    return LLiveUses < RLiveUses;
  }

  if (!DisableSchedStalls) {
    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
    if (LStall != RStall) {
      DEBUG(++FactorCount[FactHeight]);
      return left->getHeight() > right->getHeight();
    }
  }

  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
            << left->getDepth() << " != SU(" << right->NodeNum << "): "
            << right->getDepth() << "\n");
      DEBUG(++FactorCount[FactDepth]);
      return left->getDepth() < right->getDepth();
    }
  }

  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow) {
      DEBUG(++FactorCount[FactHeight]);
      return left->getHeight() > right->getHeight();
    }
  }

  return BURRSort(left, right, SPQ);
}
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
  SUnits = &sunits;
  // Add pseudo dependency edges for two-address nodes.
  AddPseudoTwoAddrDeps();
  // Reroute edges to nodes with multiple uses.
  if (!TracksRegPressure)
    PrescheduleNodesWithMultipleUses();
  // Calculate node priorities.
  CalculateSethiUllmanNumbers();

  // For single block loops, mark nodes that look like canonical IV increments.
  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
      initVRegCycle(&sunits[i]);
    }
  }
}
//===----------------------------------------------------------------------===//
//                    Preschedule for Register Pressure
//===----------------------------------------------------------------------===//

bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}
/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ^
///      |
///    store
///      |
///      U
///      |
///     ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
                 << " next to PredSU #" << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(SU);
    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}
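
// NOTE (editorial example with hypothetical machine nodes, not original
// code): suppose a two-address update "t = ADD t, 1" and an ordinary reader
// "u = SUB t, 2" both consume the same def of t. The loop above adds an
// artificial order edge that makes the two-address SUnit depend on the other
// reader, so in the final program order the plain read of t is emitted before
// the tied def clobbers t, and no extra copy of t is needed.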
/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}
*left
, const SUnit
*right
) const {
2879 if (int res
= checkSpecialNodes(left
, right
))
2882 unsigned LPriority
= SPQ
->getNodePriority(left
);
2883 unsigned RPriority
= SPQ
->getNodePriority(right
);
2884 bool LIsTarget
= left
->getNode() && left
->getNode()->isMachineOpcode();
2885 bool RIsTarget
= right
->getNode() && right
->getNode()->isMachineOpcode();
2886 bool LIsFloater
= LIsTarget
&& left
->NumPreds
== 0;
2887 bool RIsFloater
= RIsTarget
&& right
->NumPreds
== 0;
2888 unsigned LBonus
= (LimitedSumOfUnscheduledPredsOfSuccs(left
,1) == 1) ? 2 : 0;
2889 unsigned RBonus
= (LimitedSumOfUnscheduledPredsOfSuccs(right
,1) == 1) ? 2 : 0;
2891 if (left
->NumSuccs
== 0 && right
->NumSuccs
!= 0)
2893 else if (left
->NumSuccs
!= 0 && right
->NumSuccs
== 0)
2900 if (left
->NumSuccs
== 1)
2902 if (right
->NumSuccs
== 1)
2905 if (LPriority
+LBonus
!= RPriority
+RBonus
)
2906 return LPriority
+LBonus
< RPriority
+RBonus
;
2908 if (left
->getDepth() != right
->getDepth())
2909 return left
->getDepth() < right
->getDepth();
2911 if (left
->NumSuccsLeft
!= right
->NumSuccsLeft
)
2912 return left
->NumSuccsLeft
> right
->NumSuccsLeft
;
2914 assert(left
->NodeQueueId
&& right
->NodeQueueId
&&
2915 "NodeQueueId cannot be zero");
2916 return (left
->NodeQueueId
> right
->NodeQueueId
);
//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ =
    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
                                CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}