1 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "pre-RA-sched"
19 #include "ScheduleDAGSDNodes.h"
20 #include "llvm/InlineAsm.h"
21 #include "llvm/CodeGen/SchedulerRegistry.h"
22 #include "llvm/CodeGen/SelectionDAGISel.h"
23 #include "llvm/Target/TargetRegisterInfo.h"
24 #include "llvm/Target/TargetData.h"
25 #include "llvm/Target/TargetMachine.h"
26 #include "llvm/Target/TargetInstrInfo.h"
27 #include "llvm/Target/TargetLowering.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/raw_ostream.h"
37 STATISTIC(NumBacktracks
, "Number of times scheduler backtracked");
38 STATISTIC(NumUnfolds
, "Number of nodes unfolded");
39 STATISTIC(NumDups
, "Number of duplicated nodes");
40 STATISTIC(NumPRCopies
, "Number of physical register copies");
42 static RegisterScheduler
43 burrListDAGScheduler("list-burr",
44 "Bottom-up register reduction list scheduling",
45 createBURRListDAGScheduler
);
46 static RegisterScheduler
47 tdrListrDAGScheduler("list-tdrr",
48 "Top-down register reduction list scheduling",
49 createTDRRListDAGScheduler
);
50 static RegisterScheduler
51 sourceListDAGScheduler("source",
52 "Similar to list-burr but schedules in source "
53 "order when possible",
54 createSourceListDAGScheduler
);
56 static RegisterScheduler
57 hybridListDAGScheduler("list-hybrid",
58 "Bottom-up register pressure aware list scheduling "
59 "which tries to balance latency and register pressure",
60 createHybridListDAGScheduler
);
62 static RegisterScheduler
63 ILPListDAGScheduler("list-ilp",
64 "Bottom-up register pressure aware list scheduling "
65 "which tries to balance ILP and register pressure",
66 createILPListDAGScheduler
);
69 //===----------------------------------------------------------------------===//
70 /// ScheduleDAGRRList - The actual register reduction list scheduler
71 /// implementation. This supports both top-down and bottom-up scheduling.
73 class ScheduleDAGRRList
: public ScheduleDAGSDNodes
{
75 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
79 /// NeedLatency - True if the scheduler will make use of latency information.
83 /// AvailableQueue - The priority queue to use for the available SUnits.
84 SchedulingPriorityQueue
*AvailableQueue
;
86 /// LiveRegDefs - A set of physical registers and their definitions
87 /// that are "live". These nodes must be scheduled before any other nodes that
88 /// modify the registers can be scheduled.
90 std::vector
<SUnit
*> LiveRegDefs
;
91 std::vector
<unsigned> LiveRegCycles
;
93 /// Topo - A topological ordering for SUnits which permits fast IsReachable
94 /// and similar queries.
95 ScheduleDAGTopologicalSort Topo
;
98 ScheduleDAGRRList(MachineFunction
&mf
,
99 bool isbottomup
, bool needlatency
,
100 SchedulingPriorityQueue
*availqueue
)
101 : ScheduleDAGSDNodes(mf
), isBottomUp(isbottomup
), NeedLatency(needlatency
),
102 AvailableQueue(availqueue
), Topo(SUnits
) {
105 ~ScheduleDAGRRList() {
106 delete AvailableQueue
;
111 /// IsReachable - Checks if SU is reachable from TargetSU.
112 bool IsReachable(const SUnit
*SU
, const SUnit
*TargetSU
) {
113 return Topo
.IsReachable(SU
, TargetSU
);
116 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
118 bool WillCreateCycle(SUnit
*SU
, SUnit
*TargetSU
) {
119 return Topo
.WillCreateCycle(SU
, TargetSU
);
122 /// AddPred - adds a predecessor edge to SUnit SU.
123 /// Declared void: does not report whether the edge is a new predecessor.
124 /// Updates the topological ordering if required.
125 void AddPred(SUnit
*SU
, const SDep
&D
) {
126 Topo
.AddPred(SU
, D
.getSUnit());
130 /// RemovePred - removes a predecessor edge from SUnit SU.
131 /// Declared void: does not report whether an edge was removed.
132 /// Updates the topological ordering if required.
133 void RemovePred(SUnit
*SU
, const SDep
&D
) {
134 Topo
.RemovePred(SU
, D
.getSUnit());
139 void ReleasePred(SUnit
*SU
, const SDep
*PredEdge
);
140 void ReleasePredecessors(SUnit
*SU
, unsigned CurCycle
);
141 void ReleaseSucc(SUnit
*SU
, const SDep
*SuccEdge
);
142 void ReleaseSuccessors(SUnit
*SU
);
143 void CapturePred(SDep
*PredEdge
);
144 void ScheduleNodeBottomUp(SUnit
*, unsigned);
145 void ScheduleNodeTopDown(SUnit
*, unsigned);
146 void UnscheduleNodeBottomUp(SUnit
*);
147 void BacktrackBottomUp(SUnit
*, unsigned, unsigned&);
148 SUnit
*CopyAndMoveSuccessors(SUnit
*);
149 void InsertCopiesAndMoveSuccs(SUnit
*, unsigned,
150 const TargetRegisterClass
*,
151 const TargetRegisterClass
*,
152 SmallVector
<SUnit
*, 2>&);
153 bool DelayForLiveRegsBottomUp(SUnit
*, SmallVector
<unsigned, 4>&);
154 void ListScheduleTopDown();
155 void ListScheduleBottomUp();
158 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
159 /// Updates the topological ordering if required.
160 SUnit
*CreateNewSUnit(SDNode
*N
) {
161 unsigned NumSUnits
= SUnits
.size();
162 SUnit
*NewNode
= NewSUnit(N
);
163 // Update the topological ordering.
164 if (NewNode
->NodeNum
>= NumSUnits
)
165 Topo
.InitDAGTopologicalSorting();
169 /// CreateClone - Creates a new SUnit from an existing one.
170 /// Updates the topological ordering if required.
171 SUnit
*CreateClone(SUnit
*N
) {
172 unsigned NumSUnits
= SUnits
.size();
173 SUnit
*NewNode
= Clone(N
);
174 // Update the topological ordering.
175 if (NewNode
->NodeNum
>= NumSUnits
)
176 Topo
.InitDAGTopologicalSorting();
180 /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
181 /// need actual latency information but the hybrid scheduler does.
182 bool ForceUnitLatencies() const {
186 } // end anonymous namespace
189 /// Schedule - Schedule the DAG using list scheduling.
190 void ScheduleDAGRRList::Schedule() {
192 << "********** List Scheduling BB#" << BB
->getNumber()
193 << " '" << BB
->getName() << "' **********\n");
196 LiveRegDefs
.resize(TRI
->getNumRegs(), NULL
);
197 LiveRegCycles
.resize(TRI
->getNumRegs(), 0);
199 // Build the scheduling graph.
200 BuildSchedGraph(NULL
);
202 DEBUG(for (unsigned su
= 0, e
= SUnits
.size(); su
!= e
; ++su
)
203 SUnits
[su
].dumpAll(this));
204 Topo
.InitDAGTopologicalSorting();
206 AvailableQueue
->initNodes(SUnits
);
208 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
210 ListScheduleBottomUp();
212 ListScheduleTopDown();
214 AvailableQueue
->releaseState();
217 //===----------------------------------------------------------------------===//
218 // Bottom-Up Scheduling
219 //===----------------------------------------------------------------------===//
221 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
222 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
223 void ScheduleDAGRRList::ReleasePred(SUnit
*SU
, const SDep
*PredEdge
) {
224 SUnit
*PredSU
= PredEdge
->getSUnit();
227 if (PredSU
->NumSuccsLeft
== 0) {
228 dbgs() << "*** Scheduling failed! ***\n";
230 dbgs() << " has been released too many times!\n";
234 --PredSU
->NumSuccsLeft
;
236 if (!ForceUnitLatencies()) {
237 // Updating predecessor's height. This is now the cycle when the
238 // predecessor can be scheduled without causing a pipeline stall.
239 PredSU
->setHeightToAtLeast(SU
->getHeight() + PredEdge
->getLatency());
242 // If all the node's successors are scheduled, this node is ready
243 // to be scheduled. Ignore the special EntrySU node.
244 if (PredSU
->NumSuccsLeft
== 0 && PredSU
!= &EntrySU
) {
245 PredSU
->isAvailable
= true;
246 AvailableQueue
->push(PredSU
);
250 void ScheduleDAGRRList::ReleasePredecessors(SUnit
*SU
, unsigned CurCycle
) {
251 // Bottom up: release predecessors
252 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
254 ReleasePred(SU
, &*I
);
255 if (I
->isAssignedRegDep()) {
256 // This is a physical register dependency and it's impossible or
257 // expensive to copy the register. Make sure nothing that can
258 // clobber the register is scheduled between the predecessor and
260 if (!LiveRegDefs
[I
->getReg()]) {
262 LiveRegDefs
[I
->getReg()] = I
->getSUnit();
263 LiveRegCycles
[I
->getReg()] = CurCycle
;
269 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
270 /// count of its predecessors. If a predecessor pending count is zero, add it to
271 /// the Available queue.
272 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit
*SU
, unsigned CurCycle
) {
273 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle
<< "]: ");
274 DEBUG(SU
->dump(this));
277 if (CurCycle
< SU
->getHeight())
278 DEBUG(dbgs() << " Height [" << SU
->getHeight() << "] pipeline stall!\n");
281 // FIXME: Handle noop hazard.
282 SU
->setHeightToAtLeast(CurCycle
);
283 Sequence
.push_back(SU
);
285 AvailableQueue
->ScheduledNode(SU
);
287 ReleasePredecessors(SU
, CurCycle
);
289 // Release all the implicit physical register defs that are live.
290 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
292 if (I
->isAssignedRegDep()) {
293 if (LiveRegCycles
[I
->getReg()] == I
->getSUnit()->getHeight()) {
294 assert(NumLiveRegs
> 0 && "NumLiveRegs is already zero!");
295 assert(LiveRegDefs
[I
->getReg()] == SU
&&
296 "Physical register dependency violated?");
298 LiveRegDefs
[I
->getReg()] = NULL
;
299 LiveRegCycles
[I
->getReg()] = 0;
304 SU
->isScheduled
= true;
307 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
308 /// unscheduled, increase the succ left count of its predecessors. Remove
309 /// them from AvailableQueue if necessary.
310 void ScheduleDAGRRList::CapturePred(SDep
*PredEdge
) {
311 SUnit
*PredSU
= PredEdge
->getSUnit();
312 if (PredSU
->isAvailable
) {
313 PredSU
->isAvailable
= false;
314 if (!PredSU
->isPending
)
315 AvailableQueue
->remove(PredSU
);
318 assert(PredSU
->NumSuccsLeft
< UINT_MAX
&& "NumSuccsLeft will overflow!");
319 ++PredSU
->NumSuccsLeft
;
322 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
323 /// its predecessor states to reflect the change.
324 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit
*SU
) {
325 DEBUG(dbgs() << "*** Unscheduling [" << SU
->getHeight() << "]: ");
326 DEBUG(SU
->dump(this));
328 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
331 if (I
->isAssignedRegDep() && SU
->getHeight() == LiveRegCycles
[I
->getReg()]){
332 assert(NumLiveRegs
> 0 && "NumLiveRegs is already zero!");
333 assert(LiveRegDefs
[I
->getReg()] == I
->getSUnit() &&
334 "Physical register dependency violated?");
336 LiveRegDefs
[I
->getReg()] = NULL
;
337 LiveRegCycles
[I
->getReg()] = 0;
341 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
343 if (I
->isAssignedRegDep()) {
344 if (!LiveRegDefs
[I
->getReg()]) {
345 LiveRegDefs
[I
->getReg()] = SU
;
348 if (I
->getSUnit()->getHeight() < LiveRegCycles
[I
->getReg()])
349 LiveRegCycles
[I
->getReg()] = I
->getSUnit()->getHeight();
353 SU
->setHeightDirty();
354 SU
->isScheduled
= false;
355 SU
->isAvailable
= true;
356 AvailableQueue
->push(SU
);
357 AvailableQueue
->UnscheduledNode(SU
);
360 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
361 /// BtCycle in order to schedule a specific node.
362 void ScheduleDAGRRList::BacktrackBottomUp(SUnit
*SU
, unsigned BtCycle
,
363 unsigned &CurCycle
) {
365 while (CurCycle
> BtCycle
) {
366 OldSU
= Sequence
.back();
368 if (SU
->isSucc(OldSU
))
369 // Don't try to remove SU from AvailableQueue.
370 SU
->isAvailable
= false;
371 UnscheduleNodeBottomUp(OldSU
);
373 AvailableQueue
->setCurCycle(CurCycle
);
376 assert(!SU
->isSucc(OldSU
) && "Something is wrong!");
381 static bool isOperandOf(const SUnit
*SU
, SDNode
*N
) {
382 for (const SDNode
*SUNode
= SU
->getNode(); SUNode
;
383 SUNode
= SUNode
->getFlaggedNode()) {
384 if (SUNode
->isOperandOf(N
))
390 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
391 /// successors to the newly created node.
392 SUnit
*ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit
*SU
) {
393 if (SU
->getNode()->getFlaggedNode())
396 SDNode
*N
= SU
->getNode();
401 bool TryUnfold
= false;
402 for (unsigned i
= 0, e
= N
->getNumValues(); i
!= e
; ++i
) {
403 EVT VT
= N
->getValueType(i
);
406 else if (VT
== MVT::Other
)
409 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
410 const SDValue
&Op
= N
->getOperand(i
);
411 EVT VT
= Op
.getNode()->getValueType(Op
.getResNo());
417 SmallVector
<SDNode
*, 2> NewNodes
;
418 if (!TII
->unfoldMemoryOperand(*DAG
, N
, NewNodes
))
421 DEBUG(dbgs() << "Unfolding SU #" << SU
->NodeNum
<< "\n");
422 assert(NewNodes
.size() == 2 && "Expected a load folding node!");
425 SDNode
*LoadNode
= NewNodes
[0];
426 unsigned NumVals
= N
->getNumValues();
427 unsigned OldNumVals
= SU
->getNode()->getNumValues();
428 for (unsigned i
= 0; i
!= NumVals
; ++i
)
429 DAG
->ReplaceAllUsesOfValueWith(SDValue(SU
->getNode(), i
), SDValue(N
, i
));
430 DAG
->ReplaceAllUsesOfValueWith(SDValue(SU
->getNode(), OldNumVals
-1),
431 SDValue(LoadNode
, 1));
433 // LoadNode may already exist. This can happen when there is another
434 // load from the same location and producing the same type of value
435 // but it has different alignment or volatileness.
436 bool isNewLoad
= true;
438 if (LoadNode
->getNodeId() != -1) {
439 LoadSU
= &SUnits
[LoadNode
->getNodeId()];
442 LoadSU
= CreateNewSUnit(LoadNode
);
443 LoadNode
->setNodeId(LoadSU
->NodeNum
);
444 ComputeLatency(LoadSU
);
447 SUnit
*NewSU
= CreateNewSUnit(N
);
448 assert(N
->getNodeId() == -1 && "Node already inserted!");
449 N
->setNodeId(NewSU
->NodeNum
);
451 const TargetInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
452 for (unsigned i
= 0; i
!= TID
.getNumOperands(); ++i
) {
453 if (TID
.getOperandConstraint(i
, TOI::TIED_TO
) != -1) {
454 NewSU
->isTwoAddress
= true;
458 if (TID
.isCommutable())
459 NewSU
->isCommutable
= true;
460 ComputeLatency(NewSU
);
462 // Record all the edges to and from the old SU, by category.
463 SmallVector
<SDep
, 4> ChainPreds
;
464 SmallVector
<SDep
, 4> ChainSuccs
;
465 SmallVector
<SDep
, 4> LoadPreds
;
466 SmallVector
<SDep
, 4> NodePreds
;
467 SmallVector
<SDep
, 4> NodeSuccs
;
468 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
471 ChainPreds
.push_back(*I
);
472 else if (isOperandOf(I
->getSUnit(), LoadNode
))
473 LoadPreds
.push_back(*I
);
475 NodePreds
.push_back(*I
);
477 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
480 ChainSuccs
.push_back(*I
);
482 NodeSuccs
.push_back(*I
);
485 // Now assign edges to the newly-created nodes.
486 for (unsigned i
= 0, e
= ChainPreds
.size(); i
!= e
; ++i
) {
487 const SDep
&Pred
= ChainPreds
[i
];
488 RemovePred(SU
, Pred
);
490 AddPred(LoadSU
, Pred
);
492 for (unsigned i
= 0, e
= LoadPreds
.size(); i
!= e
; ++i
) {
493 const SDep
&Pred
= LoadPreds
[i
];
494 RemovePred(SU
, Pred
);
496 AddPred(LoadSU
, Pred
);
498 for (unsigned i
= 0, e
= NodePreds
.size(); i
!= e
; ++i
) {
499 const SDep
&Pred
= NodePreds
[i
];
500 RemovePred(SU
, Pred
);
501 AddPred(NewSU
, Pred
);
503 for (unsigned i
= 0, e
= NodeSuccs
.size(); i
!= e
; ++i
) {
504 SDep D
= NodeSuccs
[i
];
505 SUnit
*SuccDep
= D
.getSUnit();
507 RemovePred(SuccDep
, D
);
511 for (unsigned i
= 0, e
= ChainSuccs
.size(); i
!= e
; ++i
) {
512 SDep D
= ChainSuccs
[i
];
513 SUnit
*SuccDep
= D
.getSUnit();
515 RemovePred(SuccDep
, D
);
522 // Add a data dependency to reflect that NewSU reads the value defined
524 AddPred(NewSU
, SDep(LoadSU
, SDep::Data
, LoadSU
->Latency
));
527 AvailableQueue
->addNode(LoadSU
);
528 AvailableQueue
->addNode(NewSU
);
532 if (NewSU
->NumSuccsLeft
== 0) {
533 NewSU
->isAvailable
= true;
539 DEBUG(dbgs() << " Duplicating SU #" << SU
->NodeNum
<< "\n");
540 NewSU
= CreateClone(SU
);
542 // New SUnit has the exact same predecessors.
543 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
545 if (!I
->isArtificial())
548 // Only copy scheduled successors. Cut them from old node's successor
549 // list and move them over.
550 SmallVector
<std::pair
<SUnit
*, SDep
>, 4> DelDeps
;
551 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
553 if (I
->isArtificial())
555 SUnit
*SuccSU
= I
->getSUnit();
556 if (SuccSU
->isScheduled
) {
561 DelDeps
.push_back(std::make_pair(SuccSU
, D
));
564 for (unsigned i
= 0, e
= DelDeps
.size(); i
!= e
; ++i
)
565 RemovePred(DelDeps
[i
].first
, DelDeps
[i
].second
);
567 AvailableQueue
->updateNode(SU
);
568 AvailableQueue
->addNode(NewSU
);
574 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
575 /// scheduled successors of the given SUnit to the last copy.
576 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit
*SU
, unsigned Reg
,
577 const TargetRegisterClass
*DestRC
,
578 const TargetRegisterClass
*SrcRC
,
579 SmallVector
<SUnit
*, 2> &Copies
) {
580 SUnit
*CopyFromSU
= CreateNewSUnit(NULL
);
581 CopyFromSU
->CopySrcRC
= SrcRC
;
582 CopyFromSU
->CopyDstRC
= DestRC
;
584 SUnit
*CopyToSU
= CreateNewSUnit(NULL
);
585 CopyToSU
->CopySrcRC
= DestRC
;
586 CopyToSU
->CopyDstRC
= SrcRC
;
588 // Only copy scheduled successors. Cut them from old node's successor
589 // list and move them over.
590 SmallVector
<std::pair
<SUnit
*, SDep
>, 4> DelDeps
;
591 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
593 if (I
->isArtificial())
595 SUnit
*SuccSU
= I
->getSUnit();
596 if (SuccSU
->isScheduled
) {
598 D
.setSUnit(CopyToSU
);
600 DelDeps
.push_back(std::make_pair(SuccSU
, *I
));
603 for (unsigned i
= 0, e
= DelDeps
.size(); i
!= e
; ++i
)
604 RemovePred(DelDeps
[i
].first
, DelDeps
[i
].second
);
606 AddPred(CopyFromSU
, SDep(SU
, SDep::Data
, SU
->Latency
, Reg
));
607 AddPred(CopyToSU
, SDep(CopyFromSU
, SDep::Data
, CopyFromSU
->Latency
, 0));
609 AvailableQueue
->updateNode(SU
);
610 AvailableQueue
->addNode(CopyFromSU
);
611 AvailableQueue
->addNode(CopyToSU
);
612 Copies
.push_back(CopyFromSU
);
613 Copies
.push_back(CopyToSU
);
618 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
619 /// definition of the specified node.
620 /// FIXME: Move to SelectionDAG?
621 static EVT
getPhysicalRegisterVT(SDNode
*N
, unsigned Reg
,
622 const TargetInstrInfo
*TII
) {
623 const TargetInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
624 assert(TID
.ImplicitDefs
&& "Physical reg def must be in implicit def list!");
625 unsigned NumRes
= TID
.getNumDefs();
626 for (const unsigned *ImpDef
= TID
.getImplicitDefs(); *ImpDef
; ++ImpDef
) {
631 return N
->getValueType(NumRes
);
634 /// CheckForLiveRegDef - Return true and update live register vector if the
635 /// specified register def of the specified SUnit clobbers any "live" registers.
636 static bool CheckForLiveRegDef(SUnit
*SU
, unsigned Reg
,
637 std::vector
<SUnit
*> &LiveRegDefs
,
638 SmallSet
<unsigned, 4> &RegAdded
,
639 SmallVector
<unsigned, 4> &LRegs
,
640 const TargetRegisterInfo
*TRI
) {
642 if (LiveRegDefs
[Reg
] && LiveRegDefs
[Reg
] != SU
) {
643 if (RegAdded
.insert(Reg
)) {
644 LRegs
.push_back(Reg
);
648 for (const unsigned *Alias
= TRI
->getAliasSet(Reg
); *Alias
; ++Alias
)
649 if (LiveRegDefs
[*Alias
] && LiveRegDefs
[*Alias
] != SU
) {
650 if (RegAdded
.insert(*Alias
)) {
651 LRegs
.push_back(*Alias
);
658 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
659 /// scheduling of the given node to satisfy live physical register dependencies.
660 /// If the specific node is the last one that's available to schedule, do
661 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
662 bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit
*SU
,
663 SmallVector
<unsigned, 4> &LRegs
){
664 if (NumLiveRegs
== 0)
667 SmallSet
<unsigned, 4> RegAdded
;
668 // If this node would clobber any "live" register, then it's not ready.
669 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
671 if (I
->isAssignedRegDep())
672 CheckForLiveRegDef(I
->getSUnit(), I
->getReg(), LiveRegDefs
,
673 RegAdded
, LRegs
, TRI
);
676 for (SDNode
*Node
= SU
->getNode(); Node
; Node
= Node
->getFlaggedNode()) {
677 if (Node
->getOpcode() == ISD::INLINEASM
) {
678 // Inline asm can clobber physical defs.
679 unsigned NumOps
= Node
->getNumOperands();
680 if (Node
->getOperand(NumOps
-1).getValueType() == MVT::Flag
)
681 --NumOps
; // Ignore the flag operand.
683 for (unsigned i
= InlineAsm::Op_FirstOperand
; i
!= NumOps
;) {
685 cast
<ConstantSDNode
>(Node
->getOperand(i
))->getZExtValue();
686 unsigned NumVals
= InlineAsm::getNumOperandRegisters(Flags
);
688 ++i
; // Skip the ID value.
689 if (InlineAsm::isRegDefKind(Flags
) ||
690 InlineAsm::isRegDefEarlyClobberKind(Flags
)) {
691 // Check for def of register or earlyclobber register.
692 for (; NumVals
; --NumVals
, ++i
) {
693 unsigned Reg
= cast
<RegisterSDNode
>(Node
->getOperand(i
))->getReg();
694 if (TargetRegisterInfo::isPhysicalRegister(Reg
))
695 CheckForLiveRegDef(SU
, Reg
, LiveRegDefs
, RegAdded
, LRegs
, TRI
);
703 if (!Node
->isMachineOpcode())
705 const TargetInstrDesc
&TID
= TII
->get(Node
->getMachineOpcode());
706 if (!TID
.ImplicitDefs
)
708 for (const unsigned *Reg
= TID
.ImplicitDefs
; *Reg
; ++Reg
)
709 CheckForLiveRegDef(SU
, *Reg
, LiveRegDefs
, RegAdded
, LRegs
, TRI
);
711 return !LRegs
.empty();
715 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
717 void ScheduleDAGRRList::ListScheduleBottomUp() {
718 unsigned CurCycle
= 0;
720 // Release any predecessors of the special Exit node.
721 ReleasePredecessors(&ExitSU
, CurCycle
);
723 // Add root to Available queue.
724 if (!SUnits
.empty()) {
725 SUnit
*RootSU
= &SUnits
[DAG
->getRoot().getNode()->getNodeId()];
726 assert(RootSU
->Succs
.empty() && "Graph root shouldn't have successors!");
727 RootSU
->isAvailable
= true;
728 AvailableQueue
->push(RootSU
);
731 // While Available queue is not empty, grab the node with the highest
732 // priority. If it is not ready put it back. Schedule the node.
733 SmallVector
<SUnit
*, 4> NotReady
;
734 DenseMap
<SUnit
*, SmallVector
<unsigned, 4> > LRegsMap
;
735 Sequence
.reserve(SUnits
.size());
736 while (!AvailableQueue
->empty()) {
737 bool Delayed
= false;
739 SUnit
*CurSU
= AvailableQueue
->pop();
741 SmallVector
<unsigned, 4> LRegs
;
742 if (!DelayForLiveRegsBottomUp(CurSU
, LRegs
))
745 LRegsMap
.insert(std::make_pair(CurSU
, LRegs
));
747 CurSU
->isPending
= true; // This SU is not in AvailableQueue right now.
748 NotReady
.push_back(CurSU
);
749 CurSU
= AvailableQueue
->pop();
752 // All candidates are delayed due to live physical reg dependencies.
753 // Try backtracking, code duplication, or inserting cross class copies
755 if (Delayed
&& !CurSU
) {
756 for (unsigned i
= 0, e
= NotReady
.size(); i
!= e
; ++i
) {
757 SUnit
*TrySU
= NotReady
[i
];
758 SmallVector
<unsigned, 4> &LRegs
= LRegsMap
[TrySU
];
760 // Try unscheduling up to the point where it's safe to schedule
762 unsigned LiveCycle
= CurCycle
;
763 for (unsigned j
= 0, ee
= LRegs
.size(); j
!= ee
; ++j
) {
764 unsigned Reg
= LRegs
[j
];
765 unsigned LCycle
= LiveRegCycles
[Reg
];
766 LiveCycle
= std::min(LiveCycle
, LCycle
);
768 SUnit
*OldSU
= Sequence
[LiveCycle
];
769 if (!WillCreateCycle(TrySU
, OldSU
)) {
770 BacktrackBottomUp(TrySU
, LiveCycle
, CurCycle
);
771 // Force the current node to be scheduled before the node that
772 // requires the physical reg dep.
773 if (OldSU
->isAvailable
) {
774 OldSU
->isAvailable
= false;
775 AvailableQueue
->remove(OldSU
);
777 AddPred(TrySU
, SDep(OldSU
, SDep::Order
, /*Latency=*/1,
778 /*Reg=*/0, /*isNormalMemory=*/false,
779 /*isMustAlias=*/false, /*isArtificial=*/true));
780 // If one or more successors have been unscheduled, then the current
781 // node is no longer available. Schedule a successor that's now
782 // available instead.
783 if (!TrySU
->isAvailable
)
784 CurSU
= AvailableQueue
->pop();
787 TrySU
->isPending
= false;
788 NotReady
.erase(NotReady
.begin()+i
);
795 // Can't backtrack. If it's too expensive to copy the value, then try
796 // duplicating the nodes that produce these "too expensive to copy"
797 // values to break the dependency. In case even that doesn't work,
798 // insert cross class copies.
799 // If it's not too expensive, i.e. cost != -1, issue copies.
800 SUnit
*TrySU
= NotReady
[0];
801 SmallVector
<unsigned, 4> &LRegs
= LRegsMap
[TrySU
];
802 assert(LRegs
.size() == 1 && "Can't handle this yet!");
803 unsigned Reg
= LRegs
[0];
804 SUnit
*LRDef
= LiveRegDefs
[Reg
];
805 EVT VT
= getPhysicalRegisterVT(LRDef
->getNode(), Reg
, TII
);
806 const TargetRegisterClass
*RC
=
807 TRI
->getMinimalPhysRegClass(Reg
, VT
);
808 const TargetRegisterClass
*DestRC
= TRI
->getCrossCopyRegClass(RC
);
810 // If cross copy register class is null, then it must be possible to copy
811 // the value directly. Do not try to duplicate the def.
814 NewDef
= CopyAndMoveSuccessors(LRDef
);
818 // Issue copies, these can be expensive cross register class copies.
819 SmallVector
<SUnit
*, 2> Copies
;
820 InsertCopiesAndMoveSuccs(LRDef
, Reg
, DestRC
, RC
, Copies
);
821 DEBUG(dbgs() << " Adding an edge from SU #" << TrySU
->NodeNum
822 << " to SU #" << Copies
.front()->NodeNum
<< "\n");
823 AddPred(TrySU
, SDep(Copies
.front(), SDep::Order
, /*Latency=*/1,
824 /*Reg=*/0, /*isNormalMemory=*/false,
825 /*isMustAlias=*/false,
826 /*isArtificial=*/true));
827 NewDef
= Copies
.back();
830 DEBUG(dbgs() << " Adding an edge from SU #" << NewDef
->NodeNum
831 << " to SU #" << TrySU
->NodeNum
<< "\n");
832 LiveRegDefs
[Reg
] = NewDef
;
833 AddPred(NewDef
, SDep(TrySU
, SDep::Order
, /*Latency=*/1,
834 /*Reg=*/0, /*isNormalMemory=*/false,
835 /*isMustAlias=*/false,
836 /*isArtificial=*/true));
837 TrySU
->isAvailable
= false;
841 assert(CurSU
&& "Unable to resolve live physical register dependencies!");
844 // Add the nodes that aren't ready back onto the available list.
845 for (unsigned i
= 0, e
= NotReady
.size(); i
!= e
; ++i
) {
846 NotReady
[i
]->isPending
= false;
847 // May no longer be available due to backtracking.
848 if (NotReady
[i
]->isAvailable
)
849 AvailableQueue
->push(NotReady
[i
]);
854 ScheduleNodeBottomUp(CurSU
, CurCycle
);
856 AvailableQueue
->setCurCycle(CurCycle
);
859 // Reverse the order if it is bottom up.
860 std::reverse(Sequence
.begin(), Sequence
.end());
863 VerifySchedule(isBottomUp
);
867 //===----------------------------------------------------------------------===//
868 // Top-Down Scheduling
869 //===----------------------------------------------------------------------===//
871 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
872 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
873 void ScheduleDAGRRList::ReleaseSucc(SUnit
*SU
, const SDep
*SuccEdge
) {
874 SUnit
*SuccSU
= SuccEdge
->getSUnit();
877 if (SuccSU
->NumPredsLeft
== 0) {
878 dbgs() << "*** Scheduling failed! ***\n";
880 dbgs() << " has been released too many times!\n";
884 --SuccSU
->NumPredsLeft
;
886 // If all the node's predecessors are scheduled, this node is ready
887 // to be scheduled. Ignore the special ExitSU node.
888 if (SuccSU
->NumPredsLeft
== 0 && SuccSU
!= &ExitSU
) {
889 SuccSU
->isAvailable
= true;
890 AvailableQueue
->push(SuccSU
);
894 void ScheduleDAGRRList::ReleaseSuccessors(SUnit
*SU
) {
895 // Top down: release successors
896 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
898 assert(!I
->isAssignedRegDep() &&
899 "The list-tdrr scheduler doesn't yet support physreg dependencies!");
901 ReleaseSucc(SU
, &*I
);
905 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
906 /// count of its successors. If a successor pending count is zero, add it to
907 /// the Available queue.
908 void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit
*SU
, unsigned CurCycle
) {
909 DEBUG(dbgs() << "*** Scheduling [" << CurCycle
<< "]: ");
910 DEBUG(SU
->dump(this));
912 assert(CurCycle
>= SU
->getDepth() && "Node scheduled above its depth!");
913 SU
->setDepthToAtLeast(CurCycle
);
914 Sequence
.push_back(SU
);
916 ReleaseSuccessors(SU
);
917 SU
->isScheduled
= true;
918 AvailableQueue
->ScheduledNode(SU
);
921 /// ListScheduleTopDown - The main loop of list scheduling for top-down
923 void ScheduleDAGRRList::ListScheduleTopDown() {
924 unsigned CurCycle
= 0;
925 AvailableQueue
->setCurCycle(CurCycle
);
927 // Release any successors of the special Entry node.
928 ReleaseSuccessors(&EntrySU
);
930 // Add all leaves to the Available queue.
931 for (unsigned i
= 0, e
= SUnits
.size(); i
!= e
; ++i
) {
932 // It is available if it has no predecessors.
933 if (SUnits
[i
].Preds
.empty()) {
934 AvailableQueue
->push(&SUnits
[i
]);
935 SUnits
[i
].isAvailable
= true;
939 // While Available queue is not empty, grab the node with the highest
940 // priority. If it is not ready put it back. Schedule the node.
941 Sequence
.reserve(SUnits
.size());
942 while (!AvailableQueue
->empty()) {
943 SUnit
*CurSU
= AvailableQueue
->pop();
946 ScheduleNodeTopDown(CurSU
, CurCycle
);
948 AvailableQueue
->setCurCycle(CurCycle
);
952 VerifySchedule(isBottomUp
);
957 //===----------------------------------------------------------------------===//
958 // RegReductionPriorityQueue Implementation
959 //===----------------------------------------------------------------------===//
961 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
962 // to reduce register pressure.
966 class RegReductionPriorityQueue
;
968 /// bu_ls_rr_sort - Priority function for bottom up register pressure
969 /// reduction scheduler.
970 struct bu_ls_rr_sort
: public std::binary_function
<SUnit
*, SUnit
*, bool> {
971 RegReductionPriorityQueue
<bu_ls_rr_sort
> *SPQ
;
972 bu_ls_rr_sort(RegReductionPriorityQueue
<bu_ls_rr_sort
> *spq
) : SPQ(spq
) {}
973 bu_ls_rr_sort(const bu_ls_rr_sort
&RHS
) : SPQ(RHS
.SPQ
) {}
975 bool operator()(const SUnit
* left
, const SUnit
* right
) const;
978 // td_ls_rr_sort - Priority function for top down register pressure reduction
980 struct td_ls_rr_sort
: public std::binary_function
<SUnit
*, SUnit
*, bool> {
981 RegReductionPriorityQueue
<td_ls_rr_sort
> *SPQ
;
982 td_ls_rr_sort(RegReductionPriorityQueue
<td_ls_rr_sort
> *spq
) : SPQ(spq
) {}
983 td_ls_rr_sort(const td_ls_rr_sort
&RHS
) : SPQ(RHS
.SPQ
) {}
985 bool operator()(const SUnit
* left
, const SUnit
* right
) const;
988 // src_ls_rr_sort - Priority function for source order scheduler.
989 struct src_ls_rr_sort
: public std::binary_function
<SUnit
*, SUnit
*, bool> {
990 RegReductionPriorityQueue
<src_ls_rr_sort
> *SPQ
;
991 src_ls_rr_sort(RegReductionPriorityQueue
<src_ls_rr_sort
> *spq
)
993 src_ls_rr_sort(const src_ls_rr_sort
&RHS
)
996 bool operator()(const SUnit
* left
, const SUnit
* right
) const;
999 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1000 struct hybrid_ls_rr_sort
: public std::binary_function
<SUnit
*, SUnit
*, bool> {
1001 RegReductionPriorityQueue
<hybrid_ls_rr_sort
> *SPQ
;
1002 hybrid_ls_rr_sort(RegReductionPriorityQueue
<hybrid_ls_rr_sort
> *spq
)
1004 hybrid_ls_rr_sort(const hybrid_ls_rr_sort
&RHS
)
1007 bool operator()(const SUnit
* left
, const SUnit
* right
) const;
1010 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1012 struct ilp_ls_rr_sort
: public std::binary_function
<SUnit
*, SUnit
*, bool> {
1013 RegReductionPriorityQueue
<ilp_ls_rr_sort
> *SPQ
;
1014 ilp_ls_rr_sort(RegReductionPriorityQueue
<ilp_ls_rr_sort
> *spq
)
1016 ilp_ls_rr_sort(const ilp_ls_rr_sort
&RHS
)
1019 bool operator()(const SUnit
* left
, const SUnit
* right
) const;
1021 } // end anonymous namespace
1023 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1024 /// Smaller number is the higher priority.
1026 CalcNodeSethiUllmanNumber(const SUnit
*SU
, std::vector
<unsigned> &SUNumbers
) {
1027 unsigned &SethiUllmanNumber
= SUNumbers
[SU
->NodeNum
];
1028 if (SethiUllmanNumber
!= 0)
1029 return SethiUllmanNumber
;
1032 for (SUnit::const_pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
1034 if (I
->isCtrl()) continue; // ignore chain preds
1035 SUnit
*PredSU
= I
->getSUnit();
1036 unsigned PredSethiUllman
= CalcNodeSethiUllmanNumber(PredSU
, SUNumbers
);
1037 if (PredSethiUllman
> SethiUllmanNumber
) {
1038 SethiUllmanNumber
= PredSethiUllman
;
1040 } else if (PredSethiUllman
== SethiUllmanNumber
)
1044 SethiUllmanNumber
+= Extra
;
1046 if (SethiUllmanNumber
== 0)
1047 SethiUllmanNumber
= 1;
1049 return SethiUllmanNumber
;
1054 class RegReductionPriorityQueue
: public SchedulingPriorityQueue
{
1055 std::vector
<SUnit
*> Queue
;
1057 unsigned CurQueueId
;
1058 bool TracksRegPressure
;
1061 // SUnits - The SUnits for the current graph.
1062 std::vector
<SUnit
> *SUnits
;
1064 MachineFunction
&MF
;
1065 const TargetInstrInfo
*TII
;
1066 const TargetRegisterInfo
*TRI
;
1067 const TargetLowering
*TLI
;
1068 ScheduleDAGRRList
*scheduleDAG
;
1070 // SethiUllmanNumbers - The SethiUllman number for each node.
1071 std::vector
<unsigned> SethiUllmanNumbers
;
1073 /// RegPressure - Tracking current reg pressure per register class.
1075 std::vector
<unsigned> RegPressure
;
1077 /// RegLimit - Tracking the number of allocatable registers per register
1079 std::vector
<unsigned> RegLimit
;
1082 RegReductionPriorityQueue(MachineFunction
&mf
,
1084 const TargetInstrInfo
*tii
,
1085 const TargetRegisterInfo
*tri
,
1086 const TargetLowering
*tli
)
1087 : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp
),
1088 MF(mf
), TII(tii
), TRI(tri
), TLI(tli
), scheduleDAG(NULL
) {
1089 if (TracksRegPressure
) {
1090 unsigned NumRC
= TRI
->getNumRegClasses();
1091 RegLimit
.resize(NumRC
);
1092 RegPressure
.resize(NumRC
);
1093 std::fill(RegLimit
.begin(), RegLimit
.end(), 0);
1094 std::fill(RegPressure
.begin(), RegPressure
.end(), 0);
1095 for (TargetRegisterInfo::regclass_iterator I
= TRI
->regclass_begin(),
1096 E
= TRI
->regclass_end(); I
!= E
; ++I
)
1097 RegLimit
[(*I
)->getID()] = tli
->getRegPressureLimit(*I
, MF
);
1101 void initNodes(std::vector
<SUnit
> &sunits
) {
1103 // Add pseudo dependency edges for two-address nodes.
1104 AddPseudoTwoAddrDeps();
1105 // Reroute edges to nodes with multiple uses.
1106 PrescheduleNodesWithMultipleUses();
1107 // Calculate node priorities.
1108 CalculateSethiUllmanNumbers();
1111 void addNode(const SUnit
*SU
) {
1112 unsigned SUSize
= SethiUllmanNumbers
.size();
1113 if (SUnits
->size() > SUSize
)
1114 SethiUllmanNumbers
.resize(SUSize
*2, 0);
1115 CalcNodeSethiUllmanNumber(SU
, SethiUllmanNumbers
);
1118 void updateNode(const SUnit
*SU
) {
1119 SethiUllmanNumbers
[SU
->NodeNum
] = 0;
1120 CalcNodeSethiUllmanNumber(SU
, SethiUllmanNumbers
);
1123 void releaseState() {
1125 SethiUllmanNumbers
.clear();
1126 std::fill(RegPressure
.begin(), RegPressure
.end(), 0);
1129 unsigned getNodePriority(const SUnit
*SU
) const {
1130 assert(SU
->NodeNum
< SethiUllmanNumbers
.size());
1131 unsigned Opc
= SU
->getNode() ? SU
->getNode()->getOpcode() : 0;
1132 if (Opc
== ISD::TokenFactor
|| Opc
== ISD::CopyToReg
)
1133 // CopyToReg should be close to its uses to facilitate coalescing and
1136 if (Opc
== TargetOpcode::EXTRACT_SUBREG
||
1137 Opc
== TargetOpcode::SUBREG_TO_REG
||
1138 Opc
== TargetOpcode::INSERT_SUBREG
)
1139 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1140 // close to their uses to facilitate coalescing.
1142 if (SU
->NumSuccs
== 0 && SU
->NumPreds
!= 0)
1143 // If SU does not have a register use, i.e. it doesn't produce a value
1144 // that would be consumed (e.g. store), then it terminates a chain of
1145 // computation. Give it a large SethiUllman number so it will be
1146 // scheduled right before its predecessors that it doesn't lengthen
1147 // their live ranges.
1149 if (SU
->NumPreds
== 0 && SU
->NumSuccs
!= 0)
1150 // If SU does not have a register def, schedule it close to its uses
1151 // because it does not lengthen any live ranges.
1153 return SethiUllmanNumbers
[SU
->NodeNum
];
1156 unsigned getNodeOrdering(const SUnit
*SU
) const {
1157 return scheduleDAG
->DAG
->GetOrdering(SU
->getNode());
1160 bool empty() const { return Queue
.empty(); }
1162 void push(SUnit
*U
) {
1163 assert(!U
->NodeQueueId
&& "Node in the queue already");
1164 U
->NodeQueueId
= ++CurQueueId
;
1169 if (empty()) return NULL
;
1170 std::vector
<SUnit
*>::iterator Best
= Queue
.begin();
1171 for (std::vector
<SUnit
*>::iterator I
= llvm::next(Queue
.begin()),
1172 E
= Queue
.end(); I
!= E
; ++I
)
1173 if (Picker(*Best
, *I
))
1176 if (Best
!= prior(Queue
.end()))
1177 std::swap(*Best
, Queue
.back());
1183 void remove(SUnit
*SU
) {
1184 assert(!Queue
.empty() && "Queue is empty!");
1185 assert(SU
->NodeQueueId
!= 0 && "Not in queue!");
1186 std::vector
<SUnit
*>::iterator I
= std::find(Queue
.begin(), Queue
.end(),
1188 if (I
!= prior(Queue
.end()))
1189 std::swap(*I
, Queue
.back());
1191 SU
->NodeQueueId
= 0;
1194 bool HighRegPressure(const SUnit
*SU
) const {
1198 for (SUnit::const_pred_iterator I
= SU
->Preds
.begin(),E
= SU
->Preds
.end();
1202 SUnit
*PredSU
= I
->getSUnit();
1203 const SDNode
*PN
= PredSU
->getNode();
1204 if (!PN
->isMachineOpcode()) {
1205 if (PN
->getOpcode() == ISD::CopyFromReg
) {
1206 EVT VT
= PN
->getValueType(0);
1207 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1208 unsigned Cost
= TLI
->getRepRegClassCostFor(VT
);
1209 if ((RegPressure
[RCId
] + Cost
) >= RegLimit
[RCId
])
1214 unsigned POpc
= PN
->getMachineOpcode();
1215 if (POpc
== TargetOpcode::IMPLICIT_DEF
)
1217 if (POpc
== TargetOpcode::EXTRACT_SUBREG
) {
1218 EVT VT
= PN
->getOperand(0).getValueType();
1219 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1220 unsigned Cost
= TLI
->getRepRegClassCostFor(VT
);
1221 // Check if this increases register pressure of the specific register
1222 // class to the point where it would cause spills.
1223 if ((RegPressure
[RCId
] + Cost
) >= RegLimit
[RCId
])
1226 } else if (POpc
== TargetOpcode::INSERT_SUBREG
||
1227 POpc
== TargetOpcode::SUBREG_TO_REG
) {
1228 EVT VT
= PN
->getValueType(0);
1229 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1230 unsigned Cost
= TLI
->getRepRegClassCostFor(VT
);
1231 // Check if this increases register pressure of the specific register
1232 // class to the point where it would cause spills.
1233 if ((RegPressure
[RCId
] + Cost
) >= RegLimit
[RCId
])
1237 unsigned NumDefs
= TII
->get(PN
->getMachineOpcode()).getNumDefs();
1238 for (unsigned i
= 0; i
!= NumDefs
; ++i
) {
1239 EVT VT
= PN
->getValueType(i
);
1240 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1241 if (RegPressure
[RCId
] >= RegLimit
[RCId
])
1242 return true; // Reg pressure already high.
1243 unsigned Cost
= TLI
->getRepRegClassCostFor(VT
);
1244 if (!PN
->hasAnyUseOfValue(i
))
1246 // Check if this increases register pressure of the specific register
1247 // class to the point where it would cause spills.
1248 if ((RegPressure
[RCId
] + Cost
) >= RegLimit
[RCId
])
1256 void ScheduledNode(SUnit
*SU
) {
1257 if (!TracksRegPressure
)
1260 const SDNode
*N
= SU
->getNode();
1261 if (!N
->isMachineOpcode()) {
1262 if (N
->getOpcode() != ISD::CopyToReg
)
1265 unsigned Opc
= N
->getMachineOpcode();
1266 if (Opc
== TargetOpcode::EXTRACT_SUBREG
||
1267 Opc
== TargetOpcode::INSERT_SUBREG
||
1268 Opc
== TargetOpcode::SUBREG_TO_REG
||
1269 Opc
== TargetOpcode::REG_SEQUENCE
||
1270 Opc
== TargetOpcode::IMPLICIT_DEF
)
1274 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
1278 SUnit
*PredSU
= I
->getSUnit();
1279 if (PredSU
->NumSuccsLeft
!= PredSU
->NumSuccs
)
1281 const SDNode
*PN
= PredSU
->getNode();
1282 if (!PN
->isMachineOpcode()) {
1283 if (PN
->getOpcode() == ISD::CopyFromReg
) {
1284 EVT VT
= PN
->getValueType(0);
1285 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1286 RegPressure
[RCId
] += TLI
->getRepRegClassCostFor(VT
);
1290 unsigned POpc
= PN
->getMachineOpcode();
1291 if (POpc
== TargetOpcode::IMPLICIT_DEF
)
1293 if (POpc
== TargetOpcode::EXTRACT_SUBREG
) {
1294 EVT VT
= PN
->getOperand(0).getValueType();
1295 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1296 RegPressure
[RCId
] += TLI
->getRepRegClassCostFor(VT
);
1298 } else if (POpc
== TargetOpcode::INSERT_SUBREG
||
1299 POpc
== TargetOpcode::SUBREG_TO_REG
) {
1300 EVT VT
= PN
->getValueType(0);
1301 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1302 RegPressure
[RCId
] += TLI
->getRepRegClassCostFor(VT
);
1305 unsigned NumDefs
= TII
->get(PN
->getMachineOpcode()).getNumDefs();
1306 for (unsigned i
= 0; i
!= NumDefs
; ++i
) {
1307 EVT VT
= PN
->getValueType(i
);
1308 if (!PN
->hasAnyUseOfValue(i
))
1310 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1311 RegPressure
[RCId
] += TLI
->getRepRegClassCostFor(VT
);
1315 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1316 // may transfer data dependencies to CopyToReg.
1317 if (SU
->NumSuccs
&& N
->isMachineOpcode()) {
1318 unsigned NumDefs
= TII
->get(N
->getMachineOpcode()).getNumDefs();
1319 for (unsigned i
= 0; i
!= NumDefs
; ++i
) {
1320 EVT VT
= N
->getValueType(i
);
1321 if (!N
->hasAnyUseOfValue(i
))
1323 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1324 if (RegPressure
[RCId
] < TLI
->getRepRegClassCostFor(VT
))
1325 // Register pressure tracking is imprecise. This can happen.
1326 RegPressure
[RCId
] = 0;
1328 RegPressure
[RCId
] -= TLI
->getRepRegClassCostFor(VT
);
1335 void UnscheduledNode(SUnit
*SU
) {
1336 if (!TracksRegPressure
)
1339 const SDNode
*N
= SU
->getNode();
1340 if (!N
->isMachineOpcode()) {
1341 if (N
->getOpcode() != ISD::CopyToReg
)
1344 unsigned Opc
= N
->getMachineOpcode();
1345 if (Opc
== TargetOpcode::EXTRACT_SUBREG
||
1346 Opc
== TargetOpcode::INSERT_SUBREG
||
1347 Opc
== TargetOpcode::SUBREG_TO_REG
||
1348 Opc
== TargetOpcode::REG_SEQUENCE
||
1349 Opc
== TargetOpcode::IMPLICIT_DEF
)
1353 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
1357 SUnit
*PredSU
= I
->getSUnit();
1358 if (PredSU
->NumSuccsLeft
!= PredSU
->NumSuccs
)
1360 const SDNode
*PN
= PredSU
->getNode();
1361 if (!PN
->isMachineOpcode()) {
1362 if (PN
->getOpcode() == ISD::CopyFromReg
) {
1363 EVT VT
= PN
->getValueType(0);
1364 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1365 RegPressure
[RCId
] += TLI
->getRepRegClassCostFor(VT
);
1369 unsigned POpc
= PN
->getMachineOpcode();
1370 if (POpc
== TargetOpcode::IMPLICIT_DEF
)
1372 if (POpc
== TargetOpcode::EXTRACT_SUBREG
) {
1373 EVT VT
= PN
->getOperand(0).getValueType();
1374 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1375 RegPressure
[RCId
] += TLI
->getRepRegClassCostFor(VT
);
1377 } else if (POpc
== TargetOpcode::INSERT_SUBREG
||
1378 POpc
== TargetOpcode::SUBREG_TO_REG
) {
1379 EVT VT
= PN
->getValueType(0);
1380 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1381 RegPressure
[RCId
] += TLI
->getRepRegClassCostFor(VT
);
1384 unsigned NumDefs
= TII
->get(PN
->getMachineOpcode()).getNumDefs();
1385 for (unsigned i
= 0; i
!= NumDefs
; ++i
) {
1386 EVT VT
= PN
->getValueType(i
);
1387 if (!PN
->hasAnyUseOfValue(i
))
1389 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1390 if (RegPressure
[RCId
] < TLI
->getRepRegClassCostFor(VT
))
1391 // Register pressure tracking is imprecise. This can happen.
1392 RegPressure
[RCId
] = 0;
1394 RegPressure
[RCId
] -= TLI
->getRepRegClassCostFor(VT
);
1398 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1399 // may transfer data dependencies to CopyToReg.
1400 if (SU
->NumSuccs
&& N
->isMachineOpcode()) {
1401 unsigned NumDefs
= TII
->get(N
->getMachineOpcode()).getNumDefs();
1402 for (unsigned i
= NumDefs
, e
= N
->getNumValues(); i
!= e
; ++i
) {
1403 EVT VT
= N
->getValueType(i
);
1404 if (VT
== MVT::Flag
|| VT
== MVT::Other
)
1406 if (!N
->hasAnyUseOfValue(i
))
1408 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
1409 RegPressure
[RCId
] += TLI
->getRepRegClassCostFor(VT
);
1416 void setScheduleDAG(ScheduleDAGRRList
*scheduleDag
) {
1417 scheduleDAG
= scheduleDag
;
1420 void dumpRegPressure() const {
1421 for (TargetRegisterInfo::regclass_iterator I
= TRI
->regclass_begin(),
1422 E
= TRI
->regclass_end(); I
!= E
; ++I
) {
1423 const TargetRegisterClass
*RC
= *I
;
1424 unsigned Id
= RC
->getID();
1425 unsigned RP
= RegPressure
[Id
];
1427 DEBUG(dbgs() << RC
->getName() << ": " << RP
<< " / " << RegLimit
[Id
]
1433 bool canClobber(const SUnit
*SU
, const SUnit
*Op
);
1434 void AddPseudoTwoAddrDeps();
1435 void PrescheduleNodesWithMultipleUses();
1436 void CalculateSethiUllmanNumbers();
1439 typedef RegReductionPriorityQueue
<bu_ls_rr_sort
>
1440 BURegReductionPriorityQueue
;
1442 typedef RegReductionPriorityQueue
<td_ls_rr_sort
>
1443 TDRegReductionPriorityQueue
;
1445 typedef RegReductionPriorityQueue
<src_ls_rr_sort
>
1446 SrcRegReductionPriorityQueue
;
1448 typedef RegReductionPriorityQueue
<hybrid_ls_rr_sort
>
1449 HybridBURRPriorityQueue
;
1451 typedef RegReductionPriorityQueue
<ilp_ls_rr_sort
>
1452 ILPBURRPriorityQueue
;
1455 /// closestSucc - Returns the scheduled cycle of the successor which is
1456 /// closest to the current cycle.
1457 static unsigned closestSucc(const SUnit
*SU
) {
1458 unsigned MaxHeight
= 0;
1459 for (SUnit::const_succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
1461 if (I
->isCtrl()) continue; // ignore chain succs
1462 unsigned Height
= I
->getSUnit()->getHeight();
1463 // If there are bunch of CopyToRegs stacked up, they should be considered
1464 // to be at the same position.
1465 if (I
->getSUnit()->getNode() &&
1466 I
->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg
)
1467 Height
= closestSucc(I
->getSUnit())+1;
1468 if (Height
> MaxHeight
)
1474 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
1475 /// for scratch registers, i.e. number of data dependencies.
1476 static unsigned calcMaxScratches(const SUnit
*SU
) {
1477 unsigned Scratches
= 0;
1478 for (SUnit::const_pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
1480 if (I
->isCtrl()) continue; // ignore chain preds
1486 /// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a
1487 /// CopyToReg to a virtual register. This SU def is probably a liveout and
1488 /// it has no other use. It should be scheduled closer to the terminator.
1489 static bool hasOnlyLiveOutUses(const SUnit
*SU
) {
1490 bool RetVal
= false;
1491 for (SUnit::const_succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
1493 if (I
->isCtrl()) continue;
1494 const SUnit
*SuccSU
= I
->getSUnit();
1495 if (SuccSU
->getNode() && SuccSU
->getNode()->getOpcode() == ISD::CopyToReg
) {
1497 cast
<RegisterSDNode
>(SuccSU
->getNode()->getOperand(1))->getReg();
1498 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
1508 /// UnitsSharePred - Return true if the two scheduling units share a common
1509 /// data predecessor.
1510 static bool UnitsSharePred(const SUnit
*left
, const SUnit
*right
) {
1511 SmallSet
<const SUnit
*, 4> Preds
;
1512 for (SUnit::const_pred_iterator I
= left
->Preds
.begin(),E
= left
->Preds
.end();
1514 if (I
->isCtrl()) continue; // ignore chain preds
1515 Preds
.insert(I
->getSUnit());
1517 for (SUnit::const_pred_iterator I
= right
->Preds
.begin(),E
= right
->Preds
.end();
1519 if (I
->isCtrl()) continue; // ignore chain preds
1520 if (Preds
.count(I
->getSUnit()))
1526 template <typename RRSort
>
1527 static bool BURRSort(const SUnit
*left
, const SUnit
*right
,
1528 const RegReductionPriorityQueue
<RRSort
> *SPQ
) {
1529 unsigned LPriority
= SPQ
->getNodePriority(left
);
1530 unsigned RPriority
= SPQ
->getNodePriority(right
);
1531 if (LPriority
!= RPriority
)
1532 return LPriority
> RPriority
;
1534 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
1539 // and the following instructions are both ready.
1543 // Then schedule t2 = op first.
1550 // This creates more short live intervals.
1551 unsigned LDist
= closestSucc(left
);
1552 unsigned RDist
= closestSucc(right
);
1554 return LDist
< RDist
;
1556 // How many registers becomes live when the node is scheduled.
1557 unsigned LScratch
= calcMaxScratches(left
);
1558 unsigned RScratch
= calcMaxScratches(right
);
1559 if (LScratch
!= RScratch
)
1560 return LScratch
> RScratch
;
1562 if (left
->getHeight() != right
->getHeight())
1563 return left
->getHeight() > right
->getHeight();
1565 if (left
->getDepth() != right
->getDepth())
1566 return left
->getDepth() < right
->getDepth();
1568 assert(left
->NodeQueueId
&& right
->NodeQueueId
&&
1569 "NodeQueueId cannot be zero");
1570 return (left
->NodeQueueId
> right
->NodeQueueId
);
1574 bool bu_ls_rr_sort::operator()(const SUnit
*left
, const SUnit
*right
) const {
1575 return BURRSort(left
, right
, SPQ
);
1578 // Source order, otherwise bottom up.
1579 bool src_ls_rr_sort::operator()(const SUnit
*left
, const SUnit
*right
) const {
1580 unsigned LOrder
= SPQ
->getNodeOrdering(left
);
1581 unsigned ROrder
= SPQ
->getNodeOrdering(right
);
1583 // Prefer an ordering where the lower the non-zero order number, the higher
1585 if ((LOrder
|| ROrder
) && LOrder
!= ROrder
)
1586 return LOrder
!= 0 && (LOrder
< ROrder
|| ROrder
== 0);
1588 return BURRSort(left
, right
, SPQ
);
1591 bool hybrid_ls_rr_sort::operator()(const SUnit
*left
, const SUnit
*right
) const{
1592 if (left
->isCall
|| right
->isCall
)
1593 // No way to compute latency of calls.
1594 return BURRSort(left
, right
, SPQ
);
1596 bool LHigh
= SPQ
->HighRegPressure(left
);
1597 bool RHigh
= SPQ
->HighRegPressure(right
);
1598 // Avoid causing spills. If register pressure is high, schedule for
1599 // register pressure reduction.
1600 if (LHigh
&& !RHigh
)
1602 else if (!LHigh
&& RHigh
)
1604 else if (!LHigh
&& !RHigh
) {
1605 // If the two nodes share an operand and one of them has a single
1606 // use that is a live out copy, favor the one that is live out. Otherwise
1607 // it will be difficult to eliminate the copy if the instruction is a
1608 // loop induction variable update. e.g.
1615 bool SharePred
= UnitsSharePred(left
, right
);
1616 // FIXME: Only adjust if BB is a loop back edge.
1617 // FIXME: What's the cost of a copy?
1618 int LBonus
= (SharePred
&& hasOnlyLiveOutUses(left
)) ? 1 : 0;
1619 int RBonus
= (SharePred
&& hasOnlyLiveOutUses(right
)) ? 1 : 0;
1620 int LHeight
= (int)left
->getHeight() - LBonus
;
1621 int RHeight
= (int)right
->getHeight() - RBonus
;
1623 // Low register pressure situation, schedule for latency if possible.
1624 bool LStall
= left
->SchedulingPref
== Sched::Latency
&&
1625 (int)SPQ
->getCurCycle() < LHeight
;
1626 bool RStall
= right
->SchedulingPref
== Sched::Latency
&&
1627 (int)SPQ
->getCurCycle() < RHeight
;
1628 // If scheduling one of the node will cause a pipeline stall, delay it.
1629 // If scheduling either one of the node will cause a pipeline stall, sort
1630 // them according to their height.
1634 if (LHeight
!= RHeight
)
1635 return LHeight
> RHeight
;
1639 // If either node is scheduling for latency, sort them by height
1641 if (left
->SchedulingPref
== Sched::Latency
||
1642 right
->SchedulingPref
== Sched::Latency
) {
1643 if (LHeight
!= RHeight
)
1644 return LHeight
> RHeight
;
1645 if (left
->Latency
!= right
->Latency
)
1646 return left
->Latency
> right
->Latency
;
1650 return BURRSort(left
, right
, SPQ
);
1653 bool ilp_ls_rr_sort::operator()(const SUnit
*left
,
1654 const SUnit
*right
) const {
1655 if (left
->isCall
|| right
->isCall
)
1656 // No way to compute latency of calls.
1657 return BURRSort(left
, right
, SPQ
);
1659 bool LHigh
= SPQ
->HighRegPressure(left
);
1660 bool RHigh
= SPQ
->HighRegPressure(right
);
1661 // Avoid causing spills. If register pressure is high, schedule for
1662 // register pressure reduction.
1663 if (LHigh
&& !RHigh
)
1665 else if (!LHigh
&& RHigh
)
1667 else if (!LHigh
&& !RHigh
) {
1668 // Low register pressure situation, schedule to maximize instruction level
1670 if (left
->NumPreds
> right
->NumPreds
)
1672 else if (left
->NumPreds
< right
->NumPreds
)
1676 return BURRSort(left
, right
, SPQ
);
1681 RegReductionPriorityQueue
<SF
>::canClobber(const SUnit
*SU
, const SUnit
*Op
) {
1682 if (SU
->isTwoAddress
) {
1683 unsigned Opc
= SU
->getNode()->getMachineOpcode();
1684 const TargetInstrDesc
&TID
= TII
->get(Opc
);
1685 unsigned NumRes
= TID
.getNumDefs();
1686 unsigned NumOps
= TID
.getNumOperands() - NumRes
;
1687 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
1688 if (TID
.getOperandConstraint(i
+NumRes
, TOI::TIED_TO
) != -1) {
1689 SDNode
*DU
= SU
->getNode()->getOperand(i
).getNode();
1690 if (DU
->getNodeId() != -1 &&
1691 Op
->OrigNode
== &(*SUnits
)[DU
->getNodeId()])
1699 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1700 /// physical register defs.
1701 static bool canClobberPhysRegDefs(const SUnit
*SuccSU
, const SUnit
*SU
,
1702 const TargetInstrInfo
*TII
,
1703 const TargetRegisterInfo
*TRI
) {
1704 SDNode
*N
= SuccSU
->getNode();
1705 unsigned NumDefs
= TII
->get(N
->getMachineOpcode()).getNumDefs();
1706 const unsigned *ImpDefs
= TII
->get(N
->getMachineOpcode()).getImplicitDefs();
1707 assert(ImpDefs
&& "Caller should check hasPhysRegDefs");
1708 for (const SDNode
*SUNode
= SU
->getNode(); SUNode
;
1709 SUNode
= SUNode
->getFlaggedNode()) {
1710 if (!SUNode
->isMachineOpcode())
1712 const unsigned *SUImpDefs
=
1713 TII
->get(SUNode
->getMachineOpcode()).getImplicitDefs();
1716 for (unsigned i
= NumDefs
, e
= N
->getNumValues(); i
!= e
; ++i
) {
1717 EVT VT
= N
->getValueType(i
);
1718 if (VT
== MVT::Flag
|| VT
== MVT::Other
)
1720 if (!N
->hasAnyUseOfValue(i
))
1722 unsigned Reg
= ImpDefs
[i
- NumDefs
];
1723 for (;*SUImpDefs
; ++SUImpDefs
) {
1724 unsigned SUReg
= *SUImpDefs
;
1725 if (TRI
->regsOverlap(Reg
, SUReg
))
1733 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1734 /// are not handled well by the general register pressure reduction
1735 /// heuristics. When presented with code like this:
1744 /// the heuristics tend to push the store up, but since the
1745 /// operand of the store has another use (U), this would increase
1746 /// the length of that other use (the U->N edge).
1748 /// This function transforms code like the above to route U's
1749 /// dependence through the store when possible, like this:
1760 /// This results in the store being scheduled immediately
1761 /// after N, which shortens the U->N live range, reducing
1762 /// register pressure.
1765 void RegReductionPriorityQueue
<SF
>::PrescheduleNodesWithMultipleUses() {
1766 // Visit all the nodes in topological order, working top-down.
1767 for (unsigned i
= 0, e
= SUnits
->size(); i
!= e
; ++i
) {
1768 SUnit
*SU
= &(*SUnits
)[i
];
1769 // For now, only look at nodes with no data successors, such as stores.
1770 // These are especially important, due to the heuristics in
1771 // getNodePriority for nodes with no data successors.
1772 if (SU
->NumSuccs
!= 0)
1774 // For now, only look at nodes with exactly one data predecessor.
1775 if (SU
->NumPreds
!= 1)
1777 // Avoid prescheduling copies to virtual registers, which don't behave
1778 // like other nodes from the perspective of scheduling heuristics.
1779 if (SDNode
*N
= SU
->getNode())
1780 if (N
->getOpcode() == ISD::CopyToReg
&&
1781 TargetRegisterInfo::isVirtualRegister
1782 (cast
<RegisterSDNode
>(N
->getOperand(1))->getReg()))
1785 // Locate the single data predecessor.
1787 for (SUnit::const_pred_iterator II
= SU
->Preds
.begin(),
1788 EE
= SU
->Preds
.end(); II
!= EE
; ++II
)
1789 if (!II
->isCtrl()) {
1790 PredSU
= II
->getSUnit();
1795 // Don't rewrite edges that carry physregs, because that requires additional
1796 // support infrastructure.
1797 if (PredSU
->hasPhysRegDefs
)
1799 // Short-circuit the case where SU is PredSU's only data successor.
1800 if (PredSU
->NumSuccs
== 1)
1802 // Avoid prescheduling to copies from virtual registers, which don't behave
1803 // like other nodes from the perspective of scheduling // heuristics.
1804 if (SDNode
*N
= SU
->getNode())
1805 if (N
->getOpcode() == ISD::CopyFromReg
&&
1806 TargetRegisterInfo::isVirtualRegister
1807 (cast
<RegisterSDNode
>(N
->getOperand(1))->getReg()))
1810 // Perform checks on the successors of PredSU.
1811 for (SUnit::const_succ_iterator II
= PredSU
->Succs
.begin(),
1812 EE
= PredSU
->Succs
.end(); II
!= EE
; ++II
) {
1813 SUnit
*PredSuccSU
= II
->getSUnit();
1814 if (PredSuccSU
== SU
) continue;
1815 // If PredSU has another successor with no data successors, for
1816 // now don't attempt to choose either over the other.
1817 if (PredSuccSU
->NumSuccs
== 0)
1818 goto outer_loop_continue
;
1819 // Don't break physical register dependencies.
1820 if (SU
->hasPhysRegClobbers
&& PredSuccSU
->hasPhysRegDefs
)
1821 if (canClobberPhysRegDefs(PredSuccSU
, SU
, TII
, TRI
))
1822 goto outer_loop_continue
;
1823 // Don't introduce graph cycles.
1824 if (scheduleDAG
->IsReachable(SU
, PredSuccSU
))
1825 goto outer_loop_continue
;
1828 // Ok, the transformation is safe and the heuristics suggest it is
1829 // profitable. Update the graph.
1830 DEBUG(dbgs() << " Prescheduling SU #" << SU
->NodeNum
1831 << " next to PredSU #" << PredSU
->NodeNum
1832 << " to guide scheduling in the presence of multiple uses\n");
1833 for (unsigned i
= 0; i
!= PredSU
->Succs
.size(); ++i
) {
1834 SDep Edge
= PredSU
->Succs
[i
];
1835 assert(!Edge
.isAssignedRegDep());
1836 SUnit
*SuccSU
= Edge
.getSUnit();
1838 Edge
.setSUnit(PredSU
);
1839 scheduleDAG
->RemovePred(SuccSU
, Edge
);
1840 scheduleDAG
->AddPred(SU
, Edge
);
1842 scheduleDAG
->AddPred(SuccSU
, Edge
);
1846 outer_loop_continue
:;
1850 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1851 /// it as a def&use operand. Add a pseudo control edge from it to the other
1852 /// node (if it won't create a cycle) so the two-address one will be scheduled
1853 /// first (lower in the schedule). If both nodes are two-address, favor the
1854 /// one that has a CopyToReg use (more likely to be a loop induction update).
1855 /// If both are two-address, but one is commutable while the other is not
1856 /// commutable, favor the one that's not commutable.
1858 void RegReductionPriorityQueue
<SF
>::AddPseudoTwoAddrDeps() {
1859 for (unsigned i
= 0, e
= SUnits
->size(); i
!= e
; ++i
) {
1860 SUnit
*SU
= &(*SUnits
)[i
];
1861 if (!SU
->isTwoAddress
)
1864 SDNode
*Node
= SU
->getNode();
1865 if (!Node
|| !Node
->isMachineOpcode() || SU
->getNode()->getFlaggedNode())
1868 bool isLiveOut
= hasOnlyLiveOutUses(SU
);
1869 unsigned Opc
= Node
->getMachineOpcode();
1870 const TargetInstrDesc
&TID
= TII
->get(Opc
);
1871 unsigned NumRes
= TID
.getNumDefs();
1872 unsigned NumOps
= TID
.getNumOperands() - NumRes
;
1873 for (unsigned j
= 0; j
!= NumOps
; ++j
) {
1874 if (TID
.getOperandConstraint(j
+NumRes
, TOI::TIED_TO
) == -1)
1876 SDNode
*DU
= SU
->getNode()->getOperand(j
).getNode();
1877 if (DU
->getNodeId() == -1)
1879 const SUnit
*DUSU
= &(*SUnits
)[DU
->getNodeId()];
1880 if (!DUSU
) continue;
1881 for (SUnit::const_succ_iterator I
= DUSU
->Succs
.begin(),
1882 E
= DUSU
->Succs
.end(); I
!= E
; ++I
) {
1883 if (I
->isCtrl()) continue;
1884 SUnit
*SuccSU
= I
->getSUnit();
1887 // Be conservative. Ignore if nodes aren't at roughly the same
1888 // depth and height.
1889 if (SuccSU
->getHeight() < SU
->getHeight() &&
1890 (SU
->getHeight() - SuccSU
->getHeight()) > 1)
1892 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1893 // constrains whatever is using the copy, instead of the copy
1894 // itself. In the case that the copy is coalesced, this
1895 // preserves the intent of the pseudo two-address heurietics.
1896 while (SuccSU
->Succs
.size() == 1 &&
1897 SuccSU
->getNode()->isMachineOpcode() &&
1898 SuccSU
->getNode()->getMachineOpcode() ==
1899 TargetOpcode::COPY_TO_REGCLASS
)
1900 SuccSU
= SuccSU
->Succs
.front().getSUnit();
1901 // Don't constrain non-instruction nodes.
1902 if (!SuccSU
->getNode() || !SuccSU
->getNode()->isMachineOpcode())
1904 // Don't constrain nodes with physical register defs if the
1905 // predecessor can clobber them.
1906 if (SuccSU
->hasPhysRegDefs
&& SU
->hasPhysRegClobbers
) {
1907 if (canClobberPhysRegDefs(SuccSU
, SU
, TII
, TRI
))
1910 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1911 // these may be coalesced away. We want them close to their uses.
1912 unsigned SuccOpc
= SuccSU
->getNode()->getMachineOpcode();
1913 if (SuccOpc
== TargetOpcode::EXTRACT_SUBREG
||
1914 SuccOpc
== TargetOpcode::INSERT_SUBREG
||
1915 SuccOpc
== TargetOpcode::SUBREG_TO_REG
)
1917 if ((!canClobber(SuccSU
, DUSU
) ||
1918 (isLiveOut
&& !hasOnlyLiveOutUses(SuccSU
)) ||
1919 (!SU
->isCommutable
&& SuccSU
->isCommutable
)) &&
1920 !scheduleDAG
->IsReachable(SuccSU
, SU
)) {
1921 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
1922 << SU
->NodeNum
<< " to SU #" << SuccSU
->NodeNum
<< "\n");
1923 scheduleDAG
->AddPred(SU
, SDep(SuccSU
, SDep::Order
, /*Latency=*/0,
1924 /*Reg=*/0, /*isNormalMemory=*/false,
1925 /*isMustAlias=*/false,
1926 /*isArtificial=*/true));
1933 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1934 /// scheduling units.
1936 void RegReductionPriorityQueue
<SF
>::CalculateSethiUllmanNumbers() {
1937 SethiUllmanNumbers
.assign(SUnits
->size(), 0);
1939 for (unsigned i
= 0, e
= SUnits
->size(); i
!= e
; ++i
)
1940 CalcNodeSethiUllmanNumber(&(*SUnits
)[i
], SethiUllmanNumbers
);
1943 /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
1944 /// predecessors of the successors of the SUnit SU. Stop when the provided
1945 /// limit is exceeded.
1946 static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit
*SU
,
1949 for (SUnit::const_succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
1951 const SUnit
*SuccSU
= I
->getSUnit();
1952 for (SUnit::const_pred_iterator II
= SuccSU
->Preds
.begin(),
1953 EE
= SuccSU
->Preds
.end(); II
!= EE
; ++II
) {
1954 SUnit
*PredSU
= II
->getSUnit();
1955 if (!PredSU
->isScheduled
)
/// td_ls_rr_sort::operator() - Compare two SUnits using the priority queue
/// reached through SPQ. The tie-break chain visible in this listing is:
///   1. SPQ->getNodePriority() plus a per-node bonus,
///   2. getDepth(),
///   3. NumSuccsLeft,
///   4. NodeQueueId.
/// NOTE(review): several statements of this function are not present in this
/// listing (the results of the NumSuccs tests and any adjustment made with
/// LIsFloater / RIsFloater), so those effects are not described here; confirm
/// against the full file.
1965 bool td_ls_rr_sort::operator()(const SUnit
*left
, const SUnit
*right
) const {
// Base priorities as reported by the owning queue.
1966 unsigned LPriority
= SPQ
->getNodePriority(left
);
1967 unsigned RPriority
= SPQ
->getNodePriority(right
);
// A unit is a "target" unit when it has a node and that node is a machine
// opcode.
1968 bool LIsTarget
= left
->getNode() && left
->getNode()->isMachineOpcode();
1969 bool RIsTarget
= right
->getNode() && right
->getNode()->isMachineOpcode();
// A "floater" is a target unit whose NumPreds is zero.
1970 bool LIsFloater
= LIsTarget
&& left
->NumPreds
== 0;
1971 bool RIsFloater
= RIsTarget
&& right
->NumPreds
== 0;
// Start with a bonus of 2 when LimitedSumOfUnscheduledPredsOfSuccs(unit, 1)
// reports exactly 1, otherwise 0.
1972 unsigned LBonus
= (LimitedSumOfUnscheduledPredsOfSuccs(left
,1) == 1) ? 2 : 0;
1973 unsigned RBonus
= (LimitedSumOfUnscheduledPredsOfSuccs(right
,1) == 1) ? 2 : 0;
// Exactly one side has no successors. The value produced for each of the two
// cases is not visible in this listing.
1975 if (left
->NumSuccs
== 0 && right
->NumSuccs
!= 0)
1977 else if (left
->NumSuccs
!= 0 && right
->NumSuccs
== 0)
// Units with exactly one successor are singled out; the statements guarded by
// these tests are not visible in this listing.
1984 if (left
->NumSuccs
== 1)
1986 if (right
->NumSuccs
== 1)
// First key: priority plus bonus. Yields true when left's total is smaller.
1989 if (LPriority
+LBonus
!= RPriority
+RBonus
)
1990 return LPriority
+LBonus
< RPriority
+RBonus
;
// Second key: depth. Yields true when left's depth is smaller.
1992 if (left
->getDepth() != right
->getDepth())
1993 return left
->getDepth() < right
->getDepth();
// Third key: NumSuccsLeft. Yields true when left has the larger count.
1995 if (left
->NumSuccsLeft
!= right
->NumSuccsLeft
)
1996 return left
->NumSuccsLeft
> right
->NumSuccsLeft
;
// Last key: NodeQueueId, which the assert requires to be nonzero on both
// sides. Yields true when left's id is larger.
1998 assert(left
->NodeQueueId
&& right
->NodeQueueId
&&
1999 "NodeQueueId cannot be zero");
2000 return (left
->NodeQueueId
> right
->NodeQueueId
);
2003 //===----------------------------------------------------------------------===//
2004 // Public Constructor Functions
2005 //===----------------------------------------------------------------------===//
/// createBURRListDAGScheduler - Build a ScheduleDAGRRList for the machine
/// function of IS, driven by a freshly allocated BURegReductionPriorityQueue.
/// The CodeGenOpt::Level parameter is unnamed and therefore unused.
/// NOTE(review): the return statement is not present in this listing;
/// presumably SD is returned - confirm against the full file.
2007 llvm::ScheduleDAGSDNodes
*
2008 llvm::createBURRListDAGScheduler(SelectionDAGISel
*IS
, CodeGenOpt::Level
) {
// Target descriptions obtained from the selector's TargetMachine and handed to
// the priority queue.
2009 const TargetMachine
&TM
= IS
->TM
;
2010 const TargetInstrInfo
*TII
= TM
.getInstrInfo();
2011 const TargetRegisterInfo
*TRI
= TM
.getRegisterInfo();
// NOTE(review): the 'false' and the trailing 0 are presumably "do not track
// register pressure" and "no TargetLowering" - confirm against the queue's
// constructor.
2013 BURegReductionPriorityQueue
*PQ
=
2014 new BURegReductionPriorityQueue(*IS
->MF
, false, TII
, TRI
, 0);
// NOTE(review): the two bool arguments are presumably (bottom-up, needs
// latency) - confirm against ScheduleDAGRRList's constructor.
2015 ScheduleDAGRRList
*SD
= new ScheduleDAGRRList(*IS
->MF
, true, false, PQ
);
// Tell the queue which scheduler it belongs to.
2016 PQ
->setScheduleDAG(SD
);
/// createTDRRListDAGScheduler - Build a ScheduleDAGRRList for the machine
/// function of IS, driven by a freshly allocated TDRegReductionPriorityQueue.
/// The CodeGenOpt::Level parameter is unnamed and therefore unused.
/// NOTE(review): the return statement is not present in this listing;
/// presumably SD is returned - confirm against the full file.
2020 llvm::ScheduleDAGSDNodes
*
2021 llvm::createTDRRListDAGScheduler(SelectionDAGISel
*IS
, CodeGenOpt::Level
) {
// Target descriptions obtained from the selector's TargetMachine and handed to
// the priority queue.
2022 const TargetMachine
&TM
= IS
->TM
;
2023 const TargetInstrInfo
*TII
= TM
.getInstrInfo();
2024 const TargetRegisterInfo
*TRI
= TM
.getRegisterInfo();
// NOTE(review): the 'false' and the trailing 0 are presumably "do not track
// register pressure" and "no TargetLowering" - confirm against the queue's
// constructor.
2026 TDRegReductionPriorityQueue
*PQ
=
2027 new TDRegReductionPriorityQueue(*IS
->MF
, false, TII
, TRI
, 0);
// Both bool arguments are false here. NOTE(review): they are presumably
// (bottom-up, needs latency), which would make this the top-down variant -
// confirm against ScheduleDAGRRList's constructor.
2028 ScheduleDAGRRList
*SD
= new ScheduleDAGRRList(*IS
->MF
, false, false, PQ
);
// Tell the queue which scheduler it belongs to.
2029 PQ
->setScheduleDAG(SD
);
/// createSourceListDAGScheduler - Build a ScheduleDAGRRList for the machine
/// function of IS, driven by a freshly allocated SrcRegReductionPriorityQueue.
/// The CodeGenOpt::Level parameter is unnamed and therefore unused.
/// NOTE(review): the return statement is not present in this listing;
/// presumably SD is returned - confirm against the full file.
2033 llvm::ScheduleDAGSDNodes
*
2034 llvm::createSourceListDAGScheduler(SelectionDAGISel
*IS
, CodeGenOpt::Level
) {
// Target descriptions obtained from the selector's TargetMachine and handed to
// the priority queue.
2035 const TargetMachine
&TM
= IS
->TM
;
2036 const TargetInstrInfo
*TII
= TM
.getInstrInfo();
2037 const TargetRegisterInfo
*TRI
= TM
.getRegisterInfo();
// NOTE(review): the 'false' and the trailing 0 are presumably "do not track
// register pressure" and "no TargetLowering" - confirm against the queue's
// constructor.
2039 SrcRegReductionPriorityQueue
*PQ
=
2040 new SrcRegReductionPriorityQueue(*IS
->MF
, false, TII
, TRI
, 0);
// NOTE(review): the two bool arguments are presumably (bottom-up, needs
// latency) - confirm against ScheduleDAGRRList's constructor.
2041 ScheduleDAGRRList
*SD
= new ScheduleDAGRRList(*IS
->MF
, true, false, PQ
);
// Tell the queue which scheduler it belongs to.
2042 PQ
->setScheduleDAG(SD
);
/// createHybridListDAGScheduler - Build a ScheduleDAGRRList for the machine
/// function of IS, driven by a freshly allocated HybridBURRPriorityQueue that
/// also receives the selector's TargetLowering.
/// The CodeGenOpt::Level parameter is unnamed and therefore unused.
/// NOTE(review): the return statement is not present in this listing;
/// presumably SD is returned - confirm against the full file.
2046 llvm::ScheduleDAGSDNodes
*
2047 llvm::createHybridListDAGScheduler(SelectionDAGISel
*IS
, CodeGenOpt::Level
) {
// Target descriptions obtained from the selector and its TargetMachine and
// handed to the priority queue.
2048 const TargetMachine
&TM
= IS
->TM
;
2049 const TargetInstrInfo
*TII
= TM
.getInstrInfo();
2050 const TargetRegisterInfo
*TRI
= TM
.getRegisterInfo();
2051 const TargetLowering
*TLI
= &IS
->getTargetLowering();
// NOTE(review): the 'true' is presumably "track register pressure", which
// would explain why a real TLI is supplied - confirm against the queue's
// constructor.
2053 HybridBURRPriorityQueue
*PQ
=
2054 new HybridBURRPriorityQueue(*IS
->MF
, true, TII
, TRI
, TLI
);
// NOTE(review): the two bool arguments are presumably (bottom-up, needs
// latency) - confirm against ScheduleDAGRRList's constructor.
2055 ScheduleDAGRRList
*SD
= new ScheduleDAGRRList(*IS
->MF
, true, true, PQ
);
// Tell the queue which scheduler it belongs to.
2056 PQ
->setScheduleDAG(SD
);
/// createILPListDAGScheduler - Build a ScheduleDAGRRList for the machine
/// function of IS, driven by a freshly allocated ILPBURRPriorityQueue that
/// also receives the selector's TargetLowering.
/// The CodeGenOpt::Level parameter is unnamed and therefore unused.
/// NOTE(review): the return statement is not present in this listing;
/// presumably SD is returned - confirm against the full file.
2060 llvm::ScheduleDAGSDNodes
*
2061 llvm::createILPListDAGScheduler(SelectionDAGISel
*IS
, CodeGenOpt::Level
) {
// Target descriptions obtained from the selector and its TargetMachine and
// handed to the priority queue.
2062 const TargetMachine
&TM
= IS
->TM
;
2063 const TargetInstrInfo
*TII
= TM
.getInstrInfo();
2064 const TargetRegisterInfo
*TRI
= TM
.getRegisterInfo();
2065 const TargetLowering
*TLI
= &IS
->getTargetLowering();
// NOTE(review): the 'true' is presumably "track register pressure", which
// would explain why a real TLI is supplied - confirm against the queue's
// constructor.
2067 ILPBURRPriorityQueue
*PQ
=
2068 new ILPBURRPriorityQueue(*IS
->MF
, true, TII
, TRI
, TLI
);
// NOTE(review): the two bool arguments are presumably (bottom-up, needs
// latency) - confirm against ScheduleDAGRRList's constructor.
2069 ScheduleDAGRRList
*SD
= new ScheduleDAGRRList(*IS
->MF
, true, true, PQ
);
// Tell the queue which scheduler it belongs to.
2070 PQ
->setScheduleDAG(SD
);