//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//
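//
// A rough sketch of the core loop both directions share (illustrative only;
// the real loops below additionally handle physical register interferences
// and, in the bottom-up case, backtracking and node cloning):
//
//   while (!AvailableQueue->empty()) {
//     SUnit *SU = AvailableQueue->pop();   // highest-priority ready node
//     if (SU can legally be scheduled this cycle)
//       append SU to Sequence and release its neighbors into the queue;
//     else
//       set SU aside and try the next candidate;
//   }
//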
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");
static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
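
// These registrations make the two schedulers selectable on the command line
// (e.g. "llc -pre-RA-sched=list-burr" or "-pre-RA-sched=list-tdrr") through
// the SchedulerRegistry machinery.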
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definition
  /// that are "live". These nodes must be scheduled before any other nodes that
  /// modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;
public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
      AvailableQueue(availqueue), Topo(SUnits) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }
  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU
  /// will create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }
  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }
private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();
  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Return true, since register-pressure-reducing
  /// scheduling doesn't need actual latency information.
  bool ForceUnitLatencies() const { return true; }
};
} // end anonymous namespace
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(errs() << "********** List Scheduling **********\n");

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  BuildSchedGraph();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}
//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  --PredSU->NumSuccsLeft;

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft < 0) {
    errs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    errs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}
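
// Note: LiveRegDefs/LiveRegCycles record, for each physical register carried
// by a data dependence, the defining node and the cycle at which the register
// became live. DelayForLiveRegsBottomUp (below) consults this bookkeeping to
// keep potential clobbers from being scheduled inside that interval.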
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  ++PredSU->NumSuccsLeft;
}
/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(errs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegCycles[I->getReg()] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()]) {
        LiveRegDefs[I->getReg()] = SU;
        ++NumLiveRegs;
      }
      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}
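
// Backtracking is the cheapest of the three recovery strategies used by
// ListScheduleBottomUp when every available node would clobber a live
// physical register: unschedule back to the cycle where the register was
// defined, then schedule the blocked node first. Node cloning
// (CopyAndMoveSuccessors) and cross-class copies (InsertCopiesAndMoveSuccs)
// are the fallbacks.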
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->getNode()->getFlaggedNode())
    return NULL;

  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");
    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location producing the same type of value
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }
    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (I->getSUnit()->getNode() &&
               I->getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }
    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }
  DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}
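
// CopyAndMoveSuccessors returns NULL when cloning is not possible (e.g. the
// node produces or consumes a flag value); the caller then falls back to
// InsertCopiesAndMoveSuccs to break the dependency with register copies.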
/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
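
// The two inserted units form a round-trip through the cross-copy register
// class: SU --Data(Reg)--> CopyFromSU (SrcRC->DestRC) --Data--> CopyToSU
// (DestRC->SrcRC), and SU's already-scheduled successors are rerouted to
// depend on CopyToSU.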
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}
/// CheckForLiveRegDef - Return true and update the live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  bool Added = false;
  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
    if (RegAdded.insert(Reg)) {
      LRegs.push_back(Reg);
      Added = true;
    }
  }
  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
      if (RegAdded.insert(*Alias)) {
        LRegs.push_back(*Alias);
        Added = true;
      }
    }
  return Added;
}
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep())
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
        --NumOps;  // Ignore the flag operand.

      for (unsigned i = 2; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = (Flags & 0xffff) >> 3;
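        // The flag word for each inline-asm operand group encodes the operand
        // kind in its low 3 bits (2 = register def, 6 = earlyclobber register
        // def, per the checks below) and the number of following operand
        // values in bits 3..15, which is what the shift above extracts.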
        ++i; // Skip the ID value.
        if ((Flags & 7) == 2 || (Flags & 7) == 6) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }
  return !LRegs.empty();
}
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Otherwise schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }
    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors have been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // duplicating the nodes that produce these "too expensive to copy"
        // values to break the dependency. In case even that doesn't work,
        // insert cross class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getPhysicalRegisterRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If the cross copy register class is null, then it must be possible
        // to copy the value directly. Do not try to duplicate the def.
        SUnit *NewDef = 0;
        if (DestRC)
          NewDef = CopyAndMoveSuccessors(LRDef);
        else
          DestRC = RC;
        if (!NewDef) {
          // Issue copies, these can be expensive cross register class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DEBUG(errs() << "Adding an edge from SU #" << TrySU->NodeNum
                       << " to SU #" << Copies.front()->NodeNum << "\n");
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DEBUG(errs() << "Adding an edge from SU #" << NewDef->NodeNum
                     << " to SU #" << TrySU->NodeNum << "\n");
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }
      assert(CurSU && "Unable to resolve live physical register dependencies!");
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
  }

  // Reverse the order since it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}
//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    errs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    errs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}
void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Otherwise schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU, CurCycle);
    ++CurCycle;
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}
//===----------------------------------------------------------------------===//
//  RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace
/// CalcNodeSethiUllmanNumber - Compute the Sethi Ullman number.
/// A smaller number means a higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
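
// Worked example: a leaf (no data preds) falls through the loop with
// SethiUllmanNumber still 0 and is assigned 1. A node whose two data preds
// both have number 1 takes the max (1) plus one Extra for the tie, giving 2,
// reflecting that evaluating either subtree first still leaves one register
// holding the other's result. A node with preds {2, 1} stays at 2: the
// cheaper subtree can be evaluated while the expensive one's result waits.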
namespace {
  template<class SF>
  class VISIBILITY_HIDDEN RegReductionPriorityQueue
    : public SchedulingPriorityQueue {
    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
    unsigned currentQueueId;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    RegReductionPriorityQueue(const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri) :
      Queue(SF(this)), currentQueueId(0),
      TII(tii), TRI(tri), scheduleDAG(NULL) {}
    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Reroute edges to nodes with multiple uses.
      PrescheduleNodesWithMultipleUses();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }
    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }
    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
          Opc == TargetInstrInfo::SUBREG_TO_REG ||
          Opc == TargetInstrInfo::INSERT_SUBREG)
        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
        // close to their uses to facilitate coalescing.
        return 0;
      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
        // If SU does not have a register use, i.e. it doesn't produce a value
        // that would be consumed (e.g. a store), then it terminates a chain of
        // computation. Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors, so that it doesn't lengthen
        // their live ranges.
        return 0xffff;
      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
        // If SU does not have a register def, schedule it close to its uses
        // because it does not lengthen any live ranges.
        return 0;
      return SethiUllmanNumbers[SU->NodeNum];
    }
    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++currentQueueId;
      Queue.push(U);
    }

    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        push(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      Queue.erase_one(SU);
      SU->NodeQueueId = 0;
    }
    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void PrescheduleNodesWithMultipleUses();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;
}  // end anonymous namespace
/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}
/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}
// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t1 = op t2, c1
  // t3 = op t4, c2
  // t2 = op c3
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
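
// Taken together, bu_ls_rr_sort breaks ties in this order: Sethi-Ullman
// priority, distance to the closest use, worst-case scratch registers,
// height, depth, and finally queue id, where the smaller (earlier-assigned)
// NodeQueueId wins, giving FIFO behavior among otherwise-equal nodes.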
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}
/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Flag || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}
/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///      |
///     ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(errs() << "Prescheduling SU # " << SU->NodeNum
                 << " next to PredSU # " << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetInstrInfo::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
            SuccOpc == TargetInstrInfo::INSERT_SUBREG ||
            SuccOpc == TargetInstrInfo::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(errs() << "Adding a pseudo-two-addr edge from SU # "
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}
/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}
/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}
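
// td_ls_rr_sort uses this with Limit==1: a node scores a bonus when the
// unscheduled-predecessor count across all of its successors is exactly one,
// i.e. scheduling it immediately unblocks a successor.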
// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
//===----------------------------------------------------------------------===//
//  Public Constructor Functions
//===----------------------------------------------------------------------===//
llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}
llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}
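
// Usage note: these factory functions are wired to the "list-burr" and
// "list-tdrr" names registered at the top of the file, so they are normally
// reached via a flag such as "llc -pre-RA-sched=list-burr" rather than being
// called directly.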