Fixed some bugs.
[llvm/zpu.git] / lib / CodeGen / SelectionDAG / ScheduleDAGRRList.cpp
blob9978d00f20f0e4897ad317aee2ed570fa28b4119
1 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "pre-RA-sched"
19 #include "ScheduleDAGSDNodes.h"
20 #include "llvm/InlineAsm.h"
21 #include "llvm/CodeGen/SchedulerRegistry.h"
22 #include "llvm/CodeGen/SelectionDAGISel.h"
23 #include "llvm/Target/TargetRegisterInfo.h"
24 #include "llvm/Target/TargetData.h"
25 #include "llvm/Target/TargetMachine.h"
26 #include "llvm/Target/TargetInstrInfo.h"
27 #include "llvm/Target/TargetLowering.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <climits>
35 using namespace llvm;
37 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
38 STATISTIC(NumUnfolds, "Number of nodes unfolded");
39 STATISTIC(NumDups, "Number of duplicated nodes");
40 STATISTIC(NumPRCopies, "Number of physical register copies");
42 static RegisterScheduler
43 burrListDAGScheduler("list-burr",
44 "Bottom-up register reduction list scheduling",
45 createBURRListDAGScheduler);
46 static RegisterScheduler
47 tdrListrDAGScheduler("list-tdrr",
48 "Top-down register reduction list scheduling",
49 createTDRRListDAGScheduler);
50 static RegisterScheduler
51 sourceListDAGScheduler("source",
52 "Similar to list-burr but schedules in source "
53 "order when possible",
54 createSourceListDAGScheduler);
56 static RegisterScheduler
57 hybridListDAGScheduler("list-hybrid",
58 "Bottom-up register pressure aware list scheduling "
59 "which tries to balance latency and register pressure",
60 createHybridListDAGScheduler);
62 static RegisterScheduler
63 ILPListDAGScheduler("list-ilp",
64 "Bottom-up register pressure aware list scheduling "
65 "which tries to balance ILP and register pressure",
66 createILPListDAGScheduler);
68 namespace {
69 //===----------------------------------------------------------------------===//
70 /// ScheduleDAGRRList - The actual register reduction list scheduler
71 /// implementation. This supports both top-down and bottom-up scheduling.
72 ///
73 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
74 private:
75 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
76 /// it is top-down.
77 bool isBottomUp;
79 /// NeedLatency - True if the scheduler will make use of latency information.
80 ///
81 bool NeedLatency;
83 /// AvailableQueue - The priority queue to use for the available SUnits.
84 SchedulingPriorityQueue *AvailableQueue;
86 /// LiveRegDefs - A set of physical registers and their definition
87 /// that are "live". These nodes must be scheduled before any other nodes that
88 /// modifies the registers can be scheduled.
89 unsigned NumLiveRegs;
90 std::vector<SUnit*> LiveRegDefs;
91 std::vector<unsigned> LiveRegCycles;
93 /// Topo - A topological ordering for SUnits which permits fast IsReachable
94 /// and similar queries.
95 ScheduleDAGTopologicalSort Topo;
97 public:
98 ScheduleDAGRRList(MachineFunction &mf,
99 bool isbottomup, bool needlatency,
100 SchedulingPriorityQueue *availqueue)
101 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
102 AvailableQueue(availqueue), Topo(SUnits) {
105 ~ScheduleDAGRRList() {
106 delete AvailableQueue;
109 void Schedule();
111 /// IsReachable - Checks if SU is reachable from TargetSU.
112 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
113 return Topo.IsReachable(SU, TargetSU);
116 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
117 /// create a cycle.
118 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
119 return Topo.WillCreateCycle(SU, TargetSU);
122 /// AddPred - adds a predecessor edge to SUnit SU.
123 /// This returns true if this is a new predecessor.
124 /// Updates the topological ordering if required.
125 void AddPred(SUnit *SU, const SDep &D) {
126 Topo.AddPred(SU, D.getSUnit());
127 SU->addPred(D);
130 /// RemovePred - removes a predecessor edge from SUnit SU.
131 /// This returns true if an edge was removed.
132 /// Updates the topological ordering if required.
133 void RemovePred(SUnit *SU, const SDep &D) {
134 Topo.RemovePred(SU, D.getSUnit());
135 SU->removePred(D);
138 private:
139 void ReleasePred(SUnit *SU, const SDep *PredEdge);
140 void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
141 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
142 void ReleaseSuccessors(SUnit *SU);
143 void CapturePred(SDep *PredEdge);
144 void ScheduleNodeBottomUp(SUnit*, unsigned);
145 void ScheduleNodeTopDown(SUnit*, unsigned);
146 void UnscheduleNodeBottomUp(SUnit*);
147 void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
148 SUnit *CopyAndMoveSuccessors(SUnit*);
149 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
150 const TargetRegisterClass*,
151 const TargetRegisterClass*,
152 SmallVector<SUnit*, 2>&);
153 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
154 void ListScheduleTopDown();
155 void ListScheduleBottomUp();
158 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
159 /// Updates the topological ordering if required.
160 SUnit *CreateNewSUnit(SDNode *N) {
161 unsigned NumSUnits = SUnits.size();
162 SUnit *NewNode = NewSUnit(N);
163 // Update the topological ordering.
164 if (NewNode->NodeNum >= NumSUnits)
165 Topo.InitDAGTopologicalSorting();
166 return NewNode;
169 /// CreateClone - Creates a new SUnit from an existing one.
170 /// Updates the topological ordering if required.
171 SUnit *CreateClone(SUnit *N) {
172 unsigned NumSUnits = SUnits.size();
173 SUnit *NewNode = Clone(N);
174 // Update the topological ordering.
175 if (NewNode->NodeNum >= NumSUnits)
176 Topo.InitDAGTopologicalSorting();
177 return NewNode;
180 /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
181 /// need actual latency information but the hybrid scheduler does.
182 bool ForceUnitLatencies() const {
183 return !NeedLatency;
186 } // end anonymous namespace
189 /// Schedule - Schedule the DAG using list scheduling.
190 void ScheduleDAGRRList::Schedule() {
191 DEBUG(dbgs()
192 << "********** List Scheduling BB#" << BB->getNumber()
193 << " '" << BB->getName() << "' **********\n");
195 NumLiveRegs = 0;
196 LiveRegDefs.resize(TRI->getNumRegs(), NULL);
197 LiveRegCycles.resize(TRI->getNumRegs(), 0);
199 // Build the scheduling graph.
200 BuildSchedGraph(NULL);
202 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
203 SUnits[su].dumpAll(this));
204 Topo.InitDAGTopologicalSorting();
206 AvailableQueue->initNodes(SUnits);
208 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
209 if (isBottomUp)
210 ListScheduleBottomUp();
211 else
212 ListScheduleTopDown();
214 AvailableQueue->releaseState();
217 //===----------------------------------------------------------------------===//
218 // Bottom-Up Scheduling
219 //===----------------------------------------------------------------------===//
221 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
222 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
223 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
224 SUnit *PredSU = PredEdge->getSUnit();
226 #ifndef NDEBUG
227 if (PredSU->NumSuccsLeft == 0) {
228 dbgs() << "*** Scheduling failed! ***\n";
229 PredSU->dump(this);
230 dbgs() << " has been released too many times!\n";
231 llvm_unreachable(0);
233 #endif
234 --PredSU->NumSuccsLeft;
236 if (!ForceUnitLatencies()) {
237 // Updating predecessor's height. This is now the cycle when the
238 // predecessor can be scheduled without causing a pipeline stall.
239 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
242 // If all the node's successors are scheduled, this node is ready
243 // to be scheduled. Ignore the special EntrySU node.
244 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
245 PredSU->isAvailable = true;
246 AvailableQueue->push(PredSU);
250 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
251 // Bottom up: release predecessors
252 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
253 I != E; ++I) {
254 ReleasePred(SU, &*I);
255 if (I->isAssignedRegDep()) {
256 // This is a physical register dependency and it's impossible or
257 // expensive to copy the register. Make sure nothing that can
258 // clobber the register is scheduled between the predecessor and
259 // this node.
260 if (!LiveRegDefs[I->getReg()]) {
261 ++NumLiveRegs;
262 LiveRegDefs[I->getReg()] = I->getSUnit();
263 LiveRegCycles[I->getReg()] = CurCycle;
269 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
270 /// count of its predecessors. If a predecessor pending count is zero, add it to
271 /// the Available queue.
272 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
273 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
274 DEBUG(SU->dump(this));
276 #ifndef NDEBUG
277 if (CurCycle < SU->getHeight())
278 DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
279 #endif
281 // FIXME: Handle noop hazard.
282 SU->setHeightToAtLeast(CurCycle);
283 Sequence.push_back(SU);
285 AvailableQueue->ScheduledNode(SU);
287 ReleasePredecessors(SU, CurCycle);
289 // Release all the implicit physical register defs that are live.
290 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
291 I != E; ++I) {
292 if (I->isAssignedRegDep()) {
293 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
294 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
295 assert(LiveRegDefs[I->getReg()] == SU &&
296 "Physical register dependency violated?");
297 --NumLiveRegs;
298 LiveRegDefs[I->getReg()] = NULL;
299 LiveRegCycles[I->getReg()] = 0;
304 SU->isScheduled = true;
307 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
308 /// unscheduled, incrcease the succ left count of its predecessors. Remove
309 /// them from AvailableQueue if necessary.
310 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
311 SUnit *PredSU = PredEdge->getSUnit();
312 if (PredSU->isAvailable) {
313 PredSU->isAvailable = false;
314 if (!PredSU->isPending)
315 AvailableQueue->remove(PredSU);
318 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
319 ++PredSU->NumSuccsLeft;
322 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
323 /// its predecessor states to reflect the change.
324 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
325 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
326 DEBUG(SU->dump(this));
328 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
329 I != E; ++I) {
330 CapturePred(&*I);
331 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
332 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
333 assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
334 "Physical register dependency violated?");
335 --NumLiveRegs;
336 LiveRegDefs[I->getReg()] = NULL;
337 LiveRegCycles[I->getReg()] = 0;
341 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
342 I != E; ++I) {
343 if (I->isAssignedRegDep()) {
344 if (!LiveRegDefs[I->getReg()]) {
345 LiveRegDefs[I->getReg()] = SU;
346 ++NumLiveRegs;
348 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
349 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
353 SU->setHeightDirty();
354 SU->isScheduled = false;
355 SU->isAvailable = true;
356 AvailableQueue->push(SU);
357 AvailableQueue->UnscheduledNode(SU);
360 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
361 /// BTCycle in order to schedule a specific node.
362 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
363 unsigned &CurCycle) {
364 SUnit *OldSU = NULL;
365 while (CurCycle > BtCycle) {
366 OldSU = Sequence.back();
367 Sequence.pop_back();
368 if (SU->isSucc(OldSU))
369 // Don't try to remove SU from AvailableQueue.
370 SU->isAvailable = false;
371 UnscheduleNodeBottomUp(OldSU);
372 --CurCycle;
373 AvailableQueue->setCurCycle(CurCycle);
376 assert(!SU->isSucc(OldSU) && "Something is wrong!");
378 ++NumBacktracks;
381 static bool isOperandOf(const SUnit *SU, SDNode *N) {
382 for (const SDNode *SUNode = SU->getNode(); SUNode;
383 SUNode = SUNode->getFlaggedNode()) {
384 if (SUNode->isOperandOf(N))
385 return true;
387 return false;
390 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
391 /// successors to the newly created node.
392 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
393 if (SU->getNode()->getFlaggedNode())
394 return NULL;
396 SDNode *N = SU->getNode();
397 if (!N)
398 return NULL;
400 SUnit *NewSU;
401 bool TryUnfold = false;
402 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
403 EVT VT = N->getValueType(i);
404 if (VT == MVT::Flag)
405 return NULL;
406 else if (VT == MVT::Other)
407 TryUnfold = true;
409 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
410 const SDValue &Op = N->getOperand(i);
411 EVT VT = Op.getNode()->getValueType(Op.getResNo());
412 if (VT == MVT::Flag)
413 return NULL;
416 if (TryUnfold) {
417 SmallVector<SDNode*, 2> NewNodes;
418 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
419 return NULL;
421 DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
422 assert(NewNodes.size() == 2 && "Expected a load folding node!");
424 N = NewNodes[1];
425 SDNode *LoadNode = NewNodes[0];
426 unsigned NumVals = N->getNumValues();
427 unsigned OldNumVals = SU->getNode()->getNumValues();
428 for (unsigned i = 0; i != NumVals; ++i)
429 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
430 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
431 SDValue(LoadNode, 1));
433 // LoadNode may already exist. This can happen when there is another
434 // load from the same location and producing the same type of value
435 // but it has different alignment or volatileness.
436 bool isNewLoad = true;
437 SUnit *LoadSU;
438 if (LoadNode->getNodeId() != -1) {
439 LoadSU = &SUnits[LoadNode->getNodeId()];
440 isNewLoad = false;
441 } else {
442 LoadSU = CreateNewSUnit(LoadNode);
443 LoadNode->setNodeId(LoadSU->NodeNum);
444 ComputeLatency(LoadSU);
447 SUnit *NewSU = CreateNewSUnit(N);
448 assert(N->getNodeId() == -1 && "Node already inserted!");
449 N->setNodeId(NewSU->NodeNum);
451 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
452 for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
453 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
454 NewSU->isTwoAddress = true;
455 break;
458 if (TID.isCommutable())
459 NewSU->isCommutable = true;
460 ComputeLatency(NewSU);
462 // Record all the edges to and from the old SU, by category.
463 SmallVector<SDep, 4> ChainPreds;
464 SmallVector<SDep, 4> ChainSuccs;
465 SmallVector<SDep, 4> LoadPreds;
466 SmallVector<SDep, 4> NodePreds;
467 SmallVector<SDep, 4> NodeSuccs;
468 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
469 I != E; ++I) {
470 if (I->isCtrl())
471 ChainPreds.push_back(*I);
472 else if (isOperandOf(I->getSUnit(), LoadNode))
473 LoadPreds.push_back(*I);
474 else
475 NodePreds.push_back(*I);
477 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
478 I != E; ++I) {
479 if (I->isCtrl())
480 ChainSuccs.push_back(*I);
481 else
482 NodeSuccs.push_back(*I);
485 // Now assign edges to the newly-created nodes.
486 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
487 const SDep &Pred = ChainPreds[i];
488 RemovePred(SU, Pred);
489 if (isNewLoad)
490 AddPred(LoadSU, Pred);
492 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
493 const SDep &Pred = LoadPreds[i];
494 RemovePred(SU, Pred);
495 if (isNewLoad)
496 AddPred(LoadSU, Pred);
498 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
499 const SDep &Pred = NodePreds[i];
500 RemovePred(SU, Pred);
501 AddPred(NewSU, Pred);
503 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
504 SDep D = NodeSuccs[i];
505 SUnit *SuccDep = D.getSUnit();
506 D.setSUnit(SU);
507 RemovePred(SuccDep, D);
508 D.setSUnit(NewSU);
509 AddPred(SuccDep, D);
511 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
512 SDep D = ChainSuccs[i];
513 SUnit *SuccDep = D.getSUnit();
514 D.setSUnit(SU);
515 RemovePred(SuccDep, D);
516 if (isNewLoad) {
517 D.setSUnit(LoadSU);
518 AddPred(SuccDep, D);
522 // Add a data dependency to reflect that NewSU reads the value defined
523 // by LoadSU.
524 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
526 if (isNewLoad)
527 AvailableQueue->addNode(LoadSU);
528 AvailableQueue->addNode(NewSU);
530 ++NumUnfolds;
532 if (NewSU->NumSuccsLeft == 0) {
533 NewSU->isAvailable = true;
534 return NewSU;
536 SU = NewSU;
539 DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
540 NewSU = CreateClone(SU);
542 // New SUnit has the exact same predecessors.
543 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
544 I != E; ++I)
545 if (!I->isArtificial())
546 AddPred(NewSU, *I);
548 // Only copy scheduled successors. Cut them from old node's successor
549 // list and move them over.
550 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
551 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
552 I != E; ++I) {
553 if (I->isArtificial())
554 continue;
555 SUnit *SuccSU = I->getSUnit();
556 if (SuccSU->isScheduled) {
557 SDep D = *I;
558 D.setSUnit(NewSU);
559 AddPred(SuccSU, D);
560 D.setSUnit(SU);
561 DelDeps.push_back(std::make_pair(SuccSU, D));
564 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
565 RemovePred(DelDeps[i].first, DelDeps[i].second);
567 AvailableQueue->updateNode(SU);
568 AvailableQueue->addNode(NewSU);
570 ++NumDups;
571 return NewSU;
574 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
575 /// scheduled successors of the given SUnit to the last copy.
576 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
577 const TargetRegisterClass *DestRC,
578 const TargetRegisterClass *SrcRC,
579 SmallVector<SUnit*, 2> &Copies) {
580 SUnit *CopyFromSU = CreateNewSUnit(NULL);
581 CopyFromSU->CopySrcRC = SrcRC;
582 CopyFromSU->CopyDstRC = DestRC;
584 SUnit *CopyToSU = CreateNewSUnit(NULL);
585 CopyToSU->CopySrcRC = DestRC;
586 CopyToSU->CopyDstRC = SrcRC;
588 // Only copy scheduled successors. Cut them from old node's successor
589 // list and move them over.
590 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
591 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
592 I != E; ++I) {
593 if (I->isArtificial())
594 continue;
595 SUnit *SuccSU = I->getSUnit();
596 if (SuccSU->isScheduled) {
597 SDep D = *I;
598 D.setSUnit(CopyToSU);
599 AddPred(SuccSU, D);
600 DelDeps.push_back(std::make_pair(SuccSU, *I));
603 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
604 RemovePred(DelDeps[i].first, DelDeps[i].second);
606 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
607 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
609 AvailableQueue->updateNode(SU);
610 AvailableQueue->addNode(CopyFromSU);
611 AvailableQueue->addNode(CopyToSU);
612 Copies.push_back(CopyFromSU);
613 Copies.push_back(CopyToSU);
615 ++NumPRCopies;
618 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
619 /// definition of the specified node.
620 /// FIXME: Move to SelectionDAG?
621 static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
622 const TargetInstrInfo *TII) {
623 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
624 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
625 unsigned NumRes = TID.getNumDefs();
626 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
627 if (Reg == *ImpDef)
628 break;
629 ++NumRes;
631 return N->getValueType(NumRes);
634 /// CheckForLiveRegDef - Return true and update live register vector if the
635 /// specified register def of the specified SUnit clobbers any "live" registers.
636 static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
637 std::vector<SUnit*> &LiveRegDefs,
638 SmallSet<unsigned, 4> &RegAdded,
639 SmallVector<unsigned, 4> &LRegs,
640 const TargetRegisterInfo *TRI) {
641 bool Added = false;
642 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
643 if (RegAdded.insert(Reg)) {
644 LRegs.push_back(Reg);
645 Added = true;
648 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
649 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
650 if (RegAdded.insert(*Alias)) {
651 LRegs.push_back(*Alias);
652 Added = true;
655 return Added;
658 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
659 /// scheduling of the given node to satisfy live physical register dependencies.
660 /// If the specific node is the last one that's available to schedule, do
661 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
662 bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
663 SmallVector<unsigned, 4> &LRegs){
664 if (NumLiveRegs == 0)
665 return false;
667 SmallSet<unsigned, 4> RegAdded;
668 // If this node would clobber any "live" register, then it's not ready.
669 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
670 I != E; ++I) {
671 if (I->isAssignedRegDep())
672 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
673 RegAdded, LRegs, TRI);
676 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
677 if (Node->getOpcode() == ISD::INLINEASM) {
678 // Inline asm can clobber physical defs.
679 unsigned NumOps = Node->getNumOperands();
680 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
681 --NumOps; // Ignore the flag operand.
683 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
684 unsigned Flags =
685 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
686 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
688 ++i; // Skip the ID value.
689 if (InlineAsm::isRegDefKind(Flags) ||
690 InlineAsm::isRegDefEarlyClobberKind(Flags)) {
691 // Check for def of register or earlyclobber register.
692 for (; NumVals; --NumVals, ++i) {
693 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
694 if (TargetRegisterInfo::isPhysicalRegister(Reg))
695 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
697 } else
698 i += NumVals;
700 continue;
703 if (!Node->isMachineOpcode())
704 continue;
705 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
706 if (!TID.ImplicitDefs)
707 continue;
708 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
709 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
711 return !LRegs.empty();
715 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
716 /// schedulers.
717 void ScheduleDAGRRList::ListScheduleBottomUp() {
718 unsigned CurCycle = 0;
720 // Release any predecessors of the special Exit node.
721 ReleasePredecessors(&ExitSU, CurCycle);
723 // Add root to Available queue.
724 if (!SUnits.empty()) {
725 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
726 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
727 RootSU->isAvailable = true;
728 AvailableQueue->push(RootSU);
731 // While Available queue is not empty, grab the node with the highest
732 // priority. If it is not ready put it back. Schedule the node.
733 SmallVector<SUnit*, 4> NotReady;
734 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
735 Sequence.reserve(SUnits.size());
736 while (!AvailableQueue->empty()) {
737 bool Delayed = false;
738 LRegsMap.clear();
739 SUnit *CurSU = AvailableQueue->pop();
740 while (CurSU) {
741 SmallVector<unsigned, 4> LRegs;
742 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
743 break;
744 Delayed = true;
745 LRegsMap.insert(std::make_pair(CurSU, LRegs));
747 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
748 NotReady.push_back(CurSU);
749 CurSU = AvailableQueue->pop();
752 // All candidates are delayed due to live physical reg dependencies.
753 // Try backtracking, code duplication, or inserting cross class copies
754 // to resolve it.
755 if (Delayed && !CurSU) {
756 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
757 SUnit *TrySU = NotReady[i];
758 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
760 // Try unscheduling up to the point where it's safe to schedule
761 // this node.
762 unsigned LiveCycle = CurCycle;
763 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
764 unsigned Reg = LRegs[j];
765 unsigned LCycle = LiveRegCycles[Reg];
766 LiveCycle = std::min(LiveCycle, LCycle);
768 SUnit *OldSU = Sequence[LiveCycle];
769 if (!WillCreateCycle(TrySU, OldSU)) {
770 BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
771 // Force the current node to be scheduled before the node that
772 // requires the physical reg dep.
773 if (OldSU->isAvailable) {
774 OldSU->isAvailable = false;
775 AvailableQueue->remove(OldSU);
777 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
778 /*Reg=*/0, /*isNormalMemory=*/false,
779 /*isMustAlias=*/false, /*isArtificial=*/true));
780 // If one or more successors has been unscheduled, then the current
781 // node is no longer avaialable. Schedule a successor that's now
782 // available instead.
783 if (!TrySU->isAvailable)
784 CurSU = AvailableQueue->pop();
785 else {
786 CurSU = TrySU;
787 TrySU->isPending = false;
788 NotReady.erase(NotReady.begin()+i);
790 break;
794 if (!CurSU) {
795 // Can't backtrack. If it's too expensive to copy the value, then try
796 // duplicate the nodes that produces these "too expensive to copy"
797 // values to break the dependency. In case even that doesn't work,
798 // insert cross class copies.
799 // If it's not too expensive, i.e. cost != -1, issue copies.
800 SUnit *TrySU = NotReady[0];
801 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
802 assert(LRegs.size() == 1 && "Can't handle this yet!");
803 unsigned Reg = LRegs[0];
804 SUnit *LRDef = LiveRegDefs[Reg];
805 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
806 const TargetRegisterClass *RC =
807 TRI->getMinimalPhysRegClass(Reg, VT);
808 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
810 // If cross copy register class is null, then it must be possible copy
811 // the value directly. Do not try duplicate the def.
812 SUnit *NewDef = 0;
813 if (DestRC)
814 NewDef = CopyAndMoveSuccessors(LRDef);
815 else
816 DestRC = RC;
817 if (!NewDef) {
818 // Issue copies, these can be expensive cross register class copies.
819 SmallVector<SUnit*, 2> Copies;
820 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
821 DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
822 << " to SU #" << Copies.front()->NodeNum << "\n");
823 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
824 /*Reg=*/0, /*isNormalMemory=*/false,
825 /*isMustAlias=*/false,
826 /*isArtificial=*/true));
827 NewDef = Copies.back();
830 DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
831 << " to SU #" << TrySU->NodeNum << "\n");
832 LiveRegDefs[Reg] = NewDef;
833 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
834 /*Reg=*/0, /*isNormalMemory=*/false,
835 /*isMustAlias=*/false,
836 /*isArtificial=*/true));
837 TrySU->isAvailable = false;
838 CurSU = NewDef;
841 assert(CurSU && "Unable to resolve live physical register dependencies!");
844 // Add the nodes that aren't ready back onto the available list.
845 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
846 NotReady[i]->isPending = false;
847 // May no longer be available due to backtracking.
848 if (NotReady[i]->isAvailable)
849 AvailableQueue->push(NotReady[i]);
851 NotReady.clear();
853 if (CurSU)
854 ScheduleNodeBottomUp(CurSU, CurCycle);
855 ++CurCycle;
856 AvailableQueue->setCurCycle(CurCycle);
859 // Reverse the order if it is bottom up.
860 std::reverse(Sequence.begin(), Sequence.end());
862 #ifndef NDEBUG
863 VerifySchedule(isBottomUp);
864 #endif
867 //===----------------------------------------------------------------------===//
868 // Top-Down Scheduling
869 //===----------------------------------------------------------------------===//
871 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
872 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
873 void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
874 SUnit *SuccSU = SuccEdge->getSUnit();
876 #ifndef NDEBUG
877 if (SuccSU->NumPredsLeft == 0) {
878 dbgs() << "*** Scheduling failed! ***\n";
879 SuccSU->dump(this);
880 dbgs() << " has been released too many times!\n";
881 llvm_unreachable(0);
883 #endif
884 --SuccSU->NumPredsLeft;
886 // If all the node's predecessors are scheduled, this node is ready
887 // to be scheduled. Ignore the special ExitSU node.
888 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
889 SuccSU->isAvailable = true;
890 AvailableQueue->push(SuccSU);
894 void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
895 // Top down: release successors
896 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
897 I != E; ++I) {
898 assert(!I->isAssignedRegDep() &&
899 "The list-tdrr scheduler doesn't yet support physreg dependencies!");
901 ReleaseSucc(SU, &*I);
905 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
906 /// count of its successors. If a successor pending count is zero, add it to
907 /// the Available queue.
908 void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
909 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
910 DEBUG(SU->dump(this));
912 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
913 SU->setDepthToAtLeast(CurCycle);
914 Sequence.push_back(SU);
916 ReleaseSuccessors(SU);
917 SU->isScheduled = true;
918 AvailableQueue->ScheduledNode(SU);
921 /// ListScheduleTopDown - The main loop of list scheduling for top-down
922 /// schedulers.
923 void ScheduleDAGRRList::ListScheduleTopDown() {
924 unsigned CurCycle = 0;
925 AvailableQueue->setCurCycle(CurCycle);
927 // Release any successors of the special Entry node.
928 ReleaseSuccessors(&EntrySU);
930 // All leaves to Available queue.
931 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
932 // It is available if it has no predecessors.
933 if (SUnits[i].Preds.empty()) {
934 AvailableQueue->push(&SUnits[i]);
935 SUnits[i].isAvailable = true;
939 // While Available queue is not empty, grab the node with the highest
940 // priority. If it is not ready put it back. Schedule the node.
941 Sequence.reserve(SUnits.size());
942 while (!AvailableQueue->empty()) {
943 SUnit *CurSU = AvailableQueue->pop();
945 if (CurSU)
946 ScheduleNodeTopDown(CurSU, CurCycle);
947 ++CurCycle;
948 AvailableQueue->setCurCycle(CurCycle);
951 #ifndef NDEBUG
952 VerifySchedule(isBottomUp);
953 #endif
957 //===----------------------------------------------------------------------===//
958 // RegReductionPriorityQueue Implementation
959 //===----------------------------------------------------------------------===//
961 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
962 // to reduce register pressure.
964 namespace {
965 template<class SF>
966 class RegReductionPriorityQueue;
968 /// bu_ls_rr_sort - Priority function for bottom up register pressure
969 // reduction scheduler.
970 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
971 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
972 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
973 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
975 bool operator()(const SUnit* left, const SUnit* right) const;
978 // td_ls_rr_sort - Priority function for top down register pressure reduction
979 // scheduler.
980 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
981 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
982 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
983 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
985 bool operator()(const SUnit* left, const SUnit* right) const;
988 // src_ls_rr_sort - Priority function for source order scheduler.
989 struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
990 RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
991 src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
992 : SPQ(spq) {}
993 src_ls_rr_sort(const src_ls_rr_sort &RHS)
994 : SPQ(RHS.SPQ) {}
996 bool operator()(const SUnit* left, const SUnit* right) const;
999 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1000 struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1001 RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
1002 hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
1003 : SPQ(spq) {}
1004 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1005 : SPQ(RHS.SPQ) {}
1007 bool operator()(const SUnit* left, const SUnit* right) const;
1010 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1011 // scheduler.
1012 struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1013 RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
1014 ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
1015 : SPQ(spq) {}
1016 ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1017 : SPQ(RHS.SPQ) {}
1019 bool operator()(const SUnit* left, const SUnit* right) const;
1021 } // end anonymous namespace
1023 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1024 /// Smaller number is the higher priority.
1025 static unsigned
1026 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1027 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1028 if (SethiUllmanNumber != 0)
1029 return SethiUllmanNumber;
1031 unsigned Extra = 0;
1032 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1033 I != E; ++I) {
1034 if (I->isCtrl()) continue; // ignore chain preds
1035 SUnit *PredSU = I->getSUnit();
1036 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1037 if (PredSethiUllman > SethiUllmanNumber) {
1038 SethiUllmanNumber = PredSethiUllman;
1039 Extra = 0;
1040 } else if (PredSethiUllman == SethiUllmanNumber)
1041 ++Extra;
1044 SethiUllmanNumber += Extra;
1046 if (SethiUllmanNumber == 0)
1047 SethiUllmanNumber = 1;
1049 return SethiUllmanNumber;
1052 namespace {
1053 template<class SF>
1054 class RegReductionPriorityQueue : public SchedulingPriorityQueue {
1055 std::vector<SUnit*> Queue;
1056 SF Picker;
1057 unsigned CurQueueId;
1058 bool TracksRegPressure;
1060 protected:
1061 // SUnits - The SUnits for the current graph.
1062 std::vector<SUnit> *SUnits;
1064 MachineFunction &MF;
1065 const TargetInstrInfo *TII;
1066 const TargetRegisterInfo *TRI;
1067 const TargetLowering *TLI;
1068 ScheduleDAGRRList *scheduleDAG;
1070 // SethiUllmanNumbers - The SethiUllman number for each node.
1071 std::vector<unsigned> SethiUllmanNumbers;
1073 /// RegPressure - Tracking current reg pressure per register class.
1075 std::vector<unsigned> RegPressure;
1077 /// RegLimit - Tracking the number of allocatable registers per register
1078 /// class.
1079 std::vector<unsigned> RegLimit;
1081 public:
1082 RegReductionPriorityQueue(MachineFunction &mf,
1083 bool tracksrp,
1084 const TargetInstrInfo *tii,
1085 const TargetRegisterInfo *tri,
1086 const TargetLowering *tli)
1087 : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
1088 MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1089 if (TracksRegPressure) {
1090 unsigned NumRC = TRI->getNumRegClasses();
1091 RegLimit.resize(NumRC);
1092 RegPressure.resize(NumRC);
1093 std::fill(RegLimit.begin(), RegLimit.end(), 0);
1094 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1095 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1096 E = TRI->regclass_end(); I != E; ++I)
1097 RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
1101 void initNodes(std::vector<SUnit> &sunits) {
1102 SUnits = &sunits;
1103 // Add pseudo dependency edges for two-address nodes.
1104 AddPseudoTwoAddrDeps();
1105 // Reroute edges to nodes with multiple uses.
1106 PrescheduleNodesWithMultipleUses();
1107 // Calculate node priorities.
1108 CalculateSethiUllmanNumbers();
1111 void addNode(const SUnit *SU) {
1112 unsigned SUSize = SethiUllmanNumbers.size();
1113 if (SUnits->size() > SUSize)
1114 SethiUllmanNumbers.resize(SUSize*2, 0);
1115 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1118 void updateNode(const SUnit *SU) {
1119 SethiUllmanNumbers[SU->NodeNum] = 0;
1120 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1123 void releaseState() {
1124 SUnits = 0;
1125 SethiUllmanNumbers.clear();
1126 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1129 unsigned getNodePriority(const SUnit *SU) const {
1130 assert(SU->NodeNum < SethiUllmanNumbers.size());
1131 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1132 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1133 // CopyToReg should be close to its uses to facilitate coalescing and
1134 // avoid spilling.
1135 return 0;
1136 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1137 Opc == TargetOpcode::SUBREG_TO_REG ||
1138 Opc == TargetOpcode::INSERT_SUBREG)
1139 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1140 // close to their uses to facilitate coalescing.
1141 return 0;
1142 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1143 // If SU does not have a register use, i.e. it doesn't produce a value
1144 // that would be consumed (e.g. store), then it terminates a chain of
1145 // computation. Give it a large SethiUllman number so it will be
1146 // scheduled right before its predecessors that it doesn't lengthen
1147 // their live ranges.
1148 return 0xffff;
1149 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1150 // If SU does not have a register def, schedule it close to its uses
1151 // because it does not lengthen any live ranges.
1152 return 0;
1153 return SethiUllmanNumbers[SU->NodeNum];
1156 unsigned getNodeOrdering(const SUnit *SU) const {
1157 return scheduleDAG->DAG->GetOrdering(SU->getNode());
1160 bool empty() const { return Queue.empty(); }
1162 void push(SUnit *U) {
1163 assert(!U->NodeQueueId && "Node in the queue already");
1164 U->NodeQueueId = ++CurQueueId;
1165 Queue.push_back(U);
1168 SUnit *pop() {
1169 if (empty()) return NULL;
1170 std::vector<SUnit *>::iterator Best = Queue.begin();
1171 for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
1172 E = Queue.end(); I != E; ++I)
1173 if (Picker(*Best, *I))
1174 Best = I;
1175 SUnit *V = *Best;
1176 if (Best != prior(Queue.end()))
1177 std::swap(*Best, Queue.back());
1178 Queue.pop_back();
1179 V->NodeQueueId = 0;
1180 return V;
1183 void remove(SUnit *SU) {
1184 assert(!Queue.empty() && "Queue is empty!");
1185 assert(SU->NodeQueueId != 0 && "Not in queue!");
1186 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1187 SU);
1188 if (I != prior(Queue.end()))
1189 std::swap(*I, Queue.back());
1190 Queue.pop_back();
1191 SU->NodeQueueId = 0;
1194 bool HighRegPressure(const SUnit *SU) const {
1195 if (!TLI)
1196 return false;
1198 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1199 I != E; ++I) {
1200 if (I->isCtrl())
1201 continue;
1202 SUnit *PredSU = I->getSUnit();
1203 const SDNode *PN = PredSU->getNode();
1204 if (!PN->isMachineOpcode()) {
1205 if (PN->getOpcode() == ISD::CopyFromReg) {
1206 EVT VT = PN->getValueType(0);
1207 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1208 unsigned Cost = TLI->getRepRegClassCostFor(VT);
1209 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1210 return true;
1212 continue;
1214 unsigned POpc = PN->getMachineOpcode();
1215 if (POpc == TargetOpcode::IMPLICIT_DEF)
1216 continue;
1217 if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1218 EVT VT = PN->getOperand(0).getValueType();
1219 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1220 unsigned Cost = TLI->getRepRegClassCostFor(VT);
1221 // Check if this increases register pressure of the specific register
1222 // class to the point where it would cause spills.
1223 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1224 return true;
1225 continue;
1226 } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1227 POpc == TargetOpcode::SUBREG_TO_REG) {
1228 EVT VT = PN->getValueType(0);
1229 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1230 unsigned Cost = TLI->getRepRegClassCostFor(VT);
1231 // Check if this increases register pressure of the specific register
1232 // class to the point where it would cause spills.
1233 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1234 return true;
1235 continue;
1237 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1238 for (unsigned i = 0; i != NumDefs; ++i) {
1239 EVT VT = PN->getValueType(i);
1240 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1241 if (RegPressure[RCId] >= RegLimit[RCId])
1242 return true; // Reg pressure already high.
1243 unsigned Cost = TLI->getRepRegClassCostFor(VT);
1244 if (!PN->hasAnyUseOfValue(i))
1245 continue;
1246 // Check if this increases register pressure of the specific register
1247 // class to the point where it would cause spills.
1248 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1249 return true;
1253 return false;
1256 void ScheduledNode(SUnit *SU) {
1257 if (!TracksRegPressure)
1258 return;
1260 const SDNode *N = SU->getNode();
1261 if (!N->isMachineOpcode()) {
1262 if (N->getOpcode() != ISD::CopyToReg)
1263 return;
1264 } else {
1265 unsigned Opc = N->getMachineOpcode();
1266 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1267 Opc == TargetOpcode::INSERT_SUBREG ||
1268 Opc == TargetOpcode::SUBREG_TO_REG ||
1269 Opc == TargetOpcode::REG_SEQUENCE ||
1270 Opc == TargetOpcode::IMPLICIT_DEF)
1271 return;
1274 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1275 I != E; ++I) {
1276 if (I->isCtrl())
1277 continue;
1278 SUnit *PredSU = I->getSUnit();
1279 if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1280 continue;
1281 const SDNode *PN = PredSU->getNode();
1282 if (!PN->isMachineOpcode()) {
1283 if (PN->getOpcode() == ISD::CopyFromReg) {
1284 EVT VT = PN->getValueType(0);
1285 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1286 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1288 continue;
1290 unsigned POpc = PN->getMachineOpcode();
1291 if (POpc == TargetOpcode::IMPLICIT_DEF)
1292 continue;
1293 if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1294 EVT VT = PN->getOperand(0).getValueType();
1295 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1296 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1297 continue;
1298 } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1299 POpc == TargetOpcode::SUBREG_TO_REG) {
1300 EVT VT = PN->getValueType(0);
1301 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1302 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1303 continue;
1305 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1306 for (unsigned i = 0; i != NumDefs; ++i) {
1307 EVT VT = PN->getValueType(i);
1308 if (!PN->hasAnyUseOfValue(i))
1309 continue;
1310 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1311 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1315 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1316 // may transfer data dependencies to CopyToReg.
1317 if (SU->NumSuccs && N->isMachineOpcode()) {
1318 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1319 for (unsigned i = 0; i != NumDefs; ++i) {
1320 EVT VT = N->getValueType(i);
1321 if (!N->hasAnyUseOfValue(i))
1322 continue;
1323 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1324 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1325 // Register pressure tracking is imprecise. This can happen.
1326 RegPressure[RCId] = 0;
1327 else
1328 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1332 dumpRegPressure();
1335 void UnscheduledNode(SUnit *SU) {
1336 if (!TracksRegPressure)
1337 return;
1339 const SDNode *N = SU->getNode();
1340 if (!N->isMachineOpcode()) {
1341 if (N->getOpcode() != ISD::CopyToReg)
1342 return;
1343 } else {
1344 unsigned Opc = N->getMachineOpcode();
1345 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1346 Opc == TargetOpcode::INSERT_SUBREG ||
1347 Opc == TargetOpcode::SUBREG_TO_REG ||
1348 Opc == TargetOpcode::REG_SEQUENCE ||
1349 Opc == TargetOpcode::IMPLICIT_DEF)
1350 return;
1353 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1354 I != E; ++I) {
1355 if (I->isCtrl())
1356 continue;
1357 SUnit *PredSU = I->getSUnit();
1358 if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1359 continue;
1360 const SDNode *PN = PredSU->getNode();
1361 if (!PN->isMachineOpcode()) {
1362 if (PN->getOpcode() == ISD::CopyFromReg) {
1363 EVT VT = PN->getValueType(0);
1364 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1365 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1367 continue;
1369 unsigned POpc = PN->getMachineOpcode();
1370 if (POpc == TargetOpcode::IMPLICIT_DEF)
1371 continue;
1372 if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1373 EVT VT = PN->getOperand(0).getValueType();
1374 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1375 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1376 continue;
1377 } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1378 POpc == TargetOpcode::SUBREG_TO_REG) {
1379 EVT VT = PN->getValueType(0);
1380 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1381 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1382 continue;
1384 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1385 for (unsigned i = 0; i != NumDefs; ++i) {
1386 EVT VT = PN->getValueType(i);
1387 if (!PN->hasAnyUseOfValue(i))
1388 continue;
1389 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1390 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1391 // Register pressure tracking is imprecise. This can happen.
1392 RegPressure[RCId] = 0;
1393 else
1394 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1398 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1399 // may transfer data dependencies to CopyToReg.
1400 if (SU->NumSuccs && N->isMachineOpcode()) {
1401 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1402 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1403 EVT VT = N->getValueType(i);
1404 if (VT == MVT::Flag || VT == MVT::Other)
1405 continue;
1406 if (!N->hasAnyUseOfValue(i))
1407 continue;
1408 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1409 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1413 dumpRegPressure();
1416 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1417 scheduleDAG = scheduleDag;
1420 void dumpRegPressure() const {
1421 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1422 E = TRI->regclass_end(); I != E; ++I) {
1423 const TargetRegisterClass *RC = *I;
1424 unsigned Id = RC->getID();
1425 unsigned RP = RegPressure[Id];
1426 if (!RP) continue;
1427 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1428 << '\n');
1432 protected:
1433 bool canClobber(const SUnit *SU, const SUnit *Op);
1434 void AddPseudoTwoAddrDeps();
1435 void PrescheduleNodesWithMultipleUses();
1436 void CalculateSethiUllmanNumbers();
1439 typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1440 BURegReductionPriorityQueue;
1442 typedef RegReductionPriorityQueue<td_ls_rr_sort>
1443 TDRegReductionPriorityQueue;
1445 typedef RegReductionPriorityQueue<src_ls_rr_sort>
1446 SrcRegReductionPriorityQueue;
1448 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1449 HybridBURRPriorityQueue;
1451 typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1452 ILPBURRPriorityQueue;
1455 /// closestSucc - Returns the scheduled cycle of the successor which is
1456 /// closest to the current cycle.
1457 static unsigned closestSucc(const SUnit *SU) {
1458 unsigned MaxHeight = 0;
1459 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1460 I != E; ++I) {
1461 if (I->isCtrl()) continue; // ignore chain succs
1462 unsigned Height = I->getSUnit()->getHeight();
1463 // If there are bunch of CopyToRegs stacked up, they should be considered
1464 // to be at the same position.
1465 if (I->getSUnit()->getNode() &&
1466 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1467 Height = closestSucc(I->getSUnit())+1;
1468 if (Height > MaxHeight)
1469 MaxHeight = Height;
1471 return MaxHeight;
1474 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
1475 /// for scratch registers, i.e. number of data dependencies.
1476 static unsigned calcMaxScratches(const SUnit *SU) {
1477 unsigned Scratches = 0;
1478 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1479 I != E; ++I) {
1480 if (I->isCtrl()) continue; // ignore chain preds
1481 Scratches++;
1483 return Scratches;
1486 /// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a
1487 /// CopyToReg to a virtual register. This SU def is probably a liveout and
1488 /// it has no other use. It should be scheduled closer to the terminator.
1489 static bool hasOnlyLiveOutUses(const SUnit *SU) {
1490 bool RetVal = false;
1491 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1492 I != E; ++I) {
1493 if (I->isCtrl()) continue;
1494 const SUnit *SuccSU = I->getSUnit();
1495 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
1496 unsigned Reg =
1497 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
1498 if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1499 RetVal = true;
1500 continue;
1503 return false;
1505 return RetVal;
1508 /// UnitsSharePred - Return true if the two scheduling units share a common
1509 /// data predecessor.
1510 static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
1511 SmallSet<const SUnit*, 4> Preds;
1512 for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
1513 I != E; ++I) {
1514 if (I->isCtrl()) continue; // ignore chain preds
1515 Preds.insert(I->getSUnit());
1517 for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
1518 I != E; ++I) {
1519 if (I->isCtrl()) continue; // ignore chain preds
1520 if (Preds.count(I->getSUnit()))
1521 return true;
1523 return false;
1526 template <typename RRSort>
1527 static bool BURRSort(const SUnit *left, const SUnit *right,
1528 const RegReductionPriorityQueue<RRSort> *SPQ) {
1529 unsigned LPriority = SPQ->getNodePriority(left);
1530 unsigned RPriority = SPQ->getNodePriority(right);
1531 if (LPriority != RPriority)
1532 return LPriority > RPriority;
1534 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
1535 // e.g.
1536 // t1 = op t2, c1
1537 // t3 = op t4, c2
1539 // and the following instructions are both ready.
1540 // t2 = op c3
1541 // t4 = op c4
1543 // Then schedule t2 = op first.
1544 // i.e.
1545 // t4 = op c4
1546 // t2 = op c3
1547 // t1 = op t2, c1
1548 // t3 = op t4, c2
1550 // This creates more short live intervals.
1551 unsigned LDist = closestSucc(left);
1552 unsigned RDist = closestSucc(right);
1553 if (LDist != RDist)
1554 return LDist < RDist;
1556 // How many registers becomes live when the node is scheduled.
1557 unsigned LScratch = calcMaxScratches(left);
1558 unsigned RScratch = calcMaxScratches(right);
1559 if (LScratch != RScratch)
1560 return LScratch > RScratch;
1562 if (left->getHeight() != right->getHeight())
1563 return left->getHeight() > right->getHeight();
1565 if (left->getDepth() != right->getDepth())
1566 return left->getDepth() < right->getDepth();
1568 assert(left->NodeQueueId && right->NodeQueueId &&
1569 "NodeQueueId cannot be zero");
1570 return (left->NodeQueueId > right->NodeQueueId);
1573 // Bottom up
1574 bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1575 return BURRSort(left, right, SPQ);
1578 // Source order, otherwise bottom up.
1579 bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1580 unsigned LOrder = SPQ->getNodeOrdering(left);
1581 unsigned ROrder = SPQ->getNodeOrdering(right);
1583 // Prefer an ordering where the lower the non-zero order number, the higher
1584 // the preference.
1585 if ((LOrder || ROrder) && LOrder != ROrder)
1586 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1588 return BURRSort(left, right, SPQ);
1591 bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
1592 if (left->isCall || right->isCall)
1593 // No way to compute latency of calls.
1594 return BURRSort(left, right, SPQ);
1596 bool LHigh = SPQ->HighRegPressure(left);
1597 bool RHigh = SPQ->HighRegPressure(right);
1598 // Avoid causing spills. If register pressure is high, schedule for
1599 // register pressure reduction.
1600 if (LHigh && !RHigh)
1601 return true;
1602 else if (!LHigh && RHigh)
1603 return false;
1604 else if (!LHigh && !RHigh) {
1605 // If the two nodes share an operand and one of them has a single
1606 // use that is a live out copy, favor the one that is live out. Otherwise
1607 // it will be difficult to eliminate the copy if the instruction is a
1608 // loop induction variable update. e.g.
1609 // BB:
1610 // sub r1, r3, #1
1611 // str r0, [r2, r3]
1612 // mov r3, r1
1613 // cmp
1614 // bne BB
1615 bool SharePred = UnitsSharePred(left, right);
1616 // FIXME: Only adjust if BB is a loop back edge.
1617 // FIXME: What's the cost of a copy?
1618 int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
1619 int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
1620 int LHeight = (int)left->getHeight() - LBonus;
1621 int RHeight = (int)right->getHeight() - RBonus;
1623 // Low register pressure situation, schedule for latency if possible.
1624 bool LStall = left->SchedulingPref == Sched::Latency &&
1625 (int)SPQ->getCurCycle() < LHeight;
1626 bool RStall = right->SchedulingPref == Sched::Latency &&
1627 (int)SPQ->getCurCycle() < RHeight;
1628 // If scheduling one of the node will cause a pipeline stall, delay it.
1629 // If scheduling either one of the node will cause a pipeline stall, sort
1630 // them according to their height.
1631 if (LStall) {
1632 if (!RStall)
1633 return true;
1634 if (LHeight != RHeight)
1635 return LHeight > RHeight;
1636 } else if (RStall)
1637 return false;
1639 // If either node is scheduling for latency, sort them by height
1640 // and latency.
1641 if (left->SchedulingPref == Sched::Latency ||
1642 right->SchedulingPref == Sched::Latency) {
1643 if (LHeight != RHeight)
1644 return LHeight > RHeight;
1645 if (left->Latency != right->Latency)
1646 return left->Latency > right->Latency;
1650 return BURRSort(left, right, SPQ);
1653 bool ilp_ls_rr_sort::operator()(const SUnit *left,
1654 const SUnit *right) const {
1655 if (left->isCall || right->isCall)
1656 // No way to compute latency of calls.
1657 return BURRSort(left, right, SPQ);
1659 bool LHigh = SPQ->HighRegPressure(left);
1660 bool RHigh = SPQ->HighRegPressure(right);
1661 // Avoid causing spills. If register pressure is high, schedule for
1662 // register pressure reduction.
1663 if (LHigh && !RHigh)
1664 return true;
1665 else if (!LHigh && RHigh)
1666 return false;
1667 else if (!LHigh && !RHigh) {
1668 // Low register pressure situation, schedule to maximize instruction level
1669 // parallelism.
1670 if (left->NumPreds > right->NumPreds)
1671 return false;
1672 else if (left->NumPreds < right->NumPreds)
1673 return false;
1676 return BURRSort(left, right, SPQ);
1679 template<class SF>
1680 bool
1681 RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1682 if (SU->isTwoAddress) {
1683 unsigned Opc = SU->getNode()->getMachineOpcode();
1684 const TargetInstrDesc &TID = TII->get(Opc);
1685 unsigned NumRes = TID.getNumDefs();
1686 unsigned NumOps = TID.getNumOperands() - NumRes;
1687 for (unsigned i = 0; i != NumOps; ++i) {
1688 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1689 SDNode *DU = SU->getNode()->getOperand(i).getNode();
1690 if (DU->getNodeId() != -1 &&
1691 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1692 return true;
1696 return false;
1699 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1700 /// physical register defs.
1701 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1702 const TargetInstrInfo *TII,
1703 const TargetRegisterInfo *TRI) {
1704 SDNode *N = SuccSU->getNode();
1705 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1706 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1707 assert(ImpDefs && "Caller should check hasPhysRegDefs");
1708 for (const SDNode *SUNode = SU->getNode(); SUNode;
1709 SUNode = SUNode->getFlaggedNode()) {
1710 if (!SUNode->isMachineOpcode())
1711 continue;
1712 const unsigned *SUImpDefs =
1713 TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
1714 if (!SUImpDefs)
1715 return false;
1716 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1717 EVT VT = N->getValueType(i);
1718 if (VT == MVT::Flag || VT == MVT::Other)
1719 continue;
1720 if (!N->hasAnyUseOfValue(i))
1721 continue;
1722 unsigned Reg = ImpDefs[i - NumDefs];
1723 for (;*SUImpDefs; ++SUImpDefs) {
1724 unsigned SUReg = *SUImpDefs;
1725 if (TRI->regsOverlap(Reg, SUReg))
1726 return true;
1730 return false;
1733 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1734 /// are not handled well by the general register pressure reduction
1735 /// heuristics. When presented with code like this:
1737 /// N
1738 /// / |
1739 /// / |
1740 /// U store
1741 /// |
1742 /// ...
1744 /// the heuristics tend to push the store up, but since the
1745 /// operand of the store has another use (U), this would increase
1746 /// the length of that other use (the U->N edge).
1748 /// This function transforms code like the above to route U's
1749 /// dependence through the store when possible, like this:
1751 /// N
1752 /// ||
1753 /// ||
1754 /// store
1755 /// |
1756 /// U
1757 /// |
1758 /// ...
1760 /// This results in the store being scheduled immediately
1761 /// after N, which shortens the U->N live range, reducing
1762 /// register pressure.
1764 template<class SF>
1765 void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1766 // Visit all the nodes in topological order, working top-down.
1767 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1768 SUnit *SU = &(*SUnits)[i];
1769 // For now, only look at nodes with no data successors, such as stores.
1770 // These are especially important, due to the heuristics in
1771 // getNodePriority for nodes with no data successors.
1772 if (SU->NumSuccs != 0)
1773 continue;
1774 // For now, only look at nodes with exactly one data predecessor.
1775 if (SU->NumPreds != 1)
1776 continue;
1777 // Avoid prescheduling copies to virtual registers, which don't behave
1778 // like other nodes from the perspective of scheduling heuristics.
1779 if (SDNode *N = SU->getNode())
1780 if (N->getOpcode() == ISD::CopyToReg &&
1781 TargetRegisterInfo::isVirtualRegister
1782 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1783 continue;
1785 // Locate the single data predecessor.
1786 SUnit *PredSU = 0;
1787 for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1788 EE = SU->Preds.end(); II != EE; ++II)
1789 if (!II->isCtrl()) {
1790 PredSU = II->getSUnit();
1791 break;
1793 assert(PredSU);
1795 // Don't rewrite edges that carry physregs, because that requires additional
1796 // support infrastructure.
1797 if (PredSU->hasPhysRegDefs)
1798 continue;
1799 // Short-circuit the case where SU is PredSU's only data successor.
1800 if (PredSU->NumSuccs == 1)
1801 continue;
1802 // Avoid prescheduling to copies from virtual registers, which don't behave
1803 // like other nodes from the perspective of scheduling // heuristics.
1804 if (SDNode *N = SU->getNode())
1805 if (N->getOpcode() == ISD::CopyFromReg &&
1806 TargetRegisterInfo::isVirtualRegister
1807 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1808 continue;
1810 // Perform checks on the successors of PredSU.
1811 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1812 EE = PredSU->Succs.end(); II != EE; ++II) {
1813 SUnit *PredSuccSU = II->getSUnit();
1814 if (PredSuccSU == SU) continue;
1815 // If PredSU has another successor with no data successors, for
1816 // now don't attempt to choose either over the other.
1817 if (PredSuccSU->NumSuccs == 0)
1818 goto outer_loop_continue;
1819 // Don't break physical register dependencies.
1820 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1821 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1822 goto outer_loop_continue;
1823 // Don't introduce graph cycles.
1824 if (scheduleDAG->IsReachable(SU, PredSuccSU))
1825 goto outer_loop_continue;
1828 // Ok, the transformation is safe and the heuristics suggest it is
1829 // profitable. Update the graph.
1830 DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
1831 << " next to PredSU #" << PredSU->NodeNum
1832 << " to guide scheduling in the presence of multiple uses\n");
1833 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1834 SDep Edge = PredSU->Succs[i];
1835 assert(!Edge.isAssignedRegDep());
1836 SUnit *SuccSU = Edge.getSUnit();
1837 if (SuccSU != SU) {
1838 Edge.setSUnit(PredSU);
1839 scheduleDAG->RemovePred(SuccSU, Edge);
1840 scheduleDAG->AddPred(SU, Edge);
1841 Edge.setSUnit(SU);
1842 scheduleDAG->AddPred(SuccSU, Edge);
1843 --i;
1846 outer_loop_continue:;
1850 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1851 /// it as a def&use operand. Add a pseudo control edge from it to the other
1852 /// node (if it won't create a cycle) so the two-address one will be scheduled
1853 /// first (lower in the schedule). If both nodes are two-address, favor the
1854 /// one that has a CopyToReg use (more likely to be a loop induction update).
1855 /// If both are two-address, but one is commutable while the other is not
1856 /// commutable, favor the one that's not commutable.
1857 template<class SF>
1858 void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1859 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1860 SUnit *SU = &(*SUnits)[i];
1861 if (!SU->isTwoAddress)
1862 continue;
1864 SDNode *Node = SU->getNode();
1865 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1866 continue;
1868 bool isLiveOut = hasOnlyLiveOutUses(SU);
1869 unsigned Opc = Node->getMachineOpcode();
1870 const TargetInstrDesc &TID = TII->get(Opc);
1871 unsigned NumRes = TID.getNumDefs();
1872 unsigned NumOps = TID.getNumOperands() - NumRes;
1873 for (unsigned j = 0; j != NumOps; ++j) {
1874 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1875 continue;
1876 SDNode *DU = SU->getNode()->getOperand(j).getNode();
1877 if (DU->getNodeId() == -1)
1878 continue;
1879 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1880 if (!DUSU) continue;
1881 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1882 E = DUSU->Succs.end(); I != E; ++I) {
1883 if (I->isCtrl()) continue;
1884 SUnit *SuccSU = I->getSUnit();
1885 if (SuccSU == SU)
1886 continue;
1887 // Be conservative. Ignore if nodes aren't at roughly the same
1888 // depth and height.
1889 if (SuccSU->getHeight() < SU->getHeight() &&
1890 (SU->getHeight() - SuccSU->getHeight()) > 1)
1891 continue;
1892 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1893 // constrains whatever is using the copy, instead of the copy
1894 // itself. In the case that the copy is coalesced, this
1895 // preserves the intent of the pseudo two-address heurietics.
1896 while (SuccSU->Succs.size() == 1 &&
1897 SuccSU->getNode()->isMachineOpcode() &&
1898 SuccSU->getNode()->getMachineOpcode() ==
1899 TargetOpcode::COPY_TO_REGCLASS)
1900 SuccSU = SuccSU->Succs.front().getSUnit();
1901 // Don't constrain non-instruction nodes.
1902 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1903 continue;
1904 // Don't constrain nodes with physical register defs if the
1905 // predecessor can clobber them.
1906 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1907 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1908 continue;
1910 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1911 // these may be coalesced away. We want them close to their uses.
1912 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1913 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
1914 SuccOpc == TargetOpcode::INSERT_SUBREG ||
1915 SuccOpc == TargetOpcode::SUBREG_TO_REG)
1916 continue;
1917 if ((!canClobber(SuccSU, DUSU) ||
1918 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
1919 (!SU->isCommutable && SuccSU->isCommutable)) &&
1920 !scheduleDAG->IsReachable(SuccSU, SU)) {
1921 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
1922 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
1923 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1924 /*Reg=*/0, /*isNormalMemory=*/false,
1925 /*isMustAlias=*/false,
1926 /*isArtificial=*/true));
1933 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1934 /// scheduling units.
1935 template<class SF>
1936 void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1937 SethiUllmanNumbers.assign(SUnits->size(), 0);
1939 for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1940 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1943 /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
1944 /// predecessors of the successors of the SUnit SU. Stop when the provided
1945 /// limit is exceeded.
1946 static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1947 unsigned Limit) {
1948 unsigned Sum = 0;
1949 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1950 I != E; ++I) {
1951 const SUnit *SuccSU = I->getSUnit();
1952 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1953 EE = SuccSU->Preds.end(); II != EE; ++II) {
1954 SUnit *PredSU = II->getSUnit();
1955 if (!PredSU->isScheduled)
1956 if (++Sum > Limit)
1957 return Sum;
1960 return Sum;
1964 // Top down
1965 bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1966 unsigned LPriority = SPQ->getNodePriority(left);
1967 unsigned RPriority = SPQ->getNodePriority(right);
1968 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1969 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
1970 bool LIsFloater = LIsTarget && left->NumPreds == 0;
1971 bool RIsFloater = RIsTarget && right->NumPreds == 0;
1972 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1973 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1975 if (left->NumSuccs == 0 && right->NumSuccs != 0)
1976 return false;
1977 else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1978 return true;
1980 if (LIsFloater)
1981 LBonus -= 2;
1982 if (RIsFloater)
1983 RBonus -= 2;
1984 if (left->NumSuccs == 1)
1985 LBonus += 2;
1986 if (right->NumSuccs == 1)
1987 RBonus += 2;
1989 if (LPriority+LBonus != RPriority+RBonus)
1990 return LPriority+LBonus < RPriority+RBonus;
1992 if (left->getDepth() != right->getDepth())
1993 return left->getDepth() < right->getDepth();
1995 if (left->NumSuccsLeft != right->NumSuccsLeft)
1996 return left->NumSuccsLeft > right->NumSuccsLeft;
1998 assert(left->NodeQueueId && right->NodeQueueId &&
1999 "NodeQueueId cannot be zero");
2000 return (left->NodeQueueId > right->NodeQueueId);
2003 //===----------------------------------------------------------------------===//
2004 // Public Constructor Functions
2005 //===----------------------------------------------------------------------===//
2007 llvm::ScheduleDAGSDNodes *
2008 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2009 const TargetMachine &TM = IS->TM;
2010 const TargetInstrInfo *TII = TM.getInstrInfo();
2011 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2013 BURegReductionPriorityQueue *PQ =
2014 new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2015 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
2016 PQ->setScheduleDAG(SD);
2017 return SD;
2020 llvm::ScheduleDAGSDNodes *
2021 llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2022 const TargetMachine &TM = IS->TM;
2023 const TargetInstrInfo *TII = TM.getInstrInfo();
2024 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2026 TDRegReductionPriorityQueue *PQ =
2027 new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2028 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
2029 PQ->setScheduleDAG(SD);
2030 return SD;
2033 llvm::ScheduleDAGSDNodes *
2034 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2035 const TargetMachine &TM = IS->TM;
2036 const TargetInstrInfo *TII = TM.getInstrInfo();
2037 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2039 SrcRegReductionPriorityQueue *PQ =
2040 new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2041 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
2042 PQ->setScheduleDAG(SD);
2043 return SD;
2046 llvm::ScheduleDAGSDNodes *
2047 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2048 const TargetMachine &TM = IS->TM;
2049 const TargetInstrInfo *TII = TM.getInstrInfo();
2050 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2051 const TargetLowering *TLI = &IS->getTargetLowering();
2053 HybridBURRPriorityQueue *PQ =
2054 new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2055 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
2056 PQ->setScheduleDAG(SD);
2057 return SD;
2060 llvm::ScheduleDAGSDNodes *
2061 llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2062 const TargetMachine &TM = IS->TM;
2063 const TargetInstrInfo *TII = TM.getInstrInfo();
2064 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2065 const TargetLowering *TLI = &IS->getTargetLowering();
2067 ILPBURRPriorityQueue *PQ =
2068 new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2069 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
2070 PQ->setScheduleDAG(SD);
2071 return SD;