We're not going to spend 100% of the time in interrupts, are we? :)
[llvm/msp430.git] / lib / CodeGen / SelectionDAG / ScheduleDAGRRList.cpp
blobc97e2a8c86bf7afb8a9dd8ab72e95afb94918603
1 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "pre-RA-sched"
19 #include "ScheduleDAGSDNodes.h"
20 #include "llvm/CodeGen/SchedulerRegistry.h"
21 #include "llvm/CodeGen/SelectionDAGISel.h"
22 #include "llvm/Target/TargetRegisterInfo.h"
23 #include "llvm/Target/TargetData.h"
24 #include "llvm/Target/TargetMachine.h"
25 #include "llvm/Target/TargetInstrInfo.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/Compiler.h"
28 #include "llvm/ADT/PriorityQueue.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include <climits>
33 using namespace llvm;
35 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
36 STATISTIC(NumUnfolds, "Number of nodes unfolded");
37 STATISTIC(NumDups, "Number of duplicated nodes");
38 STATISTIC(NumPRCopies, "Number of physical register copies");
40 static RegisterScheduler
41 burrListDAGScheduler("list-burr",
42 "Bottom-up register reduction list scheduling",
43 createBURRListDAGScheduler);
44 static RegisterScheduler
45 tdrListrDAGScheduler("list-tdrr",
46 "Top-down register reduction list scheduling",
47 createTDRRListDAGScheduler);
49 namespace {
50 //===----------------------------------------------------------------------===//
51 /// ScheduleDAGRRList - The actual register reduction list scheduler
52 /// implementation. This supports both top-down and bottom-up scheduling.
53 ///
54 class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
55 private:
56 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
57 /// it is top-down.
58 bool isBottomUp;
60 /// AvailableQueue - The priority queue to use for the available SUnits.
61 SchedulingPriorityQueue *AvailableQueue;
63 /// LiveRegDefs - A set of physical registers and their definition
64 /// that are "live". These nodes must be scheduled before any other nodes that
65 /// modifies the registers can be scheduled.
66 unsigned NumLiveRegs;
67 std::vector<SUnit*> LiveRegDefs;
68 std::vector<unsigned> LiveRegCycles;
70 /// Topo - A topological ordering for SUnits which permits fast IsReachable
71 /// and similar queries.
72 ScheduleDAGTopologicalSort Topo;
74 public:
75 ScheduleDAGRRList(MachineFunction &mf,
76 bool isbottomup,
77 SchedulingPriorityQueue *availqueue)
78 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
79 AvailableQueue(availqueue), Topo(SUnits) {
82 ~ScheduleDAGRRList() {
83 delete AvailableQueue;
86 void Schedule();
88 /// IsReachable - Checks if SU is reachable from TargetSU.
89 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
90 return Topo.IsReachable(SU, TargetSU);
93 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
94 /// create a cycle.
95 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
96 return Topo.WillCreateCycle(SU, TargetSU);
99 /// AddPred - adds a predecessor edge to SUnit SU.
100 /// This returns true if this is a new predecessor.
101 /// Updates the topological ordering if required.
102 void AddPred(SUnit *SU, const SDep &D) {
103 Topo.AddPred(SU, D.getSUnit());
104 SU->addPred(D);
107 /// RemovePred - removes a predecessor edge from SUnit SU.
108 /// This returns true if an edge was removed.
109 /// Updates the topological ordering if required.
110 void RemovePred(SUnit *SU, const SDep &D) {
111 Topo.RemovePred(SU, D.getSUnit());
112 SU->removePred(D);
115 private:
116 void ReleasePred(SUnit *SU, const SDep *PredEdge);
117 void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
118 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
119 void ReleaseSuccessors(SUnit *SU);
120 void CapturePred(SDep *PredEdge);
121 void ScheduleNodeBottomUp(SUnit*, unsigned);
122 void ScheduleNodeTopDown(SUnit*, unsigned);
123 void UnscheduleNodeBottomUp(SUnit*);
124 void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
125 SUnit *CopyAndMoveSuccessors(SUnit*);
126 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
127 const TargetRegisterClass*,
128 const TargetRegisterClass*,
129 SmallVector<SUnit*, 2>&);
130 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
131 void ListScheduleTopDown();
132 void ListScheduleBottomUp();
135 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
136 /// Updates the topological ordering if required.
137 SUnit *CreateNewSUnit(SDNode *N) {
138 unsigned NumSUnits = SUnits.size();
139 SUnit *NewNode = NewSUnit(N);
140 // Update the topological ordering.
141 if (NewNode->NodeNum >= NumSUnits)
142 Topo.InitDAGTopologicalSorting();
143 return NewNode;
146 /// CreateClone - Creates a new SUnit from an existing one.
147 /// Updates the topological ordering if required.
148 SUnit *CreateClone(SUnit *N) {
149 unsigned NumSUnits = SUnits.size();
150 SUnit *NewNode = Clone(N);
151 // Update the topological ordering.
152 if (NewNode->NodeNum >= NumSUnits)
153 Topo.InitDAGTopologicalSorting();
154 return NewNode;
157 /// ForceUnitLatencies - Return true, since register-pressure-reducing
158 /// scheduling doesn't need actual latency information.
159 bool ForceUnitLatencies() const { return true; }
161 } // end anonymous namespace
164 /// Schedule - Schedule the DAG using list scheduling.
165 void ScheduleDAGRRList::Schedule() {
166 DOUT << "********** List Scheduling **********\n";
168 NumLiveRegs = 0;
169 LiveRegDefs.resize(TRI->getNumRegs(), NULL);
170 LiveRegCycles.resize(TRI->getNumRegs(), 0);
172 // Build the scheduling graph.
173 BuildSchedGraph();
175 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
176 SUnits[su].dumpAll(this));
177 Topo.InitDAGTopologicalSorting();
179 AvailableQueue->initNodes(SUnits);
181 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
182 if (isBottomUp)
183 ListScheduleBottomUp();
184 else
185 ListScheduleTopDown();
187 AvailableQueue->releaseState();
190 //===----------------------------------------------------------------------===//
191 // Bottom-Up Scheduling
192 //===----------------------------------------------------------------------===//
194 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
195 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
196 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
197 SUnit *PredSU = PredEdge->getSUnit();
198 --PredSU->NumSuccsLeft;
200 #ifndef NDEBUG
201 if (PredSU->NumSuccsLeft < 0) {
202 cerr << "*** Scheduling failed! ***\n";
203 PredSU->dump(this);
204 cerr << " has been released too many times!\n";
205 assert(0);
207 #endif
209 // If all the node's successors are scheduled, this node is ready
210 // to be scheduled. Ignore the special EntrySU node.
211 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
212 PredSU->isAvailable = true;
213 AvailableQueue->push(PredSU);
217 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
218 // Bottom up: release predecessors
219 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
220 I != E; ++I) {
221 ReleasePred(SU, &*I);
222 if (I->isAssignedRegDep()) {
223 // This is a physical register dependency and it's impossible or
224 // expensive to copy the register. Make sure nothing that can
225 // clobber the register is scheduled between the predecessor and
226 // this node.
227 if (!LiveRegDefs[I->getReg()]) {
228 ++NumLiveRegs;
229 LiveRegDefs[I->getReg()] = I->getSUnit();
230 LiveRegCycles[I->getReg()] = CurCycle;
236 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
237 /// count of its predecessors. If a predecessor pending count is zero, add it to
238 /// the Available queue.
239 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
240 DOUT << "*** Scheduling [" << CurCycle << "]: ";
241 DEBUG(SU->dump(this));
243 assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
244 SU->setHeightToAtLeast(CurCycle);
245 Sequence.push_back(SU);
247 ReleasePredecessors(SU, CurCycle);
249 // Release all the implicit physical register defs that are live.
250 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
251 I != E; ++I) {
252 if (I->isAssignedRegDep()) {
253 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
254 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
255 assert(LiveRegDefs[I->getReg()] == SU &&
256 "Physical register dependency violated?");
257 --NumLiveRegs;
258 LiveRegDefs[I->getReg()] = NULL;
259 LiveRegCycles[I->getReg()] = 0;
264 SU->isScheduled = true;
265 AvailableQueue->ScheduledNode(SU);
268 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
269 /// unscheduled, incrcease the succ left count of its predecessors. Remove
270 /// them from AvailableQueue if necessary.
271 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
272 SUnit *PredSU = PredEdge->getSUnit();
273 if (PredSU->isAvailable) {
274 PredSU->isAvailable = false;
275 if (!PredSU->isPending)
276 AvailableQueue->remove(PredSU);
279 ++PredSU->NumSuccsLeft;
282 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
283 /// its predecessor states to reflect the change.
284 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
285 DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
286 DEBUG(SU->dump(this));
288 AvailableQueue->UnscheduledNode(SU);
290 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
291 I != E; ++I) {
292 CapturePred(&*I);
293 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
294 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
295 assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
296 "Physical register dependency violated?");
297 --NumLiveRegs;
298 LiveRegDefs[I->getReg()] = NULL;
299 LiveRegCycles[I->getReg()] = 0;
303 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
304 I != E; ++I) {
305 if (I->isAssignedRegDep()) {
306 if (!LiveRegDefs[I->getReg()]) {
307 LiveRegDefs[I->getReg()] = SU;
308 ++NumLiveRegs;
310 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
311 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
315 SU->setHeightDirty();
316 SU->isScheduled = false;
317 SU->isAvailable = true;
318 AvailableQueue->push(SU);
321 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
322 /// BTCycle in order to schedule a specific node.
323 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
324 unsigned &CurCycle) {
325 SUnit *OldSU = NULL;
326 while (CurCycle > BtCycle) {
327 OldSU = Sequence.back();
328 Sequence.pop_back();
329 if (SU->isSucc(OldSU))
330 // Don't try to remove SU from AvailableQueue.
331 SU->isAvailable = false;
332 UnscheduleNodeBottomUp(OldSU);
333 --CurCycle;
336 assert(!SU->isSucc(OldSU) && "Something is wrong!");
338 ++NumBacktracks;
341 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
342 /// successors to the newly created node.
343 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
344 if (SU->getNode()->getFlaggedNode())
345 return NULL;
347 SDNode *N = SU->getNode();
348 if (!N)
349 return NULL;
351 SUnit *NewSU;
352 bool TryUnfold = false;
353 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
354 MVT VT = N->getValueType(i);
355 if (VT == MVT::Flag)
356 return NULL;
357 else if (VT == MVT::Other)
358 TryUnfold = true;
360 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
361 const SDValue &Op = N->getOperand(i);
362 MVT VT = Op.getNode()->getValueType(Op.getResNo());
363 if (VT == MVT::Flag)
364 return NULL;
367 if (TryUnfold) {
368 SmallVector<SDNode*, 2> NewNodes;
369 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
370 return NULL;
372 DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
373 assert(NewNodes.size() == 2 && "Expected a load folding node!");
375 N = NewNodes[1];
376 SDNode *LoadNode = NewNodes[0];
377 unsigned NumVals = N->getNumValues();
378 unsigned OldNumVals = SU->getNode()->getNumValues();
379 for (unsigned i = 0; i != NumVals; ++i)
380 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
381 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
382 SDValue(LoadNode, 1));
384 // LoadNode may already exist. This can happen when there is another
385 // load from the same location and producing the same type of value
386 // but it has different alignment or volatileness.
387 bool isNewLoad = true;
388 SUnit *LoadSU;
389 if (LoadNode->getNodeId() != -1) {
390 LoadSU = &SUnits[LoadNode->getNodeId()];
391 isNewLoad = false;
392 } else {
393 LoadSU = CreateNewSUnit(LoadNode);
394 LoadNode->setNodeId(LoadSU->NodeNum);
395 ComputeLatency(LoadSU);
398 SUnit *NewSU = CreateNewSUnit(N);
399 assert(N->getNodeId() == -1 && "Node already inserted!");
400 N->setNodeId(NewSU->NodeNum);
402 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
403 for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
404 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
405 NewSU->isTwoAddress = true;
406 break;
409 if (TID.isCommutable())
410 NewSU->isCommutable = true;
411 ComputeLatency(NewSU);
413 // Record all the edges to and from the old SU, by category.
414 SmallVector<SDep, 4> ChainPreds;
415 SmallVector<SDep, 4> ChainSuccs;
416 SmallVector<SDep, 4> LoadPreds;
417 SmallVector<SDep, 4> NodePreds;
418 SmallVector<SDep, 4> NodeSuccs;
419 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
420 I != E; ++I) {
421 if (I->isCtrl())
422 ChainPreds.push_back(*I);
423 else if (I->getSUnit()->getNode() &&
424 I->getSUnit()->getNode()->isOperandOf(LoadNode))
425 LoadPreds.push_back(*I);
426 else
427 NodePreds.push_back(*I);
429 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
430 I != E; ++I) {
431 if (I->isCtrl())
432 ChainSuccs.push_back(*I);
433 else
434 NodeSuccs.push_back(*I);
437 // Now assign edges to the newly-created nodes.
438 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
439 const SDep &Pred = ChainPreds[i];
440 RemovePred(SU, Pred);
441 if (isNewLoad)
442 AddPred(LoadSU, Pred);
444 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
445 const SDep &Pred = LoadPreds[i];
446 RemovePred(SU, Pred);
447 if (isNewLoad)
448 AddPred(LoadSU, Pred);
450 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
451 const SDep &Pred = NodePreds[i];
452 RemovePred(SU, Pred);
453 AddPred(NewSU, Pred);
455 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
456 SDep D = NodeSuccs[i];
457 SUnit *SuccDep = D.getSUnit();
458 D.setSUnit(SU);
459 RemovePred(SuccDep, D);
460 D.setSUnit(NewSU);
461 AddPred(SuccDep, D);
463 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
464 SDep D = ChainSuccs[i];
465 SUnit *SuccDep = D.getSUnit();
466 D.setSUnit(SU);
467 RemovePred(SuccDep, D);
468 if (isNewLoad) {
469 D.setSUnit(LoadSU);
470 AddPred(SuccDep, D);
474 // Add a data dependency to reflect that NewSU reads the value defined
475 // by LoadSU.
476 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
478 if (isNewLoad)
479 AvailableQueue->addNode(LoadSU);
480 AvailableQueue->addNode(NewSU);
482 ++NumUnfolds;
484 if (NewSU->NumSuccsLeft == 0) {
485 NewSU->isAvailable = true;
486 return NewSU;
488 SU = NewSU;
491 DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
492 NewSU = CreateClone(SU);
494 // New SUnit has the exact same predecessors.
495 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
496 I != E; ++I)
497 if (!I->isArtificial())
498 AddPred(NewSU, *I);
500 // Only copy scheduled successors. Cut them from old node's successor
501 // list and move them over.
502 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
503 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
504 I != E; ++I) {
505 if (I->isArtificial())
506 continue;
507 SUnit *SuccSU = I->getSUnit();
508 if (SuccSU->isScheduled) {
509 SDep D = *I;
510 D.setSUnit(NewSU);
511 AddPred(SuccSU, D);
512 D.setSUnit(SU);
513 DelDeps.push_back(std::make_pair(SuccSU, D));
516 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
517 RemovePred(DelDeps[i].first, DelDeps[i].second);
519 AvailableQueue->updateNode(SU);
520 AvailableQueue->addNode(NewSU);
522 ++NumDups;
523 return NewSU;
526 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
527 /// scheduled successors of the given SUnit to the last copy.
528 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
529 const TargetRegisterClass *DestRC,
530 const TargetRegisterClass *SrcRC,
531 SmallVector<SUnit*, 2> &Copies) {
532 SUnit *CopyFromSU = CreateNewSUnit(NULL);
533 CopyFromSU->CopySrcRC = SrcRC;
534 CopyFromSU->CopyDstRC = DestRC;
536 SUnit *CopyToSU = CreateNewSUnit(NULL);
537 CopyToSU->CopySrcRC = DestRC;
538 CopyToSU->CopyDstRC = SrcRC;
540 // Only copy scheduled successors. Cut them from old node's successor
541 // list and move them over.
542 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
543 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
544 I != E; ++I) {
545 if (I->isArtificial())
546 continue;
547 SUnit *SuccSU = I->getSUnit();
548 if (SuccSU->isScheduled) {
549 SDep D = *I;
550 D.setSUnit(CopyToSU);
551 AddPred(SuccSU, D);
552 DelDeps.push_back(std::make_pair(SuccSU, *I));
555 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
556 RemovePred(DelDeps[i].first, DelDeps[i].second);
558 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
559 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
561 AvailableQueue->updateNode(SU);
562 AvailableQueue->addNode(CopyFromSU);
563 AvailableQueue->addNode(CopyToSU);
564 Copies.push_back(CopyFromSU);
565 Copies.push_back(CopyToSU);
567 ++NumPRCopies;
570 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
571 /// definition of the specified node.
572 /// FIXME: Move to SelectionDAG?
573 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
574 const TargetInstrInfo *TII) {
575 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
576 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
577 unsigned NumRes = TID.getNumDefs();
578 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
579 if (Reg == *ImpDef)
580 break;
581 ++NumRes;
583 return N->getValueType(NumRes);
586 /// CheckForLiveRegDef - Return true and update live register vector if the
587 /// specified register def of the specified SUnit clobbers any "live" registers.
588 static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
589 std::vector<SUnit*> &LiveRegDefs,
590 SmallSet<unsigned, 4> &RegAdded,
591 SmallVector<unsigned, 4> &LRegs,
592 const TargetRegisterInfo *TRI) {
593 bool Added = false;
594 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
595 if (RegAdded.insert(Reg)) {
596 LRegs.push_back(Reg);
597 Added = true;
600 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
601 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
602 if (RegAdded.insert(*Alias)) {
603 LRegs.push_back(*Alias);
604 Added = true;
607 return Added;
610 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
611 /// scheduling of the given node to satisfy live physical register dependencies.
612 /// If the specific node is the last one that's available to schedule, do
613 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
614 bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
615 SmallVector<unsigned, 4> &LRegs){
616 if (NumLiveRegs == 0)
617 return false;
619 SmallSet<unsigned, 4> RegAdded;
620 // If this node would clobber any "live" register, then it's not ready.
621 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
622 I != E; ++I) {
623 if (I->isAssignedRegDep())
624 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
625 RegAdded, LRegs, TRI);
628 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
629 if (Node->getOpcode() == ISD::INLINEASM) {
630 // Inline asm can clobber physical defs.
631 unsigned NumOps = Node->getNumOperands();
632 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
633 --NumOps; // Ignore the flag operand.
635 for (unsigned i = 2; i != NumOps;) {
636 unsigned Flags =
637 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
638 unsigned NumVals = (Flags & 0xffff) >> 3;
640 ++i; // Skip the ID value.
641 if ((Flags & 7) == 2 || (Flags & 7) == 6) {
642 // Check for def of register or earlyclobber register.
643 for (; NumVals; --NumVals, ++i) {
644 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
645 if (TargetRegisterInfo::isPhysicalRegister(Reg))
646 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
648 } else
649 i += NumVals;
651 continue;
654 if (!Node->isMachineOpcode())
655 continue;
656 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
657 if (!TID.ImplicitDefs)
658 continue;
659 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
660 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
662 return !LRegs.empty();
666 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
667 /// schedulers.
668 void ScheduleDAGRRList::ListScheduleBottomUp() {
669 unsigned CurCycle = 0;
671 // Release any predecessors of the special Exit node.
672 ReleasePredecessors(&ExitSU, CurCycle);
674 // Add root to Available queue.
675 if (!SUnits.empty()) {
676 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
677 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
678 RootSU->isAvailable = true;
679 AvailableQueue->push(RootSU);
682 // While Available queue is not empty, grab the node with the highest
683 // priority. If it is not ready put it back. Schedule the node.
684 SmallVector<SUnit*, 4> NotReady;
685 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
686 Sequence.reserve(SUnits.size());
687 while (!AvailableQueue->empty()) {
688 bool Delayed = false;
689 LRegsMap.clear();
690 SUnit *CurSU = AvailableQueue->pop();
691 while (CurSU) {
692 SmallVector<unsigned, 4> LRegs;
693 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
694 break;
695 Delayed = true;
696 LRegsMap.insert(std::make_pair(CurSU, LRegs));
698 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
699 NotReady.push_back(CurSU);
700 CurSU = AvailableQueue->pop();
703 // All candidates are delayed due to live physical reg dependencies.
704 // Try backtracking, code duplication, or inserting cross class copies
705 // to resolve it.
706 if (Delayed && !CurSU) {
707 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
708 SUnit *TrySU = NotReady[i];
709 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
711 // Try unscheduling up to the point where it's safe to schedule
712 // this node.
713 unsigned LiveCycle = CurCycle;
714 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
715 unsigned Reg = LRegs[j];
716 unsigned LCycle = LiveRegCycles[Reg];
717 LiveCycle = std::min(LiveCycle, LCycle);
719 SUnit *OldSU = Sequence[LiveCycle];
720 if (!WillCreateCycle(TrySU, OldSU)) {
721 BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
722 // Force the current node to be scheduled before the node that
723 // requires the physical reg dep.
724 if (OldSU->isAvailable) {
725 OldSU->isAvailable = false;
726 AvailableQueue->remove(OldSU);
728 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
729 /*Reg=*/0, /*isNormalMemory=*/false,
730 /*isMustAlias=*/false, /*isArtificial=*/true));
731 // If one or more successors has been unscheduled, then the current
732 // node is no longer avaialable. Schedule a successor that's now
733 // available instead.
734 if (!TrySU->isAvailable)
735 CurSU = AvailableQueue->pop();
736 else {
737 CurSU = TrySU;
738 TrySU->isPending = false;
739 NotReady.erase(NotReady.begin()+i);
741 break;
745 if (!CurSU) {
746 // Can't backtrack. If it's too expensive to copy the value, then try
747 // duplicate the nodes that produces these "too expensive to copy"
748 // values to break the dependency. In case even that doesn't work,
749 // insert cross class copies.
750 // If it's not too expensive, i.e. cost != -1, issue copies.
751 SUnit *TrySU = NotReady[0];
752 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
753 assert(LRegs.size() == 1 && "Can't handle this yet!");
754 unsigned Reg = LRegs[0];
755 SUnit *LRDef = LiveRegDefs[Reg];
756 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
757 const TargetRegisterClass *RC =
758 TRI->getPhysicalRegisterRegClass(Reg, VT);
759 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
761 // If cross copy register class is null, then it must be possible copy
762 // the value directly. Do not try duplicate the def.
763 SUnit *NewDef = 0;
764 if (DestRC)
765 NewDef = CopyAndMoveSuccessors(LRDef);
766 else
767 DestRC = RC;
768 if (!NewDef) {
769 // Issue copies, these can be expensive cross register class copies.
770 SmallVector<SUnit*, 2> Copies;
771 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
772 DOUT << "Adding an edge from SU #" << TrySU->NodeNum
773 << " to SU #" << Copies.front()->NodeNum << "\n";
774 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
775 /*Reg=*/0, /*isNormalMemory=*/false,
776 /*isMustAlias=*/false,
777 /*isArtificial=*/true));
778 NewDef = Copies.back();
781 DOUT << "Adding an edge from SU #" << NewDef->NodeNum
782 << " to SU #" << TrySU->NodeNum << "\n";
783 LiveRegDefs[Reg] = NewDef;
784 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
785 /*Reg=*/0, /*isNormalMemory=*/false,
786 /*isMustAlias=*/false,
787 /*isArtificial=*/true));
788 TrySU->isAvailable = false;
789 CurSU = NewDef;
792 assert(CurSU && "Unable to resolve live physical register dependencies!");
795 // Add the nodes that aren't ready back onto the available list.
796 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
797 NotReady[i]->isPending = false;
798 // May no longer be available due to backtracking.
799 if (NotReady[i]->isAvailable)
800 AvailableQueue->push(NotReady[i]);
802 NotReady.clear();
804 if (CurSU)
805 ScheduleNodeBottomUp(CurSU, CurCycle);
806 ++CurCycle;
809 // Reverse the order if it is bottom up.
810 std::reverse(Sequence.begin(), Sequence.end());
812 #ifndef NDEBUG
813 VerifySchedule(isBottomUp);
814 #endif
817 //===----------------------------------------------------------------------===//
818 // Top-Down Scheduling
819 //===----------------------------------------------------------------------===//
821 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
822 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
823 void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
824 SUnit *SuccSU = SuccEdge->getSUnit();
825 --SuccSU->NumPredsLeft;
827 #ifndef NDEBUG
828 if (SuccSU->NumPredsLeft < 0) {
829 cerr << "*** Scheduling failed! ***\n";
830 SuccSU->dump(this);
831 cerr << " has been released too many times!\n";
832 assert(0);
834 #endif
836 // If all the node's predecessors are scheduled, this node is ready
837 // to be scheduled. Ignore the special ExitSU node.
838 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
839 SuccSU->isAvailable = true;
840 AvailableQueue->push(SuccSU);
844 void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
845 // Top down: release successors
846 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
847 I != E; ++I) {
848 assert(!I->isAssignedRegDep() &&
849 "The list-tdrr scheduler doesn't yet support physreg dependencies!");
851 ReleaseSucc(SU, &*I);
855 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
856 /// count of its successors. If a successor pending count is zero, add it to
857 /// the Available queue.
858 void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
859 DOUT << "*** Scheduling [" << CurCycle << "]: ";
860 DEBUG(SU->dump(this));
862 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
863 SU->setDepthToAtLeast(CurCycle);
864 Sequence.push_back(SU);
866 ReleaseSuccessors(SU);
867 SU->isScheduled = true;
868 AvailableQueue->ScheduledNode(SU);
871 /// ListScheduleTopDown - The main loop of list scheduling for top-down
872 /// schedulers.
873 void ScheduleDAGRRList::ListScheduleTopDown() {
874 unsigned CurCycle = 0;
876 // Release any successors of the special Entry node.
877 ReleaseSuccessors(&EntrySU);
879 // All leaves to Available queue.
880 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
881 // It is available if it has no predecessors.
882 if (SUnits[i].Preds.empty()) {
883 AvailableQueue->push(&SUnits[i]);
884 SUnits[i].isAvailable = true;
888 // While Available queue is not empty, grab the node with the highest
889 // priority. If it is not ready put it back. Schedule the node.
890 Sequence.reserve(SUnits.size());
891 while (!AvailableQueue->empty()) {
892 SUnit *CurSU = AvailableQueue->pop();
894 if (CurSU)
895 ScheduleNodeTopDown(CurSU, CurCycle);
896 ++CurCycle;
899 #ifndef NDEBUG
900 VerifySchedule(isBottomUp);
901 #endif
905 //===----------------------------------------------------------------------===//
906 // RegReductionPriorityQueue Implementation
907 //===----------------------------------------------------------------------===//
909 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
910 // to reduce register pressure.
912 namespace {
913 template<class SF>
914 class RegReductionPriorityQueue;
916 /// Sorting functions for the Available queue.
917 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
918 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
919 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
920 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
922 bool operator()(const SUnit* left, const SUnit* right) const;
925 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
926 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
927 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
928 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
930 bool operator()(const SUnit* left, const SUnit* right) const;
932 } // end anonymous namespace
934 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
935 /// Smaller number is the higher priority.
936 static unsigned
937 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
938 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
939 if (SethiUllmanNumber != 0)
940 return SethiUllmanNumber;
942 unsigned Extra = 0;
943 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
944 I != E; ++I) {
945 if (I->isCtrl()) continue; // ignore chain preds
946 SUnit *PredSU = I->getSUnit();
947 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
948 if (PredSethiUllman > SethiUllmanNumber) {
949 SethiUllmanNumber = PredSethiUllman;
950 Extra = 0;
951 } else if (PredSethiUllman == SethiUllmanNumber)
952 ++Extra;
955 SethiUllmanNumber += Extra;
957 if (SethiUllmanNumber == 0)
958 SethiUllmanNumber = 1;
960 return SethiUllmanNumber;
963 namespace {
964 template<class SF>
965 class VISIBILITY_HIDDEN RegReductionPriorityQueue
966 : public SchedulingPriorityQueue {
967 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
968 unsigned currentQueueId;
970 protected:
971 // SUnits - The SUnits for the current graph.
972 std::vector<SUnit> *SUnits;
974 const TargetInstrInfo *TII;
975 const TargetRegisterInfo *TRI;
976 ScheduleDAGRRList *scheduleDAG;
978 // SethiUllmanNumbers - The SethiUllman number for each node.
979 std::vector<unsigned> SethiUllmanNumbers;
981 public:
982 RegReductionPriorityQueue(const TargetInstrInfo *tii,
983 const TargetRegisterInfo *tri) :
984 Queue(SF(this)), currentQueueId(0),
985 TII(tii), TRI(tri), scheduleDAG(NULL) {}
987 void initNodes(std::vector<SUnit> &sunits) {
988 SUnits = &sunits;
989 // Add pseudo dependency edges for two-address nodes.
990 AddPseudoTwoAddrDeps();
991 // Reroute edges to nodes with multiple uses.
992 PrescheduleNodesWithMultipleUses();
993 // Calculate node priorities.
994 CalculateSethiUllmanNumbers();
997 void addNode(const SUnit *SU) {
998 unsigned SUSize = SethiUllmanNumbers.size();
999 if (SUnits->size() > SUSize)
1000 SethiUllmanNumbers.resize(SUSize*2, 0);
1001 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1004 void updateNode(const SUnit *SU) {
1005 SethiUllmanNumbers[SU->NodeNum] = 0;
1006 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1009 void releaseState() {
1010 SUnits = 0;
1011 SethiUllmanNumbers.clear();
1014 unsigned getNodePriority(const SUnit *SU) const {
1015 assert(SU->NodeNum < SethiUllmanNumbers.size());
1016 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1017 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1018 // CopyToReg should be close to its uses to facilitate coalescing and
1019 // avoid spilling.
1020 return 0;
1021 if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
1022 Opc == TargetInstrInfo::SUBREG_TO_REG ||
1023 Opc == TargetInstrInfo::INSERT_SUBREG)
1024 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1025 // close to their uses to facilitate coalescing.
1026 return 0;
1027 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1028 // If SU does not have a register use, i.e. it doesn't produce a value
1029 // that would be consumed (e.g. store), then it terminates a chain of
1030 // computation. Give it a large SethiUllman number so it will be
1031 // scheduled right before its predecessors that it doesn't lengthen
1032 // their live ranges.
1033 return 0xffff;
1034 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1035 // If SU does not have a register def, schedule it close to its uses
1036 // because it does not lengthen any live ranges.
1037 return 0;
1038 return SethiUllmanNumbers[SU->NodeNum];
1041 unsigned size() const { return Queue.size(); }
1043 bool empty() const { return Queue.empty(); }
1045 void push(SUnit *U) {
1046 assert(!U->NodeQueueId && "Node in the queue already");
1047 U->NodeQueueId = ++currentQueueId;
1048 Queue.push(U);
1051 void push_all(const std::vector<SUnit *> &Nodes) {
1052 for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
1053 push(Nodes[i]);
1056 SUnit *pop() {
1057 if (empty()) return NULL;
1058 SUnit *V = Queue.top();
1059 Queue.pop();
1060 V->NodeQueueId = 0;
1061 return V;
1064 void remove(SUnit *SU) {
1065 assert(!Queue.empty() && "Queue is empty!");
1066 assert(SU->NodeQueueId != 0 && "Not in queue!");
1067 Queue.erase_one(SU);
1068 SU->NodeQueueId = 0;
1071 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1072 scheduleDAG = scheduleDag;
1075 protected:
1076 bool canClobber(const SUnit *SU, const SUnit *Op);
1077 void AddPseudoTwoAddrDeps();
1078 void PrescheduleNodesWithMultipleUses();
1079 void CalculateSethiUllmanNumbers();
1082 typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1083 BURegReductionPriorityQueue;
1085 typedef RegReductionPriorityQueue<td_ls_rr_sort>
1086 TDRegReductionPriorityQueue;
1089 /// closestSucc - Returns the scheduled cycle of the successor which is
1090 /// closest to the current cycle.
1091 static unsigned closestSucc(const SUnit *SU) {
1092 unsigned MaxHeight = 0;
1093 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1094 I != E; ++I) {
1095 if (I->isCtrl()) continue; // ignore chain succs
1096 unsigned Height = I->getSUnit()->getHeight();
1097 // If there are bunch of CopyToRegs stacked up, they should be considered
1098 // to be at the same position.
1099 if (I->getSUnit()->getNode() &&
1100 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1101 Height = closestSucc(I->getSUnit())+1;
1102 if (Height > MaxHeight)
1103 MaxHeight = Height;
1105 return MaxHeight;
1108 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
1109 /// for scratch registers, i.e. number of data dependencies.
1110 static unsigned calcMaxScratches(const SUnit *SU) {
1111 unsigned Scratches = 0;
1112 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1113 I != E; ++I) {
1114 if (I->isCtrl()) continue; // ignore chain preds
1115 Scratches++;
1117 return Scratches;
1120 // Bottom up
1121 bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1122 unsigned LPriority = SPQ->getNodePriority(left);
1123 unsigned RPriority = SPQ->getNodePriority(right);
1124 if (LPriority != RPriority)
1125 return LPriority > RPriority;
1127 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
1128 // e.g.
1129 // t1 = op t2, c1
1130 // t3 = op t4, c2
1132 // and the following instructions are both ready.
1133 // t2 = op c3
1134 // t4 = op c4
1136 // Then schedule t2 = op first.
1137 // i.e.
1138 // t4 = op c4
1139 // t2 = op c3
1140 // t1 = op t2, c1
1141 // t3 = op t4, c2
1143 // This creates more short live intervals.
1144 unsigned LDist = closestSucc(left);
1145 unsigned RDist = closestSucc(right);
1146 if (LDist != RDist)
1147 return LDist < RDist;
1149 // How many registers becomes live when the node is scheduled.
1150 unsigned LScratch = calcMaxScratches(left);
1151 unsigned RScratch = calcMaxScratches(right);
1152 if (LScratch != RScratch)
1153 return LScratch > RScratch;
1155 if (left->getHeight() != right->getHeight())
1156 return left->getHeight() > right->getHeight();
1158 if (left->getDepth() != right->getDepth())
1159 return left->getDepth() < right->getDepth();
1161 assert(left->NodeQueueId && right->NodeQueueId &&
1162 "NodeQueueId cannot be zero");
1163 return (left->NodeQueueId > right->NodeQueueId);
1166 template<class SF>
1167 bool
1168 RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1169 if (SU->isTwoAddress) {
1170 unsigned Opc = SU->getNode()->getMachineOpcode();
1171 const TargetInstrDesc &TID = TII->get(Opc);
1172 unsigned NumRes = TID.getNumDefs();
1173 unsigned NumOps = TID.getNumOperands() - NumRes;
1174 for (unsigned i = 0; i != NumOps; ++i) {
1175 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1176 SDNode *DU = SU->getNode()->getOperand(i).getNode();
1177 if (DU->getNodeId() != -1 &&
1178 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1179 return true;
1183 return false;
1187 /// hasCopyToRegUse - Return true if SU has a value successor that is a
1188 /// CopyToReg node.
1189 static bool hasCopyToRegUse(const SUnit *SU) {
1190 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1191 I != E; ++I) {
1192 if (I->isCtrl()) continue;
1193 const SUnit *SuccSU = I->getSUnit();
1194 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1195 return true;
1197 return false;
1200 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1201 /// physical register defs.
1202 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1203 const TargetInstrInfo *TII,
1204 const TargetRegisterInfo *TRI) {
1205 SDNode *N = SuccSU->getNode();
1206 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1207 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1208 assert(ImpDefs && "Caller should check hasPhysRegDefs");
1209 for (const SDNode *SUNode = SU->getNode(); SUNode;
1210 SUNode = SUNode->getFlaggedNode()) {
1211 if (!SUNode->isMachineOpcode())
1212 continue;
1213 const unsigned *SUImpDefs =
1214 TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
1215 if (!SUImpDefs)
1216 return false;
1217 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1218 MVT VT = N->getValueType(i);
1219 if (VT == MVT::Flag || VT == MVT::Other)
1220 continue;
1221 if (!N->hasAnyUseOfValue(i))
1222 continue;
1223 unsigned Reg = ImpDefs[i - NumDefs];
1224 for (;*SUImpDefs; ++SUImpDefs) {
1225 unsigned SUReg = *SUImpDefs;
1226 if (TRI->regsOverlap(Reg, SUReg))
1227 return true;
1231 return false;
1234 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1235 /// are not handled well by the general register pressure reduction
1236 /// heuristics. When presented with code like this:
1238 /// N
1239 /// / |
1240 /// / |
1241 /// U store
1242 /// |
1243 /// ...
1245 /// the heuristics tend to push the store up, but since the
1246 /// operand of the store has another use (U), this would increase
1247 /// the length of that other use (the U->N edge).
1249 /// This function transforms code like the above to route U's
1250 /// dependence through the store when possible, like this:
1252 /// N
1253 /// ||
1254 /// ||
1255 /// store
1256 /// |
1257 /// U
1258 /// |
1259 /// ...
1261 /// This results in the store being scheduled immediately
1262 /// after N, which shortens the U->N live range, reducing
1263 /// register pressure.
1265 template<class SF>
1266 void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1267 // Visit all the nodes in topological order, working top-down.
1268 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1269 SUnit *SU = &(*SUnits)[i];
1270 // For now, only look at nodes with no data successors, such as stores.
1271 // These are especially important, due to the heuristics in
1272 // getNodePriority for nodes with no data successors.
1273 if (SU->NumSuccs != 0)
1274 continue;
1275 // For now, only look at nodes with exactly one data predecessor.
1276 if (SU->NumPreds != 1)
1277 continue;
1278 // Avoid prescheduling copies to virtual registers, which don't behave
1279 // like other nodes from the perspective of scheduling heuristics.
1280 if (SDNode *N = SU->getNode())
1281 if (N->getOpcode() == ISD::CopyToReg &&
1282 TargetRegisterInfo::isVirtualRegister
1283 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1284 continue;
1286 // Locate the single data predecessor.
1287 SUnit *PredSU = 0;
1288 for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1289 EE = SU->Preds.end(); II != EE; ++II)
1290 if (!II->isCtrl()) {
1291 PredSU = II->getSUnit();
1292 break;
1294 assert(PredSU);
1296 // Don't rewrite edges that carry physregs, because that requires additional
1297 // support infrastructure.
1298 if (PredSU->hasPhysRegDefs)
1299 continue;
1300 // Short-circuit the case where SU is PredSU's only data successor.
1301 if (PredSU->NumSuccs == 1)
1302 continue;
1303 // Avoid prescheduling to copies from virtual registers, which don't behave
1304 // like other nodes from the perspective of scheduling // heuristics.
1305 if (SDNode *N = SU->getNode())
1306 if (N->getOpcode() == ISD::CopyFromReg &&
1307 TargetRegisterInfo::isVirtualRegister
1308 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1309 continue;
1311 // Perform checks on the successors of PredSU.
1312 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1313 EE = PredSU->Succs.end(); II != EE; ++II) {
1314 SUnit *PredSuccSU = II->getSUnit();
1315 if (PredSuccSU == SU) continue;
1316 // If PredSU has another successor with no data successors, for
1317 // now don't attempt to choose either over the other.
1318 if (PredSuccSU->NumSuccs == 0)
1319 goto outer_loop_continue;
1320 // Don't break physical register dependencies.
1321 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1322 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1323 goto outer_loop_continue;
1324 // Don't introduce graph cycles.
1325 if (scheduleDAG->IsReachable(SU, PredSuccSU))
1326 goto outer_loop_continue;
1329 // Ok, the transformation is safe and the heuristics suggest it is
1330 // profitable. Update the graph.
1331 DOUT << "Prescheduling SU # " << SU->NodeNum
1332 << " next to PredSU # " << PredSU->NodeNum
1333 << " to guide scheduling in the presence of multiple uses\n";
1334 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1335 SDep Edge = PredSU->Succs[i];
1336 assert(!Edge.isAssignedRegDep());
1337 SUnit *SuccSU = Edge.getSUnit();
1338 if (SuccSU != SU) {
1339 Edge.setSUnit(PredSU);
1340 scheduleDAG->RemovePred(SuccSU, Edge);
1341 scheduleDAG->AddPred(SU, Edge);
1342 Edge.setSUnit(SU);
1343 scheduleDAG->AddPred(SuccSU, Edge);
1344 --i;
1347 outer_loop_continue:;
1351 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1352 /// it as a def&use operand. Add a pseudo control edge from it to the other
1353 /// node (if it won't create a cycle) so the two-address one will be scheduled
1354 /// first (lower in the schedule). If both nodes are two-address, favor the
1355 /// one that has a CopyToReg use (more likely to be a loop induction update).
1356 /// If both are two-address, but one is commutable while the other is not
1357 /// commutable, favor the one that's not commutable.
1358 template<class SF>
1359 void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1360 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1361 SUnit *SU = &(*SUnits)[i];
1362 if (!SU->isTwoAddress)
1363 continue;
1365 SDNode *Node = SU->getNode();
1366 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1367 continue;
1369 unsigned Opc = Node->getMachineOpcode();
1370 const TargetInstrDesc &TID = TII->get(Opc);
1371 unsigned NumRes = TID.getNumDefs();
1372 unsigned NumOps = TID.getNumOperands() - NumRes;
1373 for (unsigned j = 0; j != NumOps; ++j) {
1374 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1375 continue;
1376 SDNode *DU = SU->getNode()->getOperand(j).getNode();
1377 if (DU->getNodeId() == -1)
1378 continue;
1379 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1380 if (!DUSU) continue;
1381 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1382 E = DUSU->Succs.end(); I != E; ++I) {
1383 if (I->isCtrl()) continue;
1384 SUnit *SuccSU = I->getSUnit();
1385 if (SuccSU == SU)
1386 continue;
1387 // Be conservative. Ignore if nodes aren't at roughly the same
1388 // depth and height.
1389 if (SuccSU->getHeight() < SU->getHeight() &&
1390 (SU->getHeight() - SuccSU->getHeight()) > 1)
1391 continue;
1392 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1393 // constrains whatever is using the copy, instead of the copy
1394 // itself. In the case that the copy is coalesced, this
1395 // preserves the intent of the pseudo two-address heurietics.
1396 while (SuccSU->Succs.size() == 1 &&
1397 SuccSU->getNode()->isMachineOpcode() &&
1398 SuccSU->getNode()->getMachineOpcode() ==
1399 TargetInstrInfo::COPY_TO_REGCLASS)
1400 SuccSU = SuccSU->Succs.front().getSUnit();
1401 // Don't constrain non-instruction nodes.
1402 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1403 continue;
1404 // Don't constrain nodes with physical register defs if the
1405 // predecessor can clobber them.
1406 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1407 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1408 continue;
1410 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1411 // these may be coalesced away. We want them close to their uses.
1412 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1413 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
1414 SuccOpc == TargetInstrInfo::INSERT_SUBREG ||
1415 SuccOpc == TargetInstrInfo::SUBREG_TO_REG)
1416 continue;
1417 if ((!canClobber(SuccSU, DUSU) ||
1418 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
1419 (!SU->isCommutable && SuccSU->isCommutable)) &&
1420 !scheduleDAG->IsReachable(SuccSU, SU)) {
1421 DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum
1422 << " to SU #" << SuccSU->NodeNum << "\n";
1423 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1424 /*Reg=*/0, /*isNormalMemory=*/false,
1425 /*isMustAlias=*/false,
1426 /*isArtificial=*/true));
1433 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1434 /// scheduling units.
1435 template<class SF>
1436 void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1437 SethiUllmanNumbers.assign(SUnits->size(), 0);
1439 for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1440 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1443 /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
1444 /// predecessors of the successors of the SUnit SU. Stop when the provided
1445 /// limit is exceeded.
1446 static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1447 unsigned Limit) {
1448 unsigned Sum = 0;
1449 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1450 I != E; ++I) {
1451 const SUnit *SuccSU = I->getSUnit();
1452 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1453 EE = SuccSU->Preds.end(); II != EE; ++II) {
1454 SUnit *PredSU = II->getSUnit();
1455 if (!PredSU->isScheduled)
1456 if (++Sum > Limit)
1457 return Sum;
1460 return Sum;
1464 // Top down
1465 bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1466 unsigned LPriority = SPQ->getNodePriority(left);
1467 unsigned RPriority = SPQ->getNodePriority(right);
1468 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1469 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
1470 bool LIsFloater = LIsTarget && left->NumPreds == 0;
1471 bool RIsFloater = RIsTarget && right->NumPreds == 0;
1472 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1473 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1475 if (left->NumSuccs == 0 && right->NumSuccs != 0)
1476 return false;
1477 else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1478 return true;
1480 if (LIsFloater)
1481 LBonus -= 2;
1482 if (RIsFloater)
1483 RBonus -= 2;
1484 if (left->NumSuccs == 1)
1485 LBonus += 2;
1486 if (right->NumSuccs == 1)
1487 RBonus += 2;
1489 if (LPriority+LBonus != RPriority+RBonus)
1490 return LPriority+LBonus < RPriority+RBonus;
1492 if (left->getDepth() != right->getDepth())
1493 return left->getDepth() < right->getDepth();
1495 if (left->NumSuccsLeft != right->NumSuccsLeft)
1496 return left->NumSuccsLeft > right->NumSuccsLeft;
1498 assert(left->NodeQueueId && right->NodeQueueId &&
1499 "NodeQueueId cannot be zero");
1500 return (left->NodeQueueId > right->NodeQueueId);
1503 //===----------------------------------------------------------------------===//
1504 // Public Constructor Functions
1505 //===----------------------------------------------------------------------===//
1507 llvm::ScheduleDAGSDNodes *
1508 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1509 const TargetMachine &TM = IS->TM;
1510 const TargetInstrInfo *TII = TM.getInstrInfo();
1511 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1513 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
1515 ScheduleDAGRRList *SD =
1516 new ScheduleDAGRRList(*IS->MF, true, PQ);
1517 PQ->setScheduleDAG(SD);
1518 return SD;
1521 llvm::ScheduleDAGSDNodes *
1522 llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1523 const TargetMachine &TM = IS->TM;
1524 const TargetInstrInfo *TII = TM.getInstrInfo();
1525 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1527 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
1529 ScheduleDAGRRList *SD =
1530 new ScheduleDAGRRList(*IS->MF, false, PQ);
1531 PQ->setScheduleDAG(SD);
1532 return SD;