Indentation.
[llvm/avr.git] / lib / CodeGen / SelectionDAG / ScheduleDAGRRList.cpp
blob26da246c412af2f72ceb381ab0229f5691662946
1 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "pre-RA-sched"
19 #include "ScheduleDAGSDNodes.h"
20 #include "llvm/CodeGen/SchedulerRegistry.h"
21 #include "llvm/CodeGen/SelectionDAGISel.h"
22 #include "llvm/Target/TargetRegisterInfo.h"
23 #include "llvm/Target/TargetData.h"
24 #include "llvm/Target/TargetMachine.h"
25 #include "llvm/Target/TargetInstrInfo.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/Compiler.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/ADT/PriorityQueue.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include <climits>
34 using namespace llvm;
36 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
37 STATISTIC(NumUnfolds, "Number of nodes unfolded");
38 STATISTIC(NumDups, "Number of duplicated nodes");
39 STATISTIC(NumPRCopies, "Number of physical register copies");
41 static RegisterScheduler
42 burrListDAGScheduler("list-burr",
43 "Bottom-up register reduction list scheduling",
44 createBURRListDAGScheduler);
45 static RegisterScheduler
46 tdrListrDAGScheduler("list-tdrr",
47 "Top-down register reduction list scheduling",
48 createTDRRListDAGScheduler);
50 namespace {
51 //===----------------------------------------------------------------------===//
52 /// ScheduleDAGRRList - The actual register reduction list scheduler
53 /// implementation. This supports both top-down and bottom-up scheduling.
54 ///
55 class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
56 private:
57 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
58 /// it is top-down.
59 bool isBottomUp;
61 /// AvailableQueue - The priority queue to use for the available SUnits.
62 SchedulingPriorityQueue *AvailableQueue;
64 /// LiveRegDefs - A set of physical registers and their definition
65 /// that are "live". These nodes must be scheduled before any other nodes that
66 /// modifies the registers can be scheduled.
67 unsigned NumLiveRegs;
68 std::vector<SUnit*> LiveRegDefs;
69 std::vector<unsigned> LiveRegCycles;
71 /// Topo - A topological ordering for SUnits which permits fast IsReachable
72 /// and similar queries.
73 ScheduleDAGTopologicalSort Topo;
75 public:
76 ScheduleDAGRRList(MachineFunction &mf,
77 bool isbottomup,
78 SchedulingPriorityQueue *availqueue)
79 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
80 AvailableQueue(availqueue), Topo(SUnits) {
83 ~ScheduleDAGRRList() {
84 delete AvailableQueue;
87 void Schedule();
89 /// IsReachable - Checks if SU is reachable from TargetSU.
90 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
91 return Topo.IsReachable(SU, TargetSU);
94 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
95 /// create a cycle.
96 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
97 return Topo.WillCreateCycle(SU, TargetSU);
100 /// AddPred - adds a predecessor edge to SUnit SU.
101 /// This returns true if this is a new predecessor.
102 /// Updates the topological ordering if required.
103 void AddPred(SUnit *SU, const SDep &D) {
104 Topo.AddPred(SU, D.getSUnit());
105 SU->addPred(D);
108 /// RemovePred - removes a predecessor edge from SUnit SU.
109 /// This returns true if an edge was removed.
110 /// Updates the topological ordering if required.
111 void RemovePred(SUnit *SU, const SDep &D) {
112 Topo.RemovePred(SU, D.getSUnit());
113 SU->removePred(D);
116 private:
117 void ReleasePred(SUnit *SU, const SDep *PredEdge);
118 void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
119 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
120 void ReleaseSuccessors(SUnit *SU);
121 void CapturePred(SDep *PredEdge);
122 void ScheduleNodeBottomUp(SUnit*, unsigned);
123 void ScheduleNodeTopDown(SUnit*, unsigned);
124 void UnscheduleNodeBottomUp(SUnit*);
125 void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
126 SUnit *CopyAndMoveSuccessors(SUnit*);
127 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
128 const TargetRegisterClass*,
129 const TargetRegisterClass*,
130 SmallVector<SUnit*, 2>&);
131 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
132 void ListScheduleTopDown();
133 void ListScheduleBottomUp();
136 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
137 /// Updates the topological ordering if required.
138 SUnit *CreateNewSUnit(SDNode *N) {
139 unsigned NumSUnits = SUnits.size();
140 SUnit *NewNode = NewSUnit(N);
141 // Update the topological ordering.
142 if (NewNode->NodeNum >= NumSUnits)
143 Topo.InitDAGTopologicalSorting();
144 return NewNode;
147 /// CreateClone - Creates a new SUnit from an existing one.
148 /// Updates the topological ordering if required.
149 SUnit *CreateClone(SUnit *N) {
150 unsigned NumSUnits = SUnits.size();
151 SUnit *NewNode = Clone(N);
152 // Update the topological ordering.
153 if (NewNode->NodeNum >= NumSUnits)
154 Topo.InitDAGTopologicalSorting();
155 return NewNode;
158 /// ForceUnitLatencies - Return true, since register-pressure-reducing
159 /// scheduling doesn't need actual latency information.
160 bool ForceUnitLatencies() const { return true; }
162 } // end anonymous namespace
165 /// Schedule - Schedule the DAG using list scheduling.
166 void ScheduleDAGRRList::Schedule() {
167 DOUT << "********** List Scheduling **********\n";
169 NumLiveRegs = 0;
170 LiveRegDefs.resize(TRI->getNumRegs(), NULL);
171 LiveRegCycles.resize(TRI->getNumRegs(), 0);
173 // Build the scheduling graph.
174 BuildSchedGraph();
176 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
177 SUnits[su].dumpAll(this));
178 Topo.InitDAGTopologicalSorting();
180 AvailableQueue->initNodes(SUnits);
182 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
183 if (isBottomUp)
184 ListScheduleBottomUp();
185 else
186 ListScheduleTopDown();
188 AvailableQueue->releaseState();
191 //===----------------------------------------------------------------------===//
192 // Bottom-Up Scheduling
193 //===----------------------------------------------------------------------===//
195 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
196 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
197 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
198 SUnit *PredSU = PredEdge->getSUnit();
199 --PredSU->NumSuccsLeft;
201 #ifndef NDEBUG
202 if (PredSU->NumSuccsLeft < 0) {
203 cerr << "*** Scheduling failed! ***\n";
204 PredSU->dump(this);
205 cerr << " has been released too many times!\n";
206 llvm_unreachable(0);
208 #endif
210 // If all the node's successors are scheduled, this node is ready
211 // to be scheduled. Ignore the special EntrySU node.
212 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
213 PredSU->isAvailable = true;
214 AvailableQueue->push(PredSU);
218 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
219 // Bottom up: release predecessors
220 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
221 I != E; ++I) {
222 ReleasePred(SU, &*I);
223 if (I->isAssignedRegDep()) {
224 // This is a physical register dependency and it's impossible or
225 // expensive to copy the register. Make sure nothing that can
226 // clobber the register is scheduled between the predecessor and
227 // this node.
228 if (!LiveRegDefs[I->getReg()]) {
229 ++NumLiveRegs;
230 LiveRegDefs[I->getReg()] = I->getSUnit();
231 LiveRegCycles[I->getReg()] = CurCycle;
237 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
238 /// count of its predecessors. If a predecessor pending count is zero, add it to
239 /// the Available queue.
240 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
241 DOUT << "*** Scheduling [" << CurCycle << "]: ";
242 DEBUG(SU->dump(this));
244 assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
245 SU->setHeightToAtLeast(CurCycle);
246 Sequence.push_back(SU);
248 ReleasePredecessors(SU, CurCycle);
250 // Release all the implicit physical register defs that are live.
251 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
252 I != E; ++I) {
253 if (I->isAssignedRegDep()) {
254 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
255 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
256 assert(LiveRegDefs[I->getReg()] == SU &&
257 "Physical register dependency violated?");
258 --NumLiveRegs;
259 LiveRegDefs[I->getReg()] = NULL;
260 LiveRegCycles[I->getReg()] = 0;
265 SU->isScheduled = true;
266 AvailableQueue->ScheduledNode(SU);
269 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
270 /// unscheduled, incrcease the succ left count of its predecessors. Remove
271 /// them from AvailableQueue if necessary.
272 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
273 SUnit *PredSU = PredEdge->getSUnit();
274 if (PredSU->isAvailable) {
275 PredSU->isAvailable = false;
276 if (!PredSU->isPending)
277 AvailableQueue->remove(PredSU);
280 ++PredSU->NumSuccsLeft;
283 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
284 /// its predecessor states to reflect the change.
285 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
286 DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
287 DEBUG(SU->dump(this));
289 AvailableQueue->UnscheduledNode(SU);
291 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
292 I != E; ++I) {
293 CapturePred(&*I);
294 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
295 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
296 assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
297 "Physical register dependency violated?");
298 --NumLiveRegs;
299 LiveRegDefs[I->getReg()] = NULL;
300 LiveRegCycles[I->getReg()] = 0;
304 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
305 I != E; ++I) {
306 if (I->isAssignedRegDep()) {
307 if (!LiveRegDefs[I->getReg()]) {
308 LiveRegDefs[I->getReg()] = SU;
309 ++NumLiveRegs;
311 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
312 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
316 SU->setHeightDirty();
317 SU->isScheduled = false;
318 SU->isAvailable = true;
319 AvailableQueue->push(SU);
322 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
323 /// BTCycle in order to schedule a specific node.
324 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
325 unsigned &CurCycle) {
326 SUnit *OldSU = NULL;
327 while (CurCycle > BtCycle) {
328 OldSU = Sequence.back();
329 Sequence.pop_back();
330 if (SU->isSucc(OldSU))
331 // Don't try to remove SU from AvailableQueue.
332 SU->isAvailable = false;
333 UnscheduleNodeBottomUp(OldSU);
334 --CurCycle;
337 assert(!SU->isSucc(OldSU) && "Something is wrong!");
339 ++NumBacktracks;
342 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
343 /// successors to the newly created node.
344 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
345 if (SU->getNode()->getFlaggedNode())
346 return NULL;
348 SDNode *N = SU->getNode();
349 if (!N)
350 return NULL;
352 SUnit *NewSU;
353 bool TryUnfold = false;
354 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
355 EVT VT = N->getValueType(i);
356 if (VT == MVT::Flag)
357 return NULL;
358 else if (VT == MVT::Other)
359 TryUnfold = true;
361 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
362 const SDValue &Op = N->getOperand(i);
363 EVT VT = Op.getNode()->getValueType(Op.getResNo());
364 if (VT == MVT::Flag)
365 return NULL;
368 if (TryUnfold) {
369 SmallVector<SDNode*, 2> NewNodes;
370 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
371 return NULL;
373 DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
374 assert(NewNodes.size() == 2 && "Expected a load folding node!");
376 N = NewNodes[1];
377 SDNode *LoadNode = NewNodes[0];
378 unsigned NumVals = N->getNumValues();
379 unsigned OldNumVals = SU->getNode()->getNumValues();
380 for (unsigned i = 0; i != NumVals; ++i)
381 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
382 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
383 SDValue(LoadNode, 1));
385 // LoadNode may already exist. This can happen when there is another
386 // load from the same location and producing the same type of value
387 // but it has different alignment or volatileness.
388 bool isNewLoad = true;
389 SUnit *LoadSU;
390 if (LoadNode->getNodeId() != -1) {
391 LoadSU = &SUnits[LoadNode->getNodeId()];
392 isNewLoad = false;
393 } else {
394 LoadSU = CreateNewSUnit(LoadNode);
395 LoadNode->setNodeId(LoadSU->NodeNum);
396 ComputeLatency(LoadSU);
399 SUnit *NewSU = CreateNewSUnit(N);
400 assert(N->getNodeId() == -1 && "Node already inserted!");
401 N->setNodeId(NewSU->NodeNum);
403 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
404 for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
405 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
406 NewSU->isTwoAddress = true;
407 break;
410 if (TID.isCommutable())
411 NewSU->isCommutable = true;
412 ComputeLatency(NewSU);
414 // Record all the edges to and from the old SU, by category.
415 SmallVector<SDep, 4> ChainPreds;
416 SmallVector<SDep, 4> ChainSuccs;
417 SmallVector<SDep, 4> LoadPreds;
418 SmallVector<SDep, 4> NodePreds;
419 SmallVector<SDep, 4> NodeSuccs;
420 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
421 I != E; ++I) {
422 if (I->isCtrl())
423 ChainPreds.push_back(*I);
424 else if (I->getSUnit()->getNode() &&
425 I->getSUnit()->getNode()->isOperandOf(LoadNode))
426 LoadPreds.push_back(*I);
427 else
428 NodePreds.push_back(*I);
430 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
431 I != E; ++I) {
432 if (I->isCtrl())
433 ChainSuccs.push_back(*I);
434 else
435 NodeSuccs.push_back(*I);
438 // Now assign edges to the newly-created nodes.
439 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
440 const SDep &Pred = ChainPreds[i];
441 RemovePred(SU, Pred);
442 if (isNewLoad)
443 AddPred(LoadSU, Pred);
445 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
446 const SDep &Pred = LoadPreds[i];
447 RemovePred(SU, Pred);
448 if (isNewLoad)
449 AddPred(LoadSU, Pred);
451 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
452 const SDep &Pred = NodePreds[i];
453 RemovePred(SU, Pred);
454 AddPred(NewSU, Pred);
456 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
457 SDep D = NodeSuccs[i];
458 SUnit *SuccDep = D.getSUnit();
459 D.setSUnit(SU);
460 RemovePred(SuccDep, D);
461 D.setSUnit(NewSU);
462 AddPred(SuccDep, D);
464 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
465 SDep D = ChainSuccs[i];
466 SUnit *SuccDep = D.getSUnit();
467 D.setSUnit(SU);
468 RemovePred(SuccDep, D);
469 if (isNewLoad) {
470 D.setSUnit(LoadSU);
471 AddPred(SuccDep, D);
475 // Add a data dependency to reflect that NewSU reads the value defined
476 // by LoadSU.
477 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
479 if (isNewLoad)
480 AvailableQueue->addNode(LoadSU);
481 AvailableQueue->addNode(NewSU);
483 ++NumUnfolds;
485 if (NewSU->NumSuccsLeft == 0) {
486 NewSU->isAvailable = true;
487 return NewSU;
489 SU = NewSU;
492 DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
493 NewSU = CreateClone(SU);
495 // New SUnit has the exact same predecessors.
496 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
497 I != E; ++I)
498 if (!I->isArtificial())
499 AddPred(NewSU, *I);
501 // Only copy scheduled successors. Cut them from old node's successor
502 // list and move them over.
503 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
504 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
505 I != E; ++I) {
506 if (I->isArtificial())
507 continue;
508 SUnit *SuccSU = I->getSUnit();
509 if (SuccSU->isScheduled) {
510 SDep D = *I;
511 D.setSUnit(NewSU);
512 AddPred(SuccSU, D);
513 D.setSUnit(SU);
514 DelDeps.push_back(std::make_pair(SuccSU, D));
517 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
518 RemovePred(DelDeps[i].first, DelDeps[i].second);
520 AvailableQueue->updateNode(SU);
521 AvailableQueue->addNode(NewSU);
523 ++NumDups;
524 return NewSU;
527 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
528 /// scheduled successors of the given SUnit to the last copy.
529 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
530 const TargetRegisterClass *DestRC,
531 const TargetRegisterClass *SrcRC,
532 SmallVector<SUnit*, 2> &Copies) {
533 SUnit *CopyFromSU = CreateNewSUnit(NULL);
534 CopyFromSU->CopySrcRC = SrcRC;
535 CopyFromSU->CopyDstRC = DestRC;
537 SUnit *CopyToSU = CreateNewSUnit(NULL);
538 CopyToSU->CopySrcRC = DestRC;
539 CopyToSU->CopyDstRC = SrcRC;
541 // Only copy scheduled successors. Cut them from old node's successor
542 // list and move them over.
543 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
544 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
545 I != E; ++I) {
546 if (I->isArtificial())
547 continue;
548 SUnit *SuccSU = I->getSUnit();
549 if (SuccSU->isScheduled) {
550 SDep D = *I;
551 D.setSUnit(CopyToSU);
552 AddPred(SuccSU, D);
553 DelDeps.push_back(std::make_pair(SuccSU, *I));
556 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
557 RemovePred(DelDeps[i].first, DelDeps[i].second);
559 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
560 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
562 AvailableQueue->updateNode(SU);
563 AvailableQueue->addNode(CopyFromSU);
564 AvailableQueue->addNode(CopyToSU);
565 Copies.push_back(CopyFromSU);
566 Copies.push_back(CopyToSU);
568 ++NumPRCopies;
571 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
572 /// definition of the specified node.
573 /// FIXME: Move to SelectionDAG?
574 static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
575 const TargetInstrInfo *TII) {
576 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
577 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
578 unsigned NumRes = TID.getNumDefs();
579 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
580 if (Reg == *ImpDef)
581 break;
582 ++NumRes;
584 return N->getValueType(NumRes);
587 /// CheckForLiveRegDef - Return true and update live register vector if the
588 /// specified register def of the specified SUnit clobbers any "live" registers.
589 static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
590 std::vector<SUnit*> &LiveRegDefs,
591 SmallSet<unsigned, 4> &RegAdded,
592 SmallVector<unsigned, 4> &LRegs,
593 const TargetRegisterInfo *TRI) {
594 bool Added = false;
595 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
596 if (RegAdded.insert(Reg)) {
597 LRegs.push_back(Reg);
598 Added = true;
601 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
602 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
603 if (RegAdded.insert(*Alias)) {
604 LRegs.push_back(*Alias);
605 Added = true;
608 return Added;
611 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
612 /// scheduling of the given node to satisfy live physical register dependencies.
613 /// If the specific node is the last one that's available to schedule, do
614 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
615 bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
616 SmallVector<unsigned, 4> &LRegs){
617 if (NumLiveRegs == 0)
618 return false;
620 SmallSet<unsigned, 4> RegAdded;
621 // If this node would clobber any "live" register, then it's not ready.
622 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
623 I != E; ++I) {
624 if (I->isAssignedRegDep())
625 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
626 RegAdded, LRegs, TRI);
629 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
630 if (Node->getOpcode() == ISD::INLINEASM) {
631 // Inline asm can clobber physical defs.
632 unsigned NumOps = Node->getNumOperands();
633 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
634 --NumOps; // Ignore the flag operand.
636 for (unsigned i = 2; i != NumOps;) {
637 unsigned Flags =
638 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
639 unsigned NumVals = (Flags & 0xffff) >> 3;
641 ++i; // Skip the ID value.
642 if ((Flags & 7) == 2 || (Flags & 7) == 6) {
643 // Check for def of register or earlyclobber register.
644 for (; NumVals; --NumVals, ++i) {
645 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
646 if (TargetRegisterInfo::isPhysicalRegister(Reg))
647 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
649 } else
650 i += NumVals;
652 continue;
655 if (!Node->isMachineOpcode())
656 continue;
657 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
658 if (!TID.ImplicitDefs)
659 continue;
660 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
661 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
663 return !LRegs.empty();
667 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
668 /// schedulers.
669 void ScheduleDAGRRList::ListScheduleBottomUp() {
670 unsigned CurCycle = 0;
672 // Release any predecessors of the special Exit node.
673 ReleasePredecessors(&ExitSU, CurCycle);
675 // Add root to Available queue.
676 if (!SUnits.empty()) {
677 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
678 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
679 RootSU->isAvailable = true;
680 AvailableQueue->push(RootSU);
683 // While Available queue is not empty, grab the node with the highest
684 // priority. If it is not ready put it back. Schedule the node.
685 SmallVector<SUnit*, 4> NotReady;
686 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
687 Sequence.reserve(SUnits.size());
688 while (!AvailableQueue->empty()) {
689 bool Delayed = false;
690 LRegsMap.clear();
691 SUnit *CurSU = AvailableQueue->pop();
692 while (CurSU) {
693 SmallVector<unsigned, 4> LRegs;
694 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
695 break;
696 Delayed = true;
697 LRegsMap.insert(std::make_pair(CurSU, LRegs));
699 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
700 NotReady.push_back(CurSU);
701 CurSU = AvailableQueue->pop();
704 // All candidates are delayed due to live physical reg dependencies.
705 // Try backtracking, code duplication, or inserting cross class copies
706 // to resolve it.
707 if (Delayed && !CurSU) {
708 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
709 SUnit *TrySU = NotReady[i];
710 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
712 // Try unscheduling up to the point where it's safe to schedule
713 // this node.
714 unsigned LiveCycle = CurCycle;
715 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
716 unsigned Reg = LRegs[j];
717 unsigned LCycle = LiveRegCycles[Reg];
718 LiveCycle = std::min(LiveCycle, LCycle);
720 SUnit *OldSU = Sequence[LiveCycle];
721 if (!WillCreateCycle(TrySU, OldSU)) {
722 BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
723 // Force the current node to be scheduled before the node that
724 // requires the physical reg dep.
725 if (OldSU->isAvailable) {
726 OldSU->isAvailable = false;
727 AvailableQueue->remove(OldSU);
729 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
730 /*Reg=*/0, /*isNormalMemory=*/false,
731 /*isMustAlias=*/false, /*isArtificial=*/true));
732 // If one or more successors has been unscheduled, then the current
733 // node is no longer avaialable. Schedule a successor that's now
734 // available instead.
735 if (!TrySU->isAvailable)
736 CurSU = AvailableQueue->pop();
737 else {
738 CurSU = TrySU;
739 TrySU->isPending = false;
740 NotReady.erase(NotReady.begin()+i);
742 break;
746 if (!CurSU) {
747 // Can't backtrack. If it's too expensive to copy the value, then try
748 // duplicate the nodes that produces these "too expensive to copy"
749 // values to break the dependency. In case even that doesn't work,
750 // insert cross class copies.
751 // If it's not too expensive, i.e. cost != -1, issue copies.
752 SUnit *TrySU = NotReady[0];
753 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
754 assert(LRegs.size() == 1 && "Can't handle this yet!");
755 unsigned Reg = LRegs[0];
756 SUnit *LRDef = LiveRegDefs[Reg];
757 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
758 const TargetRegisterClass *RC =
759 TRI->getPhysicalRegisterRegClass(Reg, VT);
760 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
762 // If cross copy register class is null, then it must be possible copy
763 // the value directly. Do not try duplicate the def.
764 SUnit *NewDef = 0;
765 if (DestRC)
766 NewDef = CopyAndMoveSuccessors(LRDef);
767 else
768 DestRC = RC;
769 if (!NewDef) {
770 // Issue copies, these can be expensive cross register class copies.
771 SmallVector<SUnit*, 2> Copies;
772 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
773 DOUT << "Adding an edge from SU #" << TrySU->NodeNum
774 << " to SU #" << Copies.front()->NodeNum << "\n";
775 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
776 /*Reg=*/0, /*isNormalMemory=*/false,
777 /*isMustAlias=*/false,
778 /*isArtificial=*/true));
779 NewDef = Copies.back();
782 DOUT << "Adding an edge from SU #" << NewDef->NodeNum
783 << " to SU #" << TrySU->NodeNum << "\n";
784 LiveRegDefs[Reg] = NewDef;
785 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
786 /*Reg=*/0, /*isNormalMemory=*/false,
787 /*isMustAlias=*/false,
788 /*isArtificial=*/true));
789 TrySU->isAvailable = false;
790 CurSU = NewDef;
793 assert(CurSU && "Unable to resolve live physical register dependencies!");
796 // Add the nodes that aren't ready back onto the available list.
797 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
798 NotReady[i]->isPending = false;
799 // May no longer be available due to backtracking.
800 if (NotReady[i]->isAvailable)
801 AvailableQueue->push(NotReady[i]);
803 NotReady.clear();
805 if (CurSU)
806 ScheduleNodeBottomUp(CurSU, CurCycle);
807 ++CurCycle;
810 // Reverse the order if it is bottom up.
811 std::reverse(Sequence.begin(), Sequence.end());
813 #ifndef NDEBUG
814 VerifySchedule(isBottomUp);
815 #endif
818 //===----------------------------------------------------------------------===//
819 // Top-Down Scheduling
820 //===----------------------------------------------------------------------===//
822 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
823 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
824 void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
825 SUnit *SuccSU = SuccEdge->getSUnit();
826 --SuccSU->NumPredsLeft;
828 #ifndef NDEBUG
829 if (SuccSU->NumPredsLeft < 0) {
830 cerr << "*** Scheduling failed! ***\n";
831 SuccSU->dump(this);
832 cerr << " has been released too many times!\n";
833 llvm_unreachable(0);
835 #endif
837 // If all the node's predecessors are scheduled, this node is ready
838 // to be scheduled. Ignore the special ExitSU node.
839 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
840 SuccSU->isAvailable = true;
841 AvailableQueue->push(SuccSU);
845 void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
846 // Top down: release successors
847 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
848 I != E; ++I) {
849 assert(!I->isAssignedRegDep() &&
850 "The list-tdrr scheduler doesn't yet support physreg dependencies!");
852 ReleaseSucc(SU, &*I);
856 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
857 /// count of its successors. If a successor pending count is zero, add it to
858 /// the Available queue.
859 void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
860 DOUT << "*** Scheduling [" << CurCycle << "]: ";
861 DEBUG(SU->dump(this));
863 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
864 SU->setDepthToAtLeast(CurCycle);
865 Sequence.push_back(SU);
867 ReleaseSuccessors(SU);
868 SU->isScheduled = true;
869 AvailableQueue->ScheduledNode(SU);
872 /// ListScheduleTopDown - The main loop of list scheduling for top-down
873 /// schedulers.
874 void ScheduleDAGRRList::ListScheduleTopDown() {
875 unsigned CurCycle = 0;
877 // Release any successors of the special Entry node.
878 ReleaseSuccessors(&EntrySU);
880 // All leaves to Available queue.
881 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
882 // It is available if it has no predecessors.
883 if (SUnits[i].Preds.empty()) {
884 AvailableQueue->push(&SUnits[i]);
885 SUnits[i].isAvailable = true;
889 // While Available queue is not empty, grab the node with the highest
890 // priority. If it is not ready put it back. Schedule the node.
891 Sequence.reserve(SUnits.size());
892 while (!AvailableQueue->empty()) {
893 SUnit *CurSU = AvailableQueue->pop();
895 if (CurSU)
896 ScheduleNodeTopDown(CurSU, CurCycle);
897 ++CurCycle;
900 #ifndef NDEBUG
901 VerifySchedule(isBottomUp);
902 #endif
906 //===----------------------------------------------------------------------===//
907 // RegReductionPriorityQueue Implementation
908 //===----------------------------------------------------------------------===//
910 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
911 // to reduce register pressure.
913 namespace {
914 template<class SF>
915 class RegReductionPriorityQueue;
917 /// Sorting functions for the Available queue.
918 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
919 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
920 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
921 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
923 bool operator()(const SUnit* left, const SUnit* right) const;
926 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
927 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
928 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
929 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
931 bool operator()(const SUnit* left, const SUnit* right) const;
933 } // end anonymous namespace
935 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
936 /// Smaller number is the higher priority.
937 static unsigned
938 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
939 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
940 if (SethiUllmanNumber != 0)
941 return SethiUllmanNumber;
943 unsigned Extra = 0;
944 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
945 I != E; ++I) {
946 if (I->isCtrl()) continue; // ignore chain preds
947 SUnit *PredSU = I->getSUnit();
948 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
949 if (PredSethiUllman > SethiUllmanNumber) {
950 SethiUllmanNumber = PredSethiUllman;
951 Extra = 0;
952 } else if (PredSethiUllman == SethiUllmanNumber)
953 ++Extra;
956 SethiUllmanNumber += Extra;
958 if (SethiUllmanNumber == 0)
959 SethiUllmanNumber = 1;
961 return SethiUllmanNumber;
964 namespace {
965 template<class SF>
966 class VISIBILITY_HIDDEN RegReductionPriorityQueue
967 : public SchedulingPriorityQueue {
968 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
969 unsigned currentQueueId;
971 protected:
972 // SUnits - The SUnits for the current graph.
973 std::vector<SUnit> *SUnits;
975 const TargetInstrInfo *TII;
976 const TargetRegisterInfo *TRI;
977 ScheduleDAGRRList *scheduleDAG;
979 // SethiUllmanNumbers - The SethiUllman number for each node.
980 std::vector<unsigned> SethiUllmanNumbers;
982 public:
983 RegReductionPriorityQueue(const TargetInstrInfo *tii,
984 const TargetRegisterInfo *tri) :
985 Queue(SF(this)), currentQueueId(0),
986 TII(tii), TRI(tri), scheduleDAG(NULL) {}
988 void initNodes(std::vector<SUnit> &sunits) {
989 SUnits = &sunits;
990 // Add pseudo dependency edges for two-address nodes.
991 AddPseudoTwoAddrDeps();
992 // Reroute edges to nodes with multiple uses.
993 PrescheduleNodesWithMultipleUses();
994 // Calculate node priorities.
995 CalculateSethiUllmanNumbers();
998 void addNode(const SUnit *SU) {
999 unsigned SUSize = SethiUllmanNumbers.size();
1000 if (SUnits->size() > SUSize)
1001 SethiUllmanNumbers.resize(SUSize*2, 0);
1002 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1005 void updateNode(const SUnit *SU) {
1006 SethiUllmanNumbers[SU->NodeNum] = 0;
1007 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1010 void releaseState() {
1011 SUnits = 0;
1012 SethiUllmanNumbers.clear();
1015 unsigned getNodePriority(const SUnit *SU) const {
1016 assert(SU->NodeNum < SethiUllmanNumbers.size());
1017 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1018 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1019 // CopyToReg should be close to its uses to facilitate coalescing and
1020 // avoid spilling.
1021 return 0;
1022 if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
1023 Opc == TargetInstrInfo::SUBREG_TO_REG ||
1024 Opc == TargetInstrInfo::INSERT_SUBREG)
1025 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1026 // close to their uses to facilitate coalescing.
1027 return 0;
1028 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1029 // If SU does not have a register use, i.e. it doesn't produce a value
1030 // that would be consumed (e.g. store), then it terminates a chain of
1031 // computation. Give it a large SethiUllman number so it will be
1032 // scheduled right before its predecessors that it doesn't lengthen
1033 // their live ranges.
1034 return 0xffff;
1035 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1036 // If SU does not have a register def, schedule it close to its uses
1037 // because it does not lengthen any live ranges.
1038 return 0;
1039 return SethiUllmanNumbers[SU->NodeNum];
1042 unsigned size() const { return Queue.size(); }
1044 bool empty() const { return Queue.empty(); }
1046 void push(SUnit *U) {
1047 assert(!U->NodeQueueId && "Node in the queue already");
1048 U->NodeQueueId = ++currentQueueId;
1049 Queue.push(U);
1052 void push_all(const std::vector<SUnit *> &Nodes) {
1053 for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
1054 push(Nodes[i]);
1057 SUnit *pop() {
1058 if (empty()) return NULL;
1059 SUnit *V = Queue.top();
1060 Queue.pop();
1061 V->NodeQueueId = 0;
1062 return V;
1065 void remove(SUnit *SU) {
1066 assert(!Queue.empty() && "Queue is empty!");
1067 assert(SU->NodeQueueId != 0 && "Not in queue!");
1068 Queue.erase_one(SU);
1069 SU->NodeQueueId = 0;
1072 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1073 scheduleDAG = scheduleDag;
1076 protected:
1077 bool canClobber(const SUnit *SU, const SUnit *Op);
1078 void AddPseudoTwoAddrDeps();
1079 void PrescheduleNodesWithMultipleUses();
1080 void CalculateSethiUllmanNumbers();
1083 typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1084 BURegReductionPriorityQueue;
1086 typedef RegReductionPriorityQueue<td_ls_rr_sort>
1087 TDRegReductionPriorityQueue;
1090 /// closestSucc - Returns the scheduled cycle of the successor which is
1091 /// closest to the current cycle.
1092 static unsigned closestSucc(const SUnit *SU) {
1093 unsigned MaxHeight = 0;
1094 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1095 I != E; ++I) {
1096 if (I->isCtrl()) continue; // ignore chain succs
1097 unsigned Height = I->getSUnit()->getHeight();
1098 // If there are bunch of CopyToRegs stacked up, they should be considered
1099 // to be at the same position.
1100 if (I->getSUnit()->getNode() &&
1101 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1102 Height = closestSucc(I->getSUnit())+1;
1103 if (Height > MaxHeight)
1104 MaxHeight = Height;
1106 return MaxHeight;
1109 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
1110 /// for scratch registers, i.e. number of data dependencies.
1111 static unsigned calcMaxScratches(const SUnit *SU) {
1112 unsigned Scratches = 0;
1113 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1114 I != E; ++I) {
1115 if (I->isCtrl()) continue; // ignore chain preds
1116 Scratches++;
1118 return Scratches;
1121 // Bottom up
1122 bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1123 unsigned LPriority = SPQ->getNodePriority(left);
1124 unsigned RPriority = SPQ->getNodePriority(right);
1125 if (LPriority != RPriority)
1126 return LPriority > RPriority;
1128 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
1129 // e.g.
1130 // t1 = op t2, c1
1131 // t3 = op t4, c2
1133 // and the following instructions are both ready.
1134 // t2 = op c3
1135 // t4 = op c4
1137 // Then schedule t2 = op first.
1138 // i.e.
1139 // t4 = op c4
1140 // t2 = op c3
1141 // t1 = op t2, c1
1142 // t3 = op t4, c2
1144 // This creates more short live intervals.
1145 unsigned LDist = closestSucc(left);
1146 unsigned RDist = closestSucc(right);
1147 if (LDist != RDist)
1148 return LDist < RDist;
1150 // How many registers becomes live when the node is scheduled.
1151 unsigned LScratch = calcMaxScratches(left);
1152 unsigned RScratch = calcMaxScratches(right);
1153 if (LScratch != RScratch)
1154 return LScratch > RScratch;
1156 if (left->getHeight() != right->getHeight())
1157 return left->getHeight() > right->getHeight();
1159 if (left->getDepth() != right->getDepth())
1160 return left->getDepth() < right->getDepth();
1162 assert(left->NodeQueueId && right->NodeQueueId &&
1163 "NodeQueueId cannot be zero");
1164 return (left->NodeQueueId > right->NodeQueueId);
1167 template<class SF>
1168 bool
1169 RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1170 if (SU->isTwoAddress) {
1171 unsigned Opc = SU->getNode()->getMachineOpcode();
1172 const TargetInstrDesc &TID = TII->get(Opc);
1173 unsigned NumRes = TID.getNumDefs();
1174 unsigned NumOps = TID.getNumOperands() - NumRes;
1175 for (unsigned i = 0; i != NumOps; ++i) {
1176 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1177 SDNode *DU = SU->getNode()->getOperand(i).getNode();
1178 if (DU->getNodeId() != -1 &&
1179 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1180 return true;
1184 return false;
1188 /// hasCopyToRegUse - Return true if SU has a value successor that is a
1189 /// CopyToReg node.
1190 static bool hasCopyToRegUse(const SUnit *SU) {
1191 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1192 I != E; ++I) {
1193 if (I->isCtrl()) continue;
1194 const SUnit *SuccSU = I->getSUnit();
1195 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1196 return true;
1198 return false;
1201 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1202 /// physical register defs.
1203 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1204 const TargetInstrInfo *TII,
1205 const TargetRegisterInfo *TRI) {
1206 SDNode *N = SuccSU->getNode();
1207 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1208 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1209 assert(ImpDefs && "Caller should check hasPhysRegDefs");
1210 for (const SDNode *SUNode = SU->getNode(); SUNode;
1211 SUNode = SUNode->getFlaggedNode()) {
1212 if (!SUNode->isMachineOpcode())
1213 continue;
1214 const unsigned *SUImpDefs =
1215 TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
1216 if (!SUImpDefs)
1217 return false;
1218 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1219 EVT VT = N->getValueType(i);
1220 if (VT == MVT::Flag || VT == MVT::Other)
1221 continue;
1222 if (!N->hasAnyUseOfValue(i))
1223 continue;
1224 unsigned Reg = ImpDefs[i - NumDefs];
1225 for (;*SUImpDefs; ++SUImpDefs) {
1226 unsigned SUReg = *SUImpDefs;
1227 if (TRI->regsOverlap(Reg, SUReg))
1228 return true;
1232 return false;
1235 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1236 /// are not handled well by the general register pressure reduction
1237 /// heuristics. When presented with code like this:
1239 /// N
1240 /// / |
1241 /// / |
1242 /// U store
1243 /// |
1244 /// ...
1246 /// the heuristics tend to push the store up, but since the
1247 /// operand of the store has another use (U), this would increase
1248 /// the length of that other use (the U->N edge).
1250 /// This function transforms code like the above to route U's
1251 /// dependence through the store when possible, like this:
1253 /// N
1254 /// ||
1255 /// ||
1256 /// store
1257 /// |
1258 /// U
1259 /// |
1260 /// ...
1262 /// This results in the store being scheduled immediately
1263 /// after N, which shortens the U->N live range, reducing
1264 /// register pressure.
1266 template<class SF>
1267 void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1268 // Visit all the nodes in topological order, working top-down.
1269 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1270 SUnit *SU = &(*SUnits)[i];
1271 // For now, only look at nodes with no data successors, such as stores.
1272 // These are especially important, due to the heuristics in
1273 // getNodePriority for nodes with no data successors.
1274 if (SU->NumSuccs != 0)
1275 continue;
1276 // For now, only look at nodes with exactly one data predecessor.
1277 if (SU->NumPreds != 1)
1278 continue;
1279 // Avoid prescheduling copies to virtual registers, which don't behave
1280 // like other nodes from the perspective of scheduling heuristics.
1281 if (SDNode *N = SU->getNode())
1282 if (N->getOpcode() == ISD::CopyToReg &&
1283 TargetRegisterInfo::isVirtualRegister
1284 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1285 continue;
1287 // Locate the single data predecessor.
1288 SUnit *PredSU = 0;
1289 for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1290 EE = SU->Preds.end(); II != EE; ++II)
1291 if (!II->isCtrl()) {
1292 PredSU = II->getSUnit();
1293 break;
1295 assert(PredSU);
1297 // Don't rewrite edges that carry physregs, because that requires additional
1298 // support infrastructure.
1299 if (PredSU->hasPhysRegDefs)
1300 continue;
1301 // Short-circuit the case where SU is PredSU's only data successor.
1302 if (PredSU->NumSuccs == 1)
1303 continue;
1304 // Avoid prescheduling to copies from virtual registers, which don't behave
1305 // like other nodes from the perspective of scheduling // heuristics.
1306 if (SDNode *N = SU->getNode())
1307 if (N->getOpcode() == ISD::CopyFromReg &&
1308 TargetRegisterInfo::isVirtualRegister
1309 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1310 continue;
1312 // Perform checks on the successors of PredSU.
1313 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1314 EE = PredSU->Succs.end(); II != EE; ++II) {
1315 SUnit *PredSuccSU = II->getSUnit();
1316 if (PredSuccSU == SU) continue;
1317 // If PredSU has another successor with no data successors, for
1318 // now don't attempt to choose either over the other.
1319 if (PredSuccSU->NumSuccs == 0)
1320 goto outer_loop_continue;
1321 // Don't break physical register dependencies.
1322 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1323 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1324 goto outer_loop_continue;
1325 // Don't introduce graph cycles.
1326 if (scheduleDAG->IsReachable(SU, PredSuccSU))
1327 goto outer_loop_continue;
1330 // Ok, the transformation is safe and the heuristics suggest it is
1331 // profitable. Update the graph.
1332 DOUT << "Prescheduling SU # " << SU->NodeNum
1333 << " next to PredSU # " << PredSU->NodeNum
1334 << " to guide scheduling in the presence of multiple uses\n";
1335 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1336 SDep Edge = PredSU->Succs[i];
1337 assert(!Edge.isAssignedRegDep());
1338 SUnit *SuccSU = Edge.getSUnit();
1339 if (SuccSU != SU) {
1340 Edge.setSUnit(PredSU);
1341 scheduleDAG->RemovePred(SuccSU, Edge);
1342 scheduleDAG->AddPred(SU, Edge);
1343 Edge.setSUnit(SU);
1344 scheduleDAG->AddPred(SuccSU, Edge);
1345 --i;
1348 outer_loop_continue:;
1352 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1353 /// it as a def&use operand. Add a pseudo control edge from it to the other
1354 /// node (if it won't create a cycle) so the two-address one will be scheduled
1355 /// first (lower in the schedule). If both nodes are two-address, favor the
1356 /// one that has a CopyToReg use (more likely to be a loop induction update).
1357 /// If both are two-address, but one is commutable while the other is not
1358 /// commutable, favor the one that's not commutable.
1359 template<class SF>
1360 void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1361 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1362 SUnit *SU = &(*SUnits)[i];
1363 if (!SU->isTwoAddress)
1364 continue;
1366 SDNode *Node = SU->getNode();
1367 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1368 continue;
1370 unsigned Opc = Node->getMachineOpcode();
1371 const TargetInstrDesc &TID = TII->get(Opc);
1372 unsigned NumRes = TID.getNumDefs();
1373 unsigned NumOps = TID.getNumOperands() - NumRes;
1374 for (unsigned j = 0; j != NumOps; ++j) {
1375 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1376 continue;
1377 SDNode *DU = SU->getNode()->getOperand(j).getNode();
1378 if (DU->getNodeId() == -1)
1379 continue;
1380 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1381 if (!DUSU) continue;
1382 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1383 E = DUSU->Succs.end(); I != E; ++I) {
1384 if (I->isCtrl()) continue;
1385 SUnit *SuccSU = I->getSUnit();
1386 if (SuccSU == SU)
1387 continue;
1388 // Be conservative. Ignore if nodes aren't at roughly the same
1389 // depth and height.
1390 if (SuccSU->getHeight() < SU->getHeight() &&
1391 (SU->getHeight() - SuccSU->getHeight()) > 1)
1392 continue;
1393 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1394 // constrains whatever is using the copy, instead of the copy
1395 // itself. In the case that the copy is coalesced, this
1396 // preserves the intent of the pseudo two-address heurietics.
1397 while (SuccSU->Succs.size() == 1 &&
1398 SuccSU->getNode()->isMachineOpcode() &&
1399 SuccSU->getNode()->getMachineOpcode() ==
1400 TargetInstrInfo::COPY_TO_REGCLASS)
1401 SuccSU = SuccSU->Succs.front().getSUnit();
1402 // Don't constrain non-instruction nodes.
1403 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1404 continue;
1405 // Don't constrain nodes with physical register defs if the
1406 // predecessor can clobber them.
1407 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1408 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1409 continue;
1411 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1412 // these may be coalesced away. We want them close to their uses.
1413 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1414 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
1415 SuccOpc == TargetInstrInfo::INSERT_SUBREG ||
1416 SuccOpc == TargetInstrInfo::SUBREG_TO_REG)
1417 continue;
1418 if ((!canClobber(SuccSU, DUSU) ||
1419 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
1420 (!SU->isCommutable && SuccSU->isCommutable)) &&
1421 !scheduleDAG->IsReachable(SuccSU, SU)) {
1422 DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum
1423 << " to SU #" << SuccSU->NodeNum << "\n";
1424 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1425 /*Reg=*/0, /*isNormalMemory=*/false,
1426 /*isMustAlias=*/false,
1427 /*isArtificial=*/true));
1434 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1435 /// scheduling units.
1436 template<class SF>
1437 void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1438 SethiUllmanNumbers.assign(SUnits->size(), 0);
1440 for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1441 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1444 /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
1445 /// predecessors of the successors of the SUnit SU. Stop when the provided
1446 /// limit is exceeded.
1447 static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1448 unsigned Limit) {
1449 unsigned Sum = 0;
1450 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1451 I != E; ++I) {
1452 const SUnit *SuccSU = I->getSUnit();
1453 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1454 EE = SuccSU->Preds.end(); II != EE; ++II) {
1455 SUnit *PredSU = II->getSUnit();
1456 if (!PredSU->isScheduled)
1457 if (++Sum > Limit)
1458 return Sum;
1461 return Sum;
1465 // Top down
1466 bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1467 unsigned LPriority = SPQ->getNodePriority(left);
1468 unsigned RPriority = SPQ->getNodePriority(right);
1469 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1470 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
1471 bool LIsFloater = LIsTarget && left->NumPreds == 0;
1472 bool RIsFloater = RIsTarget && right->NumPreds == 0;
1473 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1474 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1476 if (left->NumSuccs == 0 && right->NumSuccs != 0)
1477 return false;
1478 else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1479 return true;
1481 if (LIsFloater)
1482 LBonus -= 2;
1483 if (RIsFloater)
1484 RBonus -= 2;
1485 if (left->NumSuccs == 1)
1486 LBonus += 2;
1487 if (right->NumSuccs == 1)
1488 RBonus += 2;
1490 if (LPriority+LBonus != RPriority+RBonus)
1491 return LPriority+LBonus < RPriority+RBonus;
1493 if (left->getDepth() != right->getDepth())
1494 return left->getDepth() < right->getDepth();
1496 if (left->NumSuccsLeft != right->NumSuccsLeft)
1497 return left->NumSuccsLeft > right->NumSuccsLeft;
1499 assert(left->NodeQueueId && right->NodeQueueId &&
1500 "NodeQueueId cannot be zero");
1501 return (left->NodeQueueId > right->NodeQueueId);
1504 //===----------------------------------------------------------------------===//
1505 // Public Constructor Functions
1506 //===----------------------------------------------------------------------===//
1508 llvm::ScheduleDAGSDNodes *
1509 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1510 const TargetMachine &TM = IS->TM;
1511 const TargetInstrInfo *TII = TM.getInstrInfo();
1512 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1514 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
1516 ScheduleDAGRRList *SD =
1517 new ScheduleDAGRRList(*IS->MF, true, PQ);
1518 PQ->setScheduleDAG(SD);
1519 return SD;
1522 llvm::ScheduleDAGSDNodes *
1523 llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1524 const TargetMachine &TM = IS->TM;
1525 const TargetInstrInfo *TII = TM.getInstrInfo();
1526 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1528 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
1530 ScheduleDAGRRList *SD =
1531 new ScheduleDAGRRList(*IS->MF, false, PQ);
1532 PQ->setScheduleDAG(SD);
1533 return SD;