1 //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements the ScheduleDAG class, which is a base class used by
11 // scheduling implementation classes.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "pre-RA-sched"
16 #include "SDNodeDbgValue.h"
17 #include "ScheduleDAGSDNodes.h"
18 #include "InstrEmitter.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/MC/MCInstrItineraries.h"
21 #include "llvm/Target/TargetMachine.h"
22 #include "llvm/Target/TargetInstrInfo.h"
23 #include "llvm/Target/TargetLowering.h"
24 #include "llvm/Target/TargetRegisterInfo.h"
25 #include "llvm/Target/TargetSubtargetInfo.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/raw_ostream.h"
36 STATISTIC(LoadsClustered
, "Number of loads clustered together");
38 // This allows latency based scheduler to notice high latency instructions
39 // without a target itinerary. The choise if number here has more to do with
40 // balancing scheduler heursitics than with the actual machine latency.
41 static cl::opt
<int> HighLatencyCycles(
42 "sched-high-latency-cycles", cl::Hidden
, cl::init(10),
43 cl::desc("Roughly estimate the number of cycles that 'long latency'"
44 "instructions take for targets with no itinerary"));
/// Constructor. InstrItins is initialised from the instruction itinerary data
/// of the target that owns the machine function \p mf.
/// NOTE(review): the embedded listing numbers jump from 46 to 48, so one
/// line of the initialiser list is not visible in this copy - confirm against
/// the upstream file.
46 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction
&mf
)
48 InstrItins(mf
.getTarget().getInstrItineraryData()) {}
50 /// Run - perform scheduling.
/// Forwards \p bb and \p insertPos to the base-class ScheduleDAG::Run.
/// NOTE(review): listing numbers 54 and 56 are absent here, so whatever is
/// done with \p dag is not visible in this copy - confirm against upstream.
52 void ScheduleDAGSDNodes::Run(SelectionDAG
*dag
, MachineBasicBlock
*bb
,
53 MachineBasicBlock::iterator insertPos
) {
55 ScheduleDAG::Run(bb
, insertPos
);
58 /// NewSUnit - Creates a new SUnit and returns a ptr to it.
/// Appends SUnit(N, index) to SUnits, where index is the size of the vector
/// before the push, and makes the new unit its own OrigNode. The assert
/// compares a previously recorded address with &SUnits[0] to detect that the
/// push_back reallocated the vector. SchedulingPref is then set either to
/// Sched::None (statement under the IMPLICIT_DEF test) or to the value given
/// by TargetLowering::getSchedulingPreference(N).
/// NOTE(review): listing numbers 61, 63-65, 72, 76 and 78-79 are absent, so
/// the code that records Addr, the start of the condition, the `else` and the
/// return statement are not visible in this copy - confirm against upstream.
60 SUnit
*ScheduleDAGSDNodes::NewSUnit(SDNode
*N
) {
62 const SUnit
*Addr
= 0;
// The new unit's NodeNum argument is the current element count.
66 SUnits
.push_back(SUnit(N
, (unsigned)SUnits
.size()));
67 assert((Addr
== 0 || Addr
== &SUnits
[0]) &&
68 "SUnits std::vector reallocated on the fly!");
69 SUnits
.back().OrigNode
= &SUnits
.back();
70 SUnit
*SU
= &SUnits
.back();
71 const TargetLowering
&TLI
= DAG
->getTargetLoweringInfo();
73 (N
->isMachineOpcode() &&
74 N
->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF
))
75 SU
->SchedulingPref
= Sched::None
;
77 SU
->SchedulingPref
= TLI
.getSchedulingPreference(N
);
/// Clone - Creates a new SUnit for the node of \p Old through NewSUnit and
/// copies the following fields of \p Old onto it: OrigNode, Latency,
/// isVRegCycle, isCall, isCallOp, isTwoAddress, isCommutable,
/// hasPhysRegDefs, hasPhysRegClobbers, isScheduleHigh, isScheduleLow and
/// SchedulingPref.
/// NOTE(review): listing numbers 95-97 are absent, so the end of the function
/// (including its return statement) is not visible in this copy - confirm
/// against upstream.
81 SUnit
*ScheduleDAGSDNodes::Clone(SUnit
*Old
) {
82 SUnit
*SU
= NewSUnit(Old
->getNode());
83 SU
->OrigNode
= Old
->OrigNode
;
84 SU
->Latency
= Old
->Latency
;
85 SU
->isVRegCycle
= Old
->isVRegCycle
;
86 SU
->isCall
= Old
->isCall
;
87 SU
->isCallOp
= Old
->isCallOp
;
88 SU
->isTwoAddress
= Old
->isTwoAddress
;
89 SU
->isCommutable
= Old
->isCommutable
;
90 SU
->hasPhysRegDefs
= Old
->hasPhysRegDefs
;
91 SU
->hasPhysRegClobbers
= Old
->hasPhysRegClobbers
;
92 SU
->isScheduleHigh
= Old
->isScheduleHigh
;
93 SU
->isScheduleLow
= Old
->isScheduleLow
;
94 SU
->SchedulingPref
= Old
->SchedulingPref
;
99 /// CheckForPhysRegDependency - Check if the dependency between def and use of
100 /// a specified operand is a physical register dependency. If so, returns the
101 /// register and the cost of copying the register.
/// Results are reported through the \p PhysReg and \p Cost reference
/// parameters. The visible code tests that \p Op is 2 and that \p User is a
/// CopyToReg, reads the register from operand 1 of \p User, tests whether it
/// is a virtual register, and - when \p Def is a machine opcode whose
/// implicit def at index (ResNo - NumDefs) equals that register - sets
/// \p Cost to the copy cost of the register's minimal physical register class.
/// NOTE(review): listing numbers 107-108, 111-112, 118 and 122-124 are
/// absent, so the statements controlled by the first two tests and the
/// assignment to \p PhysReg are not visible in this copy - confirm against
/// upstream.
102 static void CheckForPhysRegDependency(SDNode
*Def
, SDNode
*User
, unsigned Op
,
103 const TargetRegisterInfo
*TRI
,
104 const TargetInstrInfo
*TII
,
105 unsigned &PhysReg
, int &Cost
) {
106 if (Op
!= 2 || User
->getOpcode() != ISD::CopyToReg
)
109 unsigned Reg
= cast
<RegisterSDNode
>(User
->getOperand(1))->getReg();
110 if (TargetRegisterInfo::isVirtualRegister(Reg
))
113 unsigned ResNo
= User
->getOperand(2).getResNo();
114 if (Def
->isMachineOpcode()) {
115 const MCInstrDesc
&II
= TII
->get(Def
->getMachineOpcode());
// Result numbers at or beyond getNumDefs() index into ImplicitDefs.
116 if (ResNo
>= II
.getNumDefs() &&
117 II
.ImplicitDefs
[ResNo
- II
.getNumDefs()] == Reg
) {
119 const TargetRegisterClass
*RC
=
120 TRI
->getMinimalPhysRegClass(Reg
, Def
->getValueType(ResNo
));
121 Cost
= RC
->getCopyCost();
/// AddGlue - Rebuilds \p N in place with DAG->MorphNodeTo, keeping its opcode,
/// using a value-type list made of N's current value types (plus MVT::Glue,
/// line 140) and an operand list made of N's current operands. Returns early
/// when the node behind \p Glue is \p N itself or when N's last value type is
/// already MVT::Glue. For a MachineSDNode the memory-operand range is read
/// before the morph and handed back with setMemRefs afterwards.
/// NOTE(review): listing numbers 139, 146-147, 154, 157, 162 and 164 are
/// absent, so the conditions guarding the MVT::Glue push and the memory
/// reference handling, and any use of \p Glue as an operand, are not visible
/// in this copy - confirm against upstream.
126 static void AddGlue(SDNode
*N
, SDValue Glue
, bool AddGlue
, SelectionDAG
*DAG
) {
127 SmallVector
<EVT
, 4> VTs
;
128 SDNode
*GlueDestNode
= Glue
.getNode();
130 // Don't add glue from a node to itself.
131 if (GlueDestNode
== N
) return;
133 // Don't add glue to something which already has glue.
134 if (N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
) return;
// Start from the node's existing result types.
136 for (unsigned I
= 0, E
= N
->getNumValues(); I
!= E
; ++I
)
137 VTs
.push_back(N
->getValueType(I
));
140 VTs
.push_back(MVT::Glue
);
// Start from the node's existing operands.
142 SmallVector
<SDValue
, 4> Ops
;
143 for (unsigned I
= 0, E
= N
->getNumOperands(); I
!= E
; ++I
)
144 Ops
.push_back(N
->getOperand(I
));
149 SDVTList VTList
= DAG
->getVTList(&VTs
[0], VTs
.size());
150 MachineSDNode::mmo_iterator Begin
= 0, End
= 0;
151 MachineSDNode
*MN
= dyn_cast
<MachineSDNode
>(N
);
153 // Store memory references.
155 Begin
= MN
->memoperands_begin();
156 End
= MN
->memoperands_end();
159 DAG
->MorphNodeTo(N
, N
->getOpcode(), VTList
, &Ops
[0], Ops
.size());
161 // Reset the memory references
163 MN
->setMemRefs(Begin
, End
);
166 /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
167 /// This function finds loads of the same base and different offsets. If the
168 /// offsets are not far apart (target specific), it adds MVT::Glue inputs and
169 /// outputs to ensure they are scheduled together and in order. This
170 /// optimization may benefit some targets by improving cache locality.
/// Visible steps: take the last operand of \p Node as the chain when its type
/// is MVT::Other; walk the users of that chain, asking
/// TII->areLoadsFromSameBasePtr about each candidate and recording the
/// offsets in Offsets and O2SMap; sort the offsets; collect the loads that
/// TII->shouldScheduleLoadsNear accepts, stopping at the first rejection; and
/// finally chain the collected loads together with AddGlue.
/// NOTE(review): many listing numbers are absent (for example 172, 176-178,
/// 187-188, 193, 196, 202-208, 224-229, 234, 239, 241-242, 244-247), so the
/// declarations of Chain, Base and User and several guards are not visible in
/// this copy - confirm against upstream.
171 void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode
*Node
) {
173 unsigned NumOps
= Node
->getNumOperands();
174 if (Node
->getOperand(NumOps
-1).getValueType() == MVT::Other
)
175 Chain
= Node
->getOperand(NumOps
-1).getNode();
179 // Look for other loads of the same chain. Find loads that are loading from
180 // the same base pointer and different offsets.
181 SmallPtrSet
<SDNode
*, 16> Visited
;
182 SmallVector
<int64_t, 4> Offsets
;
183 DenseMap
<long long, SDNode
*> O2SMap
; // Map from offset to SDNode.
184 bool Cluster
= false;
186 for (SDNode::use_iterator I
= Chain
->use_begin(), E
= Chain
->use_end();
189 if (User
== Node
|| !Visited
.insert(User
))
191 int64_t Offset1
, Offset2
;
192 if (!TII
->areLoadsFromSameBasePtr(Base
, User
, Offset1
, Offset2
) ||
194 // FIXME: Should be ok if the addresses are identical. But earlier
195 // optimizations really should have eliminated one of the loads.
// Offset1 is recorded in Offsets only if it was newly inserted into O2SMap.
197 if (O2SMap
.insert(std::make_pair(Offset1
, Base
)).second
)
198 Offsets
.push_back(Offset1
);
199 O2SMap
.insert(std::make_pair(Offset2
, User
));
200 Offsets
.push_back(Offset2
);
201 if (Offset2
< Offset1
)
209 // Sort them in increasing order.
210 std::sort(Offsets
.begin(), Offsets
.end());
212 // Check if the loads are close enough.
213 SmallVector
<SDNode
*, 4> Loads
;
214 unsigned NumLoads
= 0;
215 int64_t BaseOff
= Offsets
[0];
216 SDNode
*BaseLoad
= O2SMap
[BaseOff
];
217 Loads
.push_back(BaseLoad
);
218 for (unsigned i
= 1, e
= Offsets
.size(); i
!= e
; ++i
) {
219 int64_t Offset
= Offsets
[i
];
220 SDNode
*Load
= O2SMap
[Offset
];
221 if (!TII
->shouldScheduleLoadsNear(BaseLoad
, Load
, BaseOff
, Offset
,NumLoads
))
222 break; // Stop right here. Ignore loads that are further away.
223 Loads
.push_back(Load
);
230 // Cluster loads by adding MVT::Glue outputs and inputs. This also
231 // ensures they are scheduled in order of increasing addresses.
232 SDNode
*Lead
= Loads
[0];
233 AddGlue(Lead
, SDValue(0, 0), true, DAG
);
// The last value of each load is passed as the glue input of the next one.
235 SDValue InGlue
= SDValue(Lead
, Lead
->getNumValues() - 1);
236 for (unsigned I
= 1, E
= Loads
.size(); I
!= E
; ++I
) {
// Every load except the final one is asked for a glue output.
237 bool OutGlue
= I
< E
- 1;
238 SDNode
*Load
= Loads
[I
];
240 AddGlue(Load
, InGlue
, OutGlue
, DAG
);
243 InGlue
= SDValue(Load
, Load
->getNumValues() - 1);
249 /// ClusterNodes - Cluster certain nodes which should be scheduled together.
/// Iterates over every node of the DAG (allnodes_begin to allnodes_end),
/// tests that the node is non-null and a machine opcode, looks up its
/// MCInstrDesc, and calls ClusterNeighboringLoads on it.
/// NOTE(review): listing numbers 254, 256-257, 260 and 263-265 are absent, so
/// the declaration of Node, the statement controlled by the first test and
/// any condition on MCID guarding the clustering call are not visible in this
/// copy - confirm against upstream.
251 void ScheduleDAGSDNodes::ClusterNodes() {
252 for (SelectionDAG::allnodes_iterator NI
= DAG
->allnodes_begin(),
253 E
= DAG
->allnodes_end(); NI
!= E
; ++NI
) {
255 if (!Node
|| !Node
->isMachineOpcode())
258 unsigned Opc
= Node
->getMachineOpcode();
259 const MCInstrDesc
&MCID
= TII
->get(Opc
);
261 // Cluster loads from "near" addresses into combined SUnits.
262 ClusterNeighboringLoads(Node
);
/// BuildSchedUnits - Populates SUnits from the selection DAG.
/// Visible steps: iterate over all DAG nodes; reserve NumNodes * 2 entries in
/// SUnits; traverse the DAG from its root through operands with a worklist
/// and a visited set; create an SUnit with NewSUnit for each node whose node
/// id is still -1; follow Glue operands upwards and Glue users downwards,
/// stamping each glued node with the SUnit's NodeNum and setting isCall when a
/// glued machine node's descriptor reports isCall(); mark TokenFactor units
/// isScheduleLow; then call InitNumRegDefsLeft and ComputeLatency. Afterwards,
/// for every SUnit collected in CallSUnits, the SUnit that produces operand 2
/// of each CopyToReg in its glue chain is marked isCallOp.
/// NOTE(review): many listing numbers are absent (for example 273-276, 279,
/// 300-301, 312, 320-321, 323, 330, 332, 335, 338-339, 341-342, 354, 372), so
/// the body of the first loop, the declarations of N and several `continue` /
/// `break` statements are not visible in this copy - confirm against upstream.
266 void ScheduleDAGSDNodes::BuildSchedUnits() {
267 // During scheduling, the NodeId field of SDNode is used to map SDNodes
268 // to their associated SUnits by holding SUnits table indices. A value
269 // of -1 means the SDNode does not yet have an associated SUnit.
270 unsigned NumNodes
= 0;
271 for (SelectionDAG::allnodes_iterator NI
= DAG
->allnodes_begin(),
272 E
= DAG
->allnodes_end(); NI
!= E
; ++NI
) {
277 // Reserve entries in the vector for each of the SUnits we are creating. This
278 // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
280 // FIXME: Multiply by 2 because we may clone nodes during scheduling.
281 // This is a temporary workaround.
282 SUnits
.reserve(NumNodes
* 2);
284 // Add all nodes in depth first order.
285 SmallVector
<SDNode
*, 64> Worklist
;
286 SmallPtrSet
<SDNode
*, 64> Visited
;
287 Worklist
.push_back(DAG
->getRoot().getNode());
288 Visited
.insert(DAG
->getRoot().getNode());
// Units flagged isCall are collected here and processed after the traversal.
290 SmallVector
<SUnit
*, 8> CallSUnits
;
291 while (!Worklist
.empty()) {
292 SDNode
*NI
= Worklist
.pop_back_val();
294 // Add all operands to the worklist unless they've already been added.
295 for (unsigned i
= 0, e
= NI
->getNumOperands(); i
!= e
; ++i
)
296 if (Visited
.insert(NI
->getOperand(i
).getNode()))
297 Worklist
.push_back(NI
->getOperand(i
).getNode());
299 if (isPassiveNode(NI
)) // Leaf node, e.g. a TargetImmediate.
302 // If this node has already been processed, stop now.
303 if (NI
->getNodeId() != -1) continue;
305 SUnit
*NodeSUnit
= NewSUnit(NI
);
307 // See if anything is glued to this node, if so, add them to glued
308 // nodes. Nodes can have at most one glue input and one glue output. Glue
309 // is required to be the last operand and result of a node.
311 // Scan up to find glued preds.
313 while (N
->getNumOperands() &&
314 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
315 N
= N
->getOperand(N
->getNumOperands()-1).getNode();
316 assert(N
->getNodeId() == -1 && "Node already inserted!");
317 N
->setNodeId(NodeSUnit
->NodeNum
);
318 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
319 NodeSUnit
->isCall
= true;
322 // Scan down to find any glued succs.
324 while (N
->getValueType(N
->getNumValues()-1) == MVT::Glue
) {
325 SDValue
GlueVal(N
, N
->getNumValues()-1);
327 // There are either zero or one users of the Glue result.
328 bool HasGlueUse
= false;
329 for (SDNode::use_iterator UI
= N
->use_begin(), E
= N
->use_end();
331 if (GlueVal
.isOperandOf(*UI
)) {
333 assert(N
->getNodeId() == -1 && "Node already inserted!");
334 N
->setNodeId(NodeSUnit
->NodeNum
);
336 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
337 NodeSUnit
->isCall
= true;
340 if (!HasGlueUse
) break;
343 if (NodeSUnit
->isCall
)
344 CallSUnits
.push_back(NodeSUnit
);
346 // Schedule zero-latency TokenFactor below any nodes that may increase the
347 // schedule height. Otherwise, ancestors of the TokenFactor may appear to
348 // have false stalls.
349 if (NI
->getOpcode() == ISD::TokenFactor
)
350 NodeSUnit
->isScheduleLow
= true;
352 // If there are glue operands involved, N is now the bottom-most node
353 // of the sequence of nodes that are glued together.
355 NodeSUnit
->setNode(N
);
356 assert(N
->getNodeId() == -1 && "Node already inserted!");
357 N
->setNodeId(NodeSUnit
->NodeNum
);
359 // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
360 InitNumRegDefsLeft(NodeSUnit
);
362 // Assign the Latency field of NodeSUnit using target-provided information.
363 ComputeLatency(NodeSUnit
);
366 // Find all call operands.
367 while (!CallSUnits
.empty()) {
368 SUnit
*SU
= CallSUnits
.pop_back_val();
369 for (const SDNode
*SUNode
= SU
->getNode(); SUNode
;
370 SUNode
= SUNode
->getGluedNode()) {
371 if (SUNode
->getOpcode() != ISD::CopyToReg
)
// Operand 2 of the CopyToReg is the node whose SUnit gets flagged.
373 SDNode
*SrcN
= SUNode
->getOperand(2).getNode();
374 if (isPassiveNode(SrcN
)) continue; // Not scheduled.
375 SUnit
*SrcSU
= &SUnits
[SrcN
->getNodeId()];
376 SrcSU
->isCallOp
= true;
/// AddSchedEdges - Second pass over SUnits that fills in per-unit flags and
/// predecessor edges.
/// Visible steps for each SUnit: when its main node is a machine opcode, set
/// isTwoAddress if any operand has a TIED_TO constraint and isCommutable if
/// the descriptor is commutable; for every node in the unit's glue chain,
/// set hasPhysRegClobbers when the descriptor has implicit defs and
/// hasPhysRegDefs when a used result lies beyond the explicit defs; and for
/// every operand defined by a node in a different SUnit, build an SDep
/// (SDep::Order for MVT::Other operands, SDep::Data otherwise), optionally
/// refine it with ComputeOperandLatency and the subtarget's
/// adjustSchedDependency, and add it with SU->addPred. When addPred returns
/// false for a non-control edge and the producer has more than one
/// NumRegDefsLeft, that count is decremented.
/// NOTE(review): many listing numbers are absent (for example 391, 398-400,
/// 429, 440-441, 446-447, 449, 453-454), so the declaration of Cost, the
/// statements controlled by the Cost and TokenFactor tests, and the remaining
/// SDep constructor arguments are not visible in this copy - confirm against
/// upstream.
381 void ScheduleDAGSDNodes::AddSchedEdges() {
382 const TargetSubtargetInfo
&ST
= TM
.getSubtarget
<TargetSubtargetInfo
>();
384 // Check to see if the scheduler cares about latencies.
385 bool UnitLatencies
= ForceUnitLatencies();
387 // Pass 2: add the preds, succs, etc.
388 for (unsigned su
= 0, e
= SUnits
.size(); su
!= e
; ++su
) {
389 SUnit
*SU
= &SUnits
[su
];
390 SDNode
*MainNode
= SU
->getNode();
392 if (MainNode
->isMachineOpcode()) {
393 unsigned Opc
= MainNode
->getMachineOpcode();
394 const MCInstrDesc
&MCID
= TII
->get(Opc
);
395 for (unsigned i
= 0; i
!= MCID
.getNumOperands(); ++i
) {
396 if (MCID
.getOperandConstraint(i
, MCOI::TIED_TO
) != -1) {
397 SU
->isTwoAddress
= true;
401 if (MCID
.isCommutable())
402 SU
->isCommutable
= true;
405 // Find all predecessors and successors of the group.
406 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode()) {
407 if (N
->isMachineOpcode() &&
408 TII
->get(N
->getMachineOpcode()).getImplicitDefs()) {
409 SU
->hasPhysRegClobbers
= true;
410 unsigned NumUsed
= InstrEmitter::CountResults(N
);
411 while (NumUsed
!= 0 && !N
->hasAnyUseOfValue(NumUsed
- 1))
412 --NumUsed
; // Skip over unused values at the end.
413 if (NumUsed
> TII
->get(N
->getMachineOpcode()).getNumDefs())
414 SU
->hasPhysRegDefs
= true;
417 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
418 SDNode
*OpN
= N
->getOperand(i
).getNode();
419 if (isPassiveNode(OpN
)) continue; // Not scheduled.
420 SUnit
*OpSU
= &SUnits
[OpN
->getNodeId()];
421 assert(OpSU
&& "Node has no SUnit!");
422 if (OpSU
== SU
) continue; // In the same group.
424 EVT OpVT
= N
->getOperand(i
).getValueType();
425 assert(OpVT
!= MVT::Glue
&& "Glued nodes should be in same sunit!");
426 bool isChain
= OpVT
== MVT::Other
;
428 unsigned PhysReg
= 0;
430 // Determine if this is a physical register dependency.
431 CheckForPhysRegDependency(OpN
, N
, i
, TRI
, TII
, PhysReg
, Cost
);
432 assert((PhysReg
== 0 || !isChain
) &&
433 "Chain dependence via physreg data?");
434 // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
435 // emits a copy from the physical register to a virtual register unless
436 // it requires a cross class copy (cost < 0). That means we are only
437 // treating "expensive to copy" register dependency as physical register
438 // dependency. This may change in the future though.
439 if (Cost
>= 0 && !StressSched
)
442 // If this is a ctrl dep, latency is 1.
443 unsigned OpLatency
= isChain
? 1 : OpSU
->Latency
;
444 // Special-case TokenFactor chains as zero-latency.
445 if(isChain
&& OpN
->getOpcode() == ISD::TokenFactor
)
448 const SDep
&dep
= SDep(OpSU
, isChain
? SDep::Order
: SDep::Data
,
450 if (!isChain
&& !UnitLatencies
) {
// dep is bound as a const reference, so it is const_cast to be updated.
451 ComputeOperandLatency(OpN
, N
, i
, const_cast<SDep
&>(dep
));
452 ST
.adjustSchedDependency(OpSU
, SU
, const_cast<SDep
&>(dep
));
455 if (!SU
->addPred(dep
) && !dep
.isCtrl() && OpSU
->NumRegDefsLeft
> 1) {
456 // Multiple register uses are combined in the same SUnit. For example,
457 // we could have a set of glued nodes with all their defs consumed by
458 // another set of glued nodes. Register pressure tracking sees this as
459 // a single use, so to keep pressure balanced we reduce the defs.
461 // We can't tell (without more book-keeping) if this results from
462 // glued nodes or duplicate operands. As long as we don't reduce
463 // NumRegDefsLeft to zero, we handle the common cases well.
464 --OpSU
->NumRegDefsLeft
;
471 /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
472 /// are input. This SUnit graph is similar to the SelectionDAG, but
473 /// excludes nodes that aren't interesting to scheduling, and represents
474 /// glued together nodes with a single SUnit.
/// The body comments name three phases: clustering nodes, populating the
/// SUnits array and computing the scheduling dependencies.
/// NOTE(review): listing numbers 477, 479 and 481-482 are absent, so the
/// statements that carry out those phases, and any use of \p AA, are not
/// visible in this copy - confirm against upstream.
475 void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis
*AA
) {
476 // Cluster certain nodes which should be scheduled together.
478 // Populate the SUnits array.
480 // Compute all the scheduling dependencies between nodes.
484 // Initialize NumNodeDefs for the current Node's opcode.
// The visible code distinguishes non-machine nodes (testing for CopyFromReg)
// and the IMPLICIT_DEF machine opcode from other machine opcodes; for the
// latter NodeNumDefs becomes the smaller of Node->getNumValues() and the
// number of defs in the instruction descriptor.
// NOTE(review): listing numbers 487-489, 492-496, 500-502 and 507-508 are
// absent, so the values assigned on the special-case branches are not
// visible in this copy - confirm against upstream.
485 void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
486 // Check for phys reg copy.
490 if (!Node
->isMachineOpcode()) {
491 if (Node
->getOpcode() == ISD::CopyFromReg
)
497 unsigned POpc
= Node
->getMachineOpcode();
498 if (POpc
== TargetOpcode::IMPLICIT_DEF
) {
499 // No register need be allocated for this.
503 unsigned NRegDefs
= SchedDAG
->TII
->get(Node
->getMachineOpcode()).getNumDefs();
504 // Some instructions define regs that are not represented in the selection DAG
505 // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
506 NodeNumDefs
= std::min(Node
->getNumValues(), NRegDefs
);
510 // Construct a RegDefIter for this SUnit and find the first valid value.
// The initialiser list stores SD in SchedDAG, starts Node at SU->getNode()
// and zeroes DefIdx and NodeNumDefs.
// NOTE(review): listing numbers 514-516 are absent, so the constructor body
// that finds the first valid value is not visible in this copy - confirm
// against upstream.
511 ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit
*SU
,
512 const ScheduleDAGSDNodes
*SD
)
513 : SchedDAG(SD
), Node(SU
->getNode()), DefIdx(0), NodeNumDefs(0) {
518 // Advance to the next valid value defined by the SUnit.
// For the current Node, DefIdx is stepped up to NodeNumDefs; each index is
// tested with hasAnyUseOfValue, and when a value is accepted its type is
// stored in ValueType and the function returns. Once the indices of a node
// are exhausted the iterator moves to Node->getGluedNode(); the outer loop
// ends when Node becomes null.
// NOTE(review): listing numbers 523, 525, 527, 529 and 531-534 are absent,
// so the statement controlled by the hasAnyUseOfValue test and the
// per-node re-initialisation are not visible in this copy - confirm against
// upstream.
519 void ScheduleDAGSDNodes::RegDefIter::Advance() {
520 for (;Node
;) { // Visit all glued nodes.
521 for (;DefIdx
< NodeNumDefs
; ++DefIdx
) {
522 if (!Node
->hasAnyUseOfValue(DefIdx
))
524 ValueType
= Node
->getValueType(DefIdx
);
526 return; // Found a normal regdef.
528 Node
= Node
->getGluedNode();
530 return; // No values left to visit.
/// InitNumRegDefsLeft - Sets SU->NumRegDefsLeft to the number of values a
/// RegDefIter over \p SU visits. The first assert requires the counter to be
/// zero on entry; the in-loop assert flags the counter reaching USHRT_MAX.
536 void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit
*SU
) {
537 assert(SU
->NumRegDefsLeft
== 0 && "expect a new node");
538 for (RegDefIter
I(SU
, this); I
.IsValid(); I
.Advance()) {
539 assert(SU
->NumRegDefsLeft
< USHRT_MAX
&& "overflow is ok but unexpected");
540 ++SU
->NumRegDefsLeft
;
/// ComputeLatency - Assigns SU->Latency.
/// The visible code handles four cases in order: a TokenFactor node, forced
/// unit latencies, a missing or empty instruction itinerary (where a machine
/// node that TII reports as a high-latency def gets HighLatencyCycles), and
/// the general case, which adds TII->getInstrLatency for every machine
/// opcode in the unit's glue chain.
/// NOTE(review): listing numbers 551-553, 557-559, 565-568 and 572 are
/// absent, so the latencies assigned in the first two cases, the fallback in
/// the third and the initial value of the sum are not visible in this copy -
/// confirm against upstream.
544 void ScheduleDAGSDNodes::ComputeLatency(SUnit
*SU
) {
545 SDNode
*N
= SU
->getNode();
547 // TokenFactor operands are considered zero latency, and some schedulers
548 // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
549 // whenever node latency is nonzero.
550 if (N
&& N
->getOpcode() == ISD::TokenFactor
) {
555 // Check to see if the scheduler cares about latencies.
556 if (ForceUnitLatencies()) {
561 if (!InstrItins
|| InstrItins
->isEmpty()) {
562 if (N
&& N
->isMachineOpcode() &&
563 TII
->isHighLatencyDef(N
->getMachineOpcode()))
564 SU
->Latency
= HighLatencyCycles
;
570 // Compute the latency for the node. We use the sum of the latencies for
571 // all nodes glued together into this SUnit.
573 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode())
574 if (N
->isMachineOpcode())
575 SU
->Latency
+= TII
->getInstrLatency(InstrItins
, N
);
/// ComputeOperandLatency - Sets the latency of \p dep from the target's
/// per-operand latency for the edge from \p Def to operand \p OpIdx of
/// \p Use. DefIdx is the result number of that operand; when \p Use is a
/// machine opcode, \p OpIdx is shifted by its number of defs before
/// TII->getOperandLatency is queried. For a CopyToReg use with latency above
/// 1 whose destination is a virtual register, the latency is reduced by one.
/// NOTE(review): listing numbers 582-583, 585-586, 593 and 600-601 are
/// absent, so the statements controlled by the two leading tests, the rest of
/// the CopyToReg condition and any guard on the final setLatency are not
/// visible in this copy - confirm against upstream.
578 void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode
*Def
, SDNode
*Use
,
579 unsigned OpIdx
, SDep
& dep
) const{
580 // Check to see if the scheduler cares about latencies.
581 if (ForceUnitLatencies())
584 if (dep
.getKind() != SDep::Data
)
587 unsigned DefIdx
= Use
->getOperand(OpIdx
).getResNo();
588 if (Use
->isMachineOpcode())
589 // Adjust the use operand index by num of defs.
590 OpIdx
+= TII
->get(Use
->getMachineOpcode()).getNumDefs();
591 int Latency
= TII
->getOperandLatency(InstrItins
, Def
, DefIdx
, Use
, OpIdx
);
592 if (Latency
> 1 && Use
->getOpcode() == ISD::CopyToReg
&&
594 unsigned Reg
= cast
<RegisterSDNode
>(Use
->getOperand(1))->getReg();
595 if (TargetRegisterInfo::isVirtualRegister(Reg
))
596 // This copy is a liveout value. It is likely coalesced, so reduce the
597 // latency so as not to penalize the def.
598 // FIXME: need target specific adjustment here?
599 Latency
= (Latency
> 1) ? Latency
- 1 : 1;
602 dep
.setLatency(Latency
);
/// dumpNode - Debug dump of \p SU. Prints "PHYS REG COPY" to dbgs() when the
/// unit has no node; otherwise dumps the unit's node, collects the nodes
/// reached by following getGluedNode() from it, and dumps them starting from
/// the back of that list (the node collected last is dumped first).
/// NOTE(review): listing numbers 608-610, 612, 617, 619 and 621-623 are
/// absent, so the early return and any separators printed between nodes are
/// not visible in this copy - confirm against upstream.
605 void ScheduleDAGSDNodes::dumpNode(const SUnit
*SU
) const {
606 if (!SU
->getNode()) {
607 dbgs() << "PHYS REG COPY\n";
611 SU
->getNode()->dump(DAG
);
613 SmallVector
<SDNode
*, 4> GluedNodes
;
614 for (SDNode
*N
= SU
->getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
615 GluedNodes
.push_back(N
);
616 while (!GluedNodes
.empty()) {
618 GluedNodes
.back()->dump(DAG
);
620 GluedNodes
.pop_back();
// Comparison call operator: orders two (unsigned, MachineInstr*) pairs by
// their first member only; the MachineInstr pointer takes no part in the
// comparison.
// NOTE(review): the enclosing type declaration (listing number 625) is not
// visible in this copy; presumably this is the OrderSorter functor that
// EmitSchedule passes to std::sort - confirm against upstream.
626 bool operator()(const std::pair
<unsigned, MachineInstr
*> &A
,
627 const std::pair
<unsigned, MachineInstr
*> &B
) {
628 return A
.first
< B
.first
;
633 /// ProcessSDDbgValues - Process SDDbgValues associated with this node.
/// When \p N has debug values, each SDDbgValue from DAG->GetDbgValues(N) that
/// passes the isInvalidated() test is considered: if Order is zero or the
/// value's order equals the pre-incremented Order, a DBG_VALUE instruction is
/// emitted through \p Emitter, recorded in \p Orders with the value's order,
/// inserted into the emitter's block at its insert position, and the
/// SDDbgValue is marked invalidated.
/// NOTE(review): listing numbers 638, 640-641, 649, 653 and 656 are absent,
/// so the final parameter (Order), the early return, the statement controlled
/// by the isInvalidated() test and any null check on the emitted instruction
/// are not visible in this copy - confirm against upstream.
634 static void ProcessSDDbgValues(SDNode
*N
, SelectionDAG
*DAG
,
635 InstrEmitter
&Emitter
,
636 SmallVector
<std::pair
<unsigned, MachineInstr
*>, 32> &Orders
,
637 DenseMap
<SDValue
, unsigned> &VRBaseMap
,
639 if (!N
->getHasDebugValue())
642 // Opportunistically insert immediate dbg_value uses, i.e. those with source
643 // order number right after the N.
644 MachineBasicBlock
*BB
= Emitter
.getBlock();
645 MachineBasicBlock::iterator InsertPos
= Emitter
.getInsertPos();
646 ArrayRef
<SDDbgValue
*> DVs
= DAG
->GetDbgValues(N
);
647 for (unsigned i
= 0, e
= DVs
.size(); i
!= e
; ++i
) {
648 if (DVs
[i
]->isInvalidated())
650 unsigned DVOrder
= DVs
[i
]->getOrder();
// Note that Order is incremented as a side effect of this comparison.
651 if (!Order
|| DVOrder
== ++Order
) {
652 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(DVs
[i
], VRBaseMap
);
654 Orders
.push_back(std::make_pair(DVOrder
, DbgMI
));
655 BB
->insert(InsertPos
, DbgMI
);
657 DVs
[i
]->setIsInvalidated();
662 // ProcessSourceNode - Process nodes with source order numbers. These are added
663 // to a vector which EmitSchedule uses to determine how to insert dbg_value
664 // instructions in the right order.
// Order is DAG->GetOrdering(N). When it is zero or was already present in
// Seen, only ProcessSDDbgValues is called, with an order of 0. Otherwise a
// pair is appended to Orders: (Order, null) when the emitter's insert
// position is the start of the block or the block's last instruction is a
// PHI, and (Order, instruction before the insert position) in the other
// visible case, which is followed by ProcessSDDbgValues with Order.
// NOTE(review): listing numbers 673, 675-677, 682-684 and 687-688 are
// absent, so the returns that separate these cases are not visible in this
// copy - confirm against upstream.
665 static void ProcessSourceNode(SDNode
*N
, SelectionDAG
*DAG
,
666 InstrEmitter
&Emitter
,
667 DenseMap
<SDValue
, unsigned> &VRBaseMap
,
668 SmallVector
<std::pair
<unsigned, MachineInstr
*>, 32> &Orders
,
669 SmallSet
<unsigned, 8> &Seen
) {
670 unsigned Order
= DAG
->GetOrdering(N
);
671 if (!Order
|| !Seen
.insert(Order
)) {
672 // Process any valid SDDbgValues even if node does not have any order
674 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, 0);
678 MachineBasicBlock
*BB
= Emitter
.getBlock();
679 if (Emitter
.getInsertPos() == BB
->begin() || BB
->back().isPHI()) {
680 // Did not insert any instruction.
681 Orders
.push_back(std::make_pair(Order
, (MachineInstr
*)0));
685 Orders
.push_back(std::make_pair(Order
, prior(Emitter
.getInsertPos())));
686 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, Order
);
690 /// EmitSchedule - Emit the machine code in scheduled order.
691 MachineBasicBlock
*ScheduleDAGSDNodes::EmitSchedule() {
692 InstrEmitter
Emitter(BB
, InsertPos
);
693 DenseMap
<SDValue
, unsigned> VRBaseMap
;
694 DenseMap
<SUnit
*, unsigned> CopyVRBaseMap
;
695 SmallVector
<std::pair
<unsigned, MachineInstr
*>, 32> Orders
;
696 SmallSet
<unsigned, 8> Seen
;
697 bool HasDbg
= DAG
->hasDebugValues();
699 // If this is the first BB, emit byval parameter dbg_value's.
700 if (HasDbg
&& BB
->getParent()->begin() == MachineFunction::iterator(BB
)) {
701 SDDbgInfo::DbgIterator PDI
= DAG
->ByvalParmDbgBegin();
702 SDDbgInfo::DbgIterator PDE
= DAG
->ByvalParmDbgEnd();
703 for (; PDI
!= PDE
; ++PDI
) {
704 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*PDI
, VRBaseMap
);
706 BB
->insert(InsertPos
, DbgMI
);
710 for (unsigned i
= 0, e
= Sequence
.size(); i
!= e
; i
++) {
711 SUnit
*SU
= Sequence
[i
];
713 // Null SUnit* is a noop.
718 // For pre-regalloc scheduling, create instructions corresponding to the
719 // SDNode and any glued SDNodes and append them to the block.
720 if (!SU
->getNode()) {
722 EmitPhysRegCopy(SU
, CopyVRBaseMap
);
726 SmallVector
<SDNode
*, 4> GluedNodes
;
727 for (SDNode
*N
= SU
->getNode()->getGluedNode(); N
;
728 N
= N
->getGluedNode())
729 GluedNodes
.push_back(N
);
730 while (!GluedNodes
.empty()) {
731 SDNode
*N
= GluedNodes
.back();
732 Emitter
.EmitNode(GluedNodes
.back(), SU
->OrigNode
!= SU
, SU
->isCloned
,
734 // Remember the source order of the inserted instruction.
736 ProcessSourceNode(N
, DAG
, Emitter
, VRBaseMap
, Orders
, Seen
);
737 GluedNodes
.pop_back();
739 Emitter
.EmitNode(SU
->getNode(), SU
->OrigNode
!= SU
, SU
->isCloned
,
741 // Remember the source order of the inserted instruction.
743 ProcessSourceNode(SU
->getNode(), DAG
, Emitter
, VRBaseMap
, Orders
,
747 // Insert all the dbg_values which have not already been inserted in source
750 MachineBasicBlock::iterator BBBegin
= BB
->getFirstNonPHI();
752 // Sort the source order instructions and use the order to insert debug
754 std::sort(Orders
.begin(), Orders
.end(), OrderSorter());
756 SDDbgInfo::DbgIterator DI
= DAG
->DbgBegin();
757 SDDbgInfo::DbgIterator DE
= DAG
->DbgEnd();
758 // Now emit the rest according to source order.
759 unsigned LastOrder
= 0;
760 for (unsigned i
= 0, e
= Orders
.size(); i
!= e
&& DI
!= DE
; ++i
) {
761 unsigned Order
= Orders
[i
].first
;
762 MachineInstr
*MI
= Orders
[i
].second
;
763 // Insert all SDDbgValue's whose order(s) are before "Order".
767 (*DI
)->getOrder() >= LastOrder
&& (*DI
)->getOrder() < Order
; ++DI
) {
768 if ((*DI
)->isInvalidated())
770 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
);
773 // Insert to start of the BB (after PHIs).
774 BB
->insert(BBBegin
, DbgMI
);
776 // Insert at the instruction, which may be in a different
777 // block, if the block was split by a custom inserter.
778 MachineBasicBlock::iterator Pos
= MI
;
779 MI
->getParent()->insert(llvm::next(Pos
), DbgMI
);
785 // Add trailing DbgValue's before the terminator. FIXME: May want to add
786 // some of them before one or more conditional branches?
788 MachineBasicBlock
*InsertBB
= Emitter
.getBlock();
789 MachineBasicBlock::iterator Pos
= Emitter
.getBlock()->getFirstTerminator();
790 if (!(*DI
)->isInvalidated()) {
791 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
);
793 InsertBB
->insert(Pos
, DbgMI
);
799 BB
= Emitter
.getBlock();
800 InsertPos
= Emitter
.getInsertPos();