1 //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This implements the ScheduleDAG class, which is a base class used by
10 // scheduling implementation classes.
12 //===----------------------------------------------------------------------===//
14 #include "ScheduleDAGSDNodes.h"
15 #include "InstrEmitter.h"
16 #include "SDNodeDbgValue.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSubtargetInfo.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/MC/MCInstrItineraries.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/raw_ostream.h"
36 #define DEBUG_TYPE "pre-RA-sched"
38 STATISTIC(LoadsClustered
, "Number of loads clustered together");
40 // This allows the latency-based scheduler to notice high latency instructions
41 // without a target itinerary. The choice of number here has more to do with
42 // balancing scheduler heuristics than with the actual machine latency.
43 static cl::opt
<int> HighLatencyCycles(
44 "sched-high-latency-cycles", cl::Hidden
, cl::init(10),
45 cl::desc("Roughly estimate the number of cycles that 'long latency'"
46 "instructions take for targets with no itinerary"));
48 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction
&mf
)
49 : ScheduleDAG(mf
), BB(nullptr), DAG(nullptr),
50 InstrItins(mf
.getSubtarget().getInstrItineraryData()) {}
52 /// Run - perform scheduling.
54 void ScheduleDAGSDNodes::Run(SelectionDAG
*dag
, MachineBasicBlock
*bb
) {
58 // Clear the scheduler's SUnit DAG.
59 ScheduleDAG::clearDAG();
62 // Invoke the target's selection of scheduler.
66 /// NewSUnit - Creates a new SUnit and return a ptr to it.
68 SUnit
*ScheduleDAGSDNodes::newSUnit(SDNode
*N
) {
70 const SUnit
*Addr
= nullptr;
74 SUnits
.emplace_back(N
, (unsigned)SUnits
.size());
75 assert((Addr
== nullptr || Addr
== &SUnits
[0]) &&
76 "SUnits std::vector reallocated on the fly!");
77 SUnits
.back().OrigNode
= &SUnits
.back();
78 SUnit
*SU
= &SUnits
.back();
79 const TargetLowering
&TLI
= DAG
->getTargetLoweringInfo();
81 (N
->isMachineOpcode() &&
82 N
->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF
))
83 SU
->SchedulingPref
= Sched::None
;
85 SU
->SchedulingPref
= TLI
.getSchedulingPreference(N
);
89 SUnit
*ScheduleDAGSDNodes::Clone(SUnit
*Old
) {
90 SUnit
*SU
= newSUnit(Old
->getNode());
91 SU
->OrigNode
= Old
->OrigNode
;
92 SU
->Latency
= Old
->Latency
;
93 SU
->isVRegCycle
= Old
->isVRegCycle
;
94 SU
->isCall
= Old
->isCall
;
95 SU
->isCallOp
= Old
->isCallOp
;
96 SU
->isTwoAddress
= Old
->isTwoAddress
;
97 SU
->isCommutable
= Old
->isCommutable
;
98 SU
->hasPhysRegDefs
= Old
->hasPhysRegDefs
;
99 SU
->hasPhysRegClobbers
= Old
->hasPhysRegClobbers
;
100 SU
->isScheduleHigh
= Old
->isScheduleHigh
;
101 SU
->isScheduleLow
= Old
->isScheduleLow
;
102 SU
->SchedulingPref
= Old
->SchedulingPref
;
103 Old
->isCloned
= true;
107 /// CheckForPhysRegDependency - Check if the dependency between def and use of
108 /// a specified operand is a physical register dependency. If so, returns the
109 /// register and the cost of copying the register.
110 static void CheckForPhysRegDependency(SDNode
*Def
, SDNode
*User
, unsigned Op
,
111 const TargetRegisterInfo
*TRI
,
112 const TargetInstrInfo
*TII
,
113 unsigned &PhysReg
, int &Cost
) {
114 if (Op
!= 2 || User
->getOpcode() != ISD::CopyToReg
)
117 unsigned Reg
= cast
<RegisterSDNode
>(User
->getOperand(1))->getReg();
118 if (TargetRegisterInfo::isVirtualRegister(Reg
))
121 unsigned ResNo
= User
->getOperand(2).getResNo();
122 if (Def
->getOpcode() == ISD::CopyFromReg
&&
123 cast
<RegisterSDNode
>(Def
->getOperand(1))->getReg() == Reg
) {
125 } else if (Def
->isMachineOpcode()) {
126 const MCInstrDesc
&II
= TII
->get(Def
->getMachineOpcode());
127 if (ResNo
>= II
.getNumDefs() &&
128 II
.ImplicitDefs
[ResNo
- II
.getNumDefs()] == Reg
)
133 const TargetRegisterClass
*RC
=
134 TRI
->getMinimalPhysRegClass(Reg
, Def
->getSimpleValueType(ResNo
));
135 Cost
= RC
->getCopyCost();
139 // Helper for AddGlue to clone node operands.
140 static void CloneNodeWithValues(SDNode
*N
, SelectionDAG
*DAG
, ArrayRef
<EVT
> VTs
,
141 SDValue ExtraOper
= SDValue()) {
142 SmallVector
<SDValue
, 8> Ops(N
->op_begin(), N
->op_end());
143 if (ExtraOper
.getNode())
144 Ops
.push_back(ExtraOper
);
146 SDVTList VTList
= DAG
->getVTList(VTs
);
147 MachineSDNode
*MN
= dyn_cast
<MachineSDNode
>(N
);
149 // Store memory references.
150 SmallVector
<MachineMemOperand
*, 2> MMOs
;
152 MMOs
.assign(MN
->memoperands_begin(), MN
->memoperands_end());
154 DAG
->MorphNodeTo(N
, N
->getOpcode(), VTList
, Ops
);
156 // Reset the memory references
158 DAG
->setNodeMemRefs(MN
, MMOs
);
161 static bool AddGlue(SDNode
*N
, SDValue Glue
, bool AddGlue
, SelectionDAG
*DAG
) {
162 SDNode
*GlueDestNode
= Glue
.getNode();
164 // Don't add glue from a node to itself.
165 if (GlueDestNode
== N
) return false;
167 // Don't add a glue operand to something that already uses glue.
169 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
172 // Don't add glue to something that already has a glue value.
173 if (N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
) return false;
175 SmallVector
<EVT
, 4> VTs(N
->value_begin(), N
->value_end());
177 VTs
.push_back(MVT::Glue
);
179 CloneNodeWithValues(N
, DAG
, VTs
, Glue
);
184 // Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
185 // node even though simply shrinking the value list is sufficient.
186 static void RemoveUnusedGlue(SDNode
*N
, SelectionDAG
*DAG
) {
187 assert((N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
&&
188 !N
->hasAnyUseOfValue(N
->getNumValues() - 1)) &&
189 "expected an unused glue value");
191 CloneNodeWithValues(N
, DAG
,
192 makeArrayRef(N
->value_begin(), N
->getNumValues() - 1));
195 /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
196 /// This function finds loads of the same base and different offsets. If the
197 /// offsets are not far apart (target specific), it add MVT::Glue inputs and
198 /// outputs to ensure they are scheduled together and in order. This
199 /// optimization may benefit some targets by improving cache locality.
200 void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode
*Node
) {
201 SDNode
*Chain
= nullptr;
202 unsigned NumOps
= Node
->getNumOperands();
203 if (Node
->getOperand(NumOps
-1).getValueType() == MVT::Other
)
204 Chain
= Node
->getOperand(NumOps
-1).getNode();
208 // Look for other loads of the same chain. Find loads that are loading from
209 // the same base pointer and different offsets.
210 SmallPtrSet
<SDNode
*, 16> Visited
;
211 SmallVector
<int64_t, 4> Offsets
;
212 DenseMap
<long long, SDNode
*> O2SMap
; // Map from offset to SDNode.
213 bool Cluster
= false;
215 // This algorithm requires a reasonably low use count before finding a match
216 // to avoid uselessly blowing up compile time in large blocks.
217 unsigned UseCount
= 0;
218 for (SDNode::use_iterator I
= Chain
->use_begin(), E
= Chain
->use_end();
219 I
!= E
&& UseCount
< 100; ++I
, ++UseCount
) {
221 if (User
== Node
|| !Visited
.insert(User
).second
)
223 int64_t Offset1
, Offset2
;
224 if (!TII
->areLoadsFromSameBasePtr(Base
, User
, Offset1
, Offset2
) ||
226 // FIXME: Should be ok if they addresses are identical. But earlier
227 // optimizations really should have eliminated one of the loads.
229 if (O2SMap
.insert(std::make_pair(Offset1
, Base
)).second
)
230 Offsets
.push_back(Offset1
);
231 O2SMap
.insert(std::make_pair(Offset2
, User
));
232 Offsets
.push_back(Offset2
);
233 if (Offset2
< Offset1
)
236 // Reset UseCount to allow more matches.
243 // Sort them in increasing order.
246 // Check if the loads are close enough.
247 SmallVector
<SDNode
*, 4> Loads
;
248 unsigned NumLoads
= 0;
249 int64_t BaseOff
= Offsets
[0];
250 SDNode
*BaseLoad
= O2SMap
[BaseOff
];
251 Loads
.push_back(BaseLoad
);
252 for (unsigned i
= 1, e
= Offsets
.size(); i
!= e
; ++i
) {
253 int64_t Offset
= Offsets
[i
];
254 SDNode
*Load
= O2SMap
[Offset
];
255 if (!TII
->shouldScheduleLoadsNear(BaseLoad
, Load
, BaseOff
, Offset
,NumLoads
))
256 break; // Stop right here. Ignore loads that are further away.
257 Loads
.push_back(Load
);
264 // Cluster loads by adding MVT::Glue outputs and inputs. This also
265 // ensure they are scheduled in order of increasing addresses.
266 SDNode
*Lead
= Loads
[0];
267 SDValue InGlue
= SDValue(nullptr, 0);
268 if (AddGlue(Lead
, InGlue
, true, DAG
))
269 InGlue
= SDValue(Lead
, Lead
->getNumValues() - 1);
270 for (unsigned I
= 1, E
= Loads
.size(); I
!= E
; ++I
) {
271 bool OutGlue
= I
< E
- 1;
272 SDNode
*Load
= Loads
[I
];
274 // If AddGlue fails, we could leave an unsused glue value. This should not
276 if (AddGlue(Load
, InGlue
, OutGlue
, DAG
)) {
278 InGlue
= SDValue(Load
, Load
->getNumValues() - 1);
282 else if (!OutGlue
&& InGlue
.getNode())
283 RemoveUnusedGlue(InGlue
.getNode(), DAG
);
287 /// ClusterNodes - Cluster certain nodes which should be scheduled together.
289 void ScheduleDAGSDNodes::ClusterNodes() {
290 for (SDNode
&NI
: DAG
->allnodes()) {
292 if (!Node
|| !Node
->isMachineOpcode())
295 unsigned Opc
= Node
->getMachineOpcode();
296 const MCInstrDesc
&MCID
= TII
->get(Opc
);
298 // Cluster loads from "near" addresses into combined SUnits.
299 ClusterNeighboringLoads(Node
);
303 void ScheduleDAGSDNodes::BuildSchedUnits() {
304 // During scheduling, the NodeId field of SDNode is used to map SDNodes
305 // to their associated SUnits by holding SUnits table indices. A value
306 // of -1 means the SDNode does not yet have an associated SUnit.
307 unsigned NumNodes
= 0;
308 for (SDNode
&NI
: DAG
->allnodes()) {
313 // Reserve entries in the vector for each of the SUnits we are creating. This
314 // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
316 // FIXME: Multiply by 2 because we may clone nodes during scheduling.
317 // This is a temporary workaround.
318 SUnits
.reserve(NumNodes
* 2);
320 // Add all nodes in depth first order.
321 SmallVector
<SDNode
*, 64> Worklist
;
322 SmallPtrSet
<SDNode
*, 32> Visited
;
323 Worklist
.push_back(DAG
->getRoot().getNode());
324 Visited
.insert(DAG
->getRoot().getNode());
326 SmallVector
<SUnit
*, 8> CallSUnits
;
327 while (!Worklist
.empty()) {
328 SDNode
*NI
= Worklist
.pop_back_val();
330 // Add all operands to the worklist unless they've already been added.
331 for (const SDValue
&Op
: NI
->op_values())
332 if (Visited
.insert(Op
.getNode()).second
)
333 Worklist
.push_back(Op
.getNode());
335 if (isPassiveNode(NI
)) // Leaf node, e.g. a TargetImmediate.
338 // If this node has already been processed, stop now.
339 if (NI
->getNodeId() != -1) continue;
341 SUnit
*NodeSUnit
= newSUnit(NI
);
343 // See if anything is glued to this node, if so, add them to glued
344 // nodes. Nodes can have at most one glue input and one glue output. Glue
345 // is required to be the last operand and result of a node.
347 // Scan up to find glued preds.
349 while (N
->getNumOperands() &&
350 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
351 N
= N
->getOperand(N
->getNumOperands()-1).getNode();
352 assert(N
->getNodeId() == -1 && "Node already inserted!");
353 N
->setNodeId(NodeSUnit
->NodeNum
);
354 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
355 NodeSUnit
->isCall
= true;
358 // Scan down to find any glued succs.
360 while (N
->getValueType(N
->getNumValues()-1) == MVT::Glue
) {
361 SDValue
GlueVal(N
, N
->getNumValues()-1);
363 // There are either zero or one users of the Glue result.
364 bool HasGlueUse
= false;
365 for (SDNode::use_iterator UI
= N
->use_begin(), E
= N
->use_end();
367 if (GlueVal
.isOperandOf(*UI
)) {
369 assert(N
->getNodeId() == -1 && "Node already inserted!");
370 N
->setNodeId(NodeSUnit
->NodeNum
);
372 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
373 NodeSUnit
->isCall
= true;
376 if (!HasGlueUse
) break;
379 if (NodeSUnit
->isCall
)
380 CallSUnits
.push_back(NodeSUnit
);
382 // Schedule zero-latency TokenFactor below any nodes that may increase the
383 // schedule height. Otherwise, ancestors of the TokenFactor may appear to
384 // have false stalls.
385 if (NI
->getOpcode() == ISD::TokenFactor
)
386 NodeSUnit
->isScheduleLow
= true;
388 // If there are glue operands involved, N is now the bottom-most node
389 // of the sequence of nodes that are glued together.
391 NodeSUnit
->setNode(N
);
392 assert(N
->getNodeId() == -1 && "Node already inserted!");
393 N
->setNodeId(NodeSUnit
->NodeNum
);
395 // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
396 InitNumRegDefsLeft(NodeSUnit
);
398 // Assign the Latency field of NodeSUnit using target-provided information.
399 computeLatency(NodeSUnit
);
402 // Find all call operands.
403 while (!CallSUnits
.empty()) {
404 SUnit
*SU
= CallSUnits
.pop_back_val();
405 for (const SDNode
*SUNode
= SU
->getNode(); SUNode
;
406 SUNode
= SUNode
->getGluedNode()) {
407 if (SUNode
->getOpcode() != ISD::CopyToReg
)
409 SDNode
*SrcN
= SUNode
->getOperand(2).getNode();
410 if (isPassiveNode(SrcN
)) continue; // Not scheduled.
411 SUnit
*SrcSU
= &SUnits
[SrcN
->getNodeId()];
412 SrcSU
->isCallOp
= true;
417 void ScheduleDAGSDNodes::AddSchedEdges() {
418 const TargetSubtargetInfo
&ST
= MF
.getSubtarget();
420 // Check to see if the scheduler cares about latencies.
421 bool UnitLatencies
= forceUnitLatencies();
423 // Pass 2: add the preds, succs, etc.
424 for (unsigned su
= 0, e
= SUnits
.size(); su
!= e
; ++su
) {
425 SUnit
*SU
= &SUnits
[su
];
426 SDNode
*MainNode
= SU
->getNode();
428 if (MainNode
->isMachineOpcode()) {
429 unsigned Opc
= MainNode
->getMachineOpcode();
430 const MCInstrDesc
&MCID
= TII
->get(Opc
);
431 for (unsigned i
= 0; i
!= MCID
.getNumOperands(); ++i
) {
432 if (MCID
.getOperandConstraint(i
, MCOI::TIED_TO
) != -1) {
433 SU
->isTwoAddress
= true;
437 if (MCID
.isCommutable())
438 SU
->isCommutable
= true;
441 // Find all predecessors and successors of the group.
442 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode()) {
443 if (N
->isMachineOpcode() &&
444 TII
->get(N
->getMachineOpcode()).getImplicitDefs()) {
445 SU
->hasPhysRegClobbers
= true;
446 unsigned NumUsed
= InstrEmitter::CountResults(N
);
447 while (NumUsed
!= 0 && !N
->hasAnyUseOfValue(NumUsed
- 1))
448 --NumUsed
; // Skip over unused values at the end.
449 if (NumUsed
> TII
->get(N
->getMachineOpcode()).getNumDefs())
450 SU
->hasPhysRegDefs
= true;
453 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
454 SDNode
*OpN
= N
->getOperand(i
).getNode();
455 if (isPassiveNode(OpN
)) continue; // Not scheduled.
456 SUnit
*OpSU
= &SUnits
[OpN
->getNodeId()];
457 assert(OpSU
&& "Node has no SUnit!");
458 if (OpSU
== SU
) continue; // In the same group.
460 EVT OpVT
= N
->getOperand(i
).getValueType();
461 assert(OpVT
!= MVT::Glue
&& "Glued nodes should be in same sunit!");
462 bool isChain
= OpVT
== MVT::Other
;
464 unsigned PhysReg
= 0;
466 // Determine if this is a physical register dependency.
467 CheckForPhysRegDependency(OpN
, N
, i
, TRI
, TII
, PhysReg
, Cost
);
468 assert((PhysReg
== 0 || !isChain
) &&
469 "Chain dependence via physreg data?");
470 // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
471 // emits a copy from the physical register to a virtual register unless
472 // it requires a cross class copy (cost < 0). That means we are only
473 // treating "expensive to copy" register dependency as physical register
474 // dependency. This may change in the future though.
475 if (Cost
>= 0 && !StressSched
)
478 // If this is a ctrl dep, latency is 1.
479 unsigned OpLatency
= isChain
? 1 : OpSU
->Latency
;
480 // Special-case TokenFactor chains as zero-latency.
481 if(isChain
&& OpN
->getOpcode() == ISD::TokenFactor
)
484 SDep Dep
= isChain
? SDep(OpSU
, SDep::Barrier
)
485 : SDep(OpSU
, SDep::Data
, PhysReg
);
486 Dep
.setLatency(OpLatency
);
487 if (!isChain
&& !UnitLatencies
) {
488 computeOperandLatency(OpN
, N
, i
, Dep
);
489 ST
.adjustSchedDependency(OpSU
, SU
, Dep
);
492 if (!SU
->addPred(Dep
) && !Dep
.isCtrl() && OpSU
->NumRegDefsLeft
> 1) {
493 // Multiple register uses are combined in the same SUnit. For example,
494 // we could have a set of glued nodes with all their defs consumed by
495 // another set of glued nodes. Register pressure tracking sees this as
496 // a single use, so to keep pressure balanced we reduce the defs.
498 // We can't tell (without more book-keeping) if this results from
499 // glued nodes or duplicate operands. As long as we don't reduce
500 // NumRegDefsLeft to zero, we handle the common cases well.
501 --OpSU
->NumRegDefsLeft
;
508 /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
509 /// are input. This SUnit graph is similar to the SelectionDAG, but
510 /// excludes nodes that aren't interesting to scheduling, and represents
511 /// glued together nodes with a single SUnit.
512 void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis
*AA
) {
513 // Cluster certain nodes which should be scheduled together.
515 // Populate the SUnits array.
517 // Compute all the scheduling dependencies between nodes.
521 // Initialize NumNodeDefs for the current Node's opcode.
522 void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
523 // Check for phys reg copy.
527 if (!Node
->isMachineOpcode()) {
528 if (Node
->getOpcode() == ISD::CopyFromReg
)
534 unsigned POpc
= Node
->getMachineOpcode();
535 if (POpc
== TargetOpcode::IMPLICIT_DEF
) {
536 // No register need be allocated for this.
540 if (POpc
== TargetOpcode::PATCHPOINT
&&
541 Node
->getValueType(0) == MVT::Other
) {
542 // PATCHPOINT is defined to have one result, but it might really have none
543 // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
548 unsigned NRegDefs
= SchedDAG
->TII
->get(Node
->getMachineOpcode()).getNumDefs();
549 // Some instructions define regs that are not represented in the selection DAG
550 // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
551 NodeNumDefs
= std::min(Node
->getNumValues(), NRegDefs
);
555 // Construct a RegDefIter for this SUnit and find the first valid value.
556 ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit
*SU
,
557 const ScheduleDAGSDNodes
*SD
)
558 : SchedDAG(SD
), Node(SU
->getNode()), DefIdx(0), NodeNumDefs(0) {
563 // Advance to the next valid value defined by the SUnit.
564 void ScheduleDAGSDNodes::RegDefIter::Advance() {
565 for (;Node
;) { // Visit all glued nodes.
566 for (;DefIdx
< NodeNumDefs
; ++DefIdx
) {
567 if (!Node
->hasAnyUseOfValue(DefIdx
))
569 ValueType
= Node
->getSimpleValueType(DefIdx
);
571 return; // Found a normal regdef.
573 Node
= Node
->getGluedNode();
575 return; // No values left to visit.
581 void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit
*SU
) {
582 assert(SU
->NumRegDefsLeft
== 0 && "expect a new node");
583 for (RegDefIter
I(SU
, this); I
.IsValid(); I
.Advance()) {
584 assert(SU
->NumRegDefsLeft
< USHRT_MAX
&& "overflow is ok but unexpected");
585 ++SU
->NumRegDefsLeft
;
589 void ScheduleDAGSDNodes::computeLatency(SUnit
*SU
) {
590 SDNode
*N
= SU
->getNode();
592 // TokenFactor operands are considered zero latency, and some schedulers
593 // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
594 // whenever node latency is nonzero.
595 if (N
&& N
->getOpcode() == ISD::TokenFactor
) {
600 // Check to see if the scheduler cares about latencies.
601 if (forceUnitLatencies()) {
606 if (!InstrItins
|| InstrItins
->isEmpty()) {
607 if (N
&& N
->isMachineOpcode() &&
608 TII
->isHighLatencyDef(N
->getMachineOpcode()))
609 SU
->Latency
= HighLatencyCycles
;
615 // Compute the latency for the node. We use the sum of the latencies for
616 // all nodes glued together into this SUnit.
618 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode())
619 if (N
->isMachineOpcode())
620 SU
->Latency
+= TII
->getInstrLatency(InstrItins
, N
);
623 void ScheduleDAGSDNodes::computeOperandLatency(SDNode
*Def
, SDNode
*Use
,
624 unsigned OpIdx
, SDep
& dep
) const{
625 // Check to see if the scheduler cares about latencies.
626 if (forceUnitLatencies())
629 if (dep
.getKind() != SDep::Data
)
632 unsigned DefIdx
= Use
->getOperand(OpIdx
).getResNo();
633 if (Use
->isMachineOpcode())
634 // Adjust the use operand index by num of defs.
635 OpIdx
+= TII
->get(Use
->getMachineOpcode()).getNumDefs();
636 int Latency
= TII
->getOperandLatency(InstrItins
, Def
, DefIdx
, Use
, OpIdx
);
637 if (Latency
> 1 && Use
->getOpcode() == ISD::CopyToReg
&&
639 unsigned Reg
= cast
<RegisterSDNode
>(Use
->getOperand(1))->getReg();
640 if (TargetRegisterInfo::isVirtualRegister(Reg
))
641 // This copy is a liveout value. It is likely coalesced, so reduce the
642 // latency so not to penalize the def.
643 // FIXME: need target specific adjustment here?
644 Latency
= (Latency
> 1) ? Latency
- 1 : 1;
647 dep
.setLatency(Latency
);
650 void ScheduleDAGSDNodes::dumpNode(const SUnit
&SU
) const {
651 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
656 dbgs() << "PHYS REG COPY\n";
660 SU
.getNode()->dump(DAG
);
662 SmallVector
<SDNode
*, 4> GluedNodes
;
663 for (SDNode
*N
= SU
.getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
664 GluedNodes
.push_back(N
);
665 while (!GluedNodes
.empty()) {
667 GluedNodes
.back()->dump(DAG
);
669 GluedNodes
.pop_back();
674 void ScheduleDAGSDNodes::dump() const {
675 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
676 if (EntrySU
.getNode() != nullptr)
677 dumpNodeAll(EntrySU
);
678 for (const SUnit
&SU
: SUnits
)
680 if (ExitSU
.getNode() != nullptr)
685 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
686 void ScheduleDAGSDNodes::dumpSchedule() const {
687 for (unsigned i
= 0, e
= Sequence
.size(); i
!= e
; i
++) {
688 if (SUnit
*SU
= Sequence
[i
])
691 dbgs() << "**** NOOP ****\n";
697 /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
698 /// their state is consistent with the nodes listed in Sequence.
700 void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp
) {
701 unsigned ScheduledNodes
= ScheduleDAG::VerifyScheduledDAG(isBottomUp
);
703 for (unsigned i
= 0, e
= Sequence
.size(); i
!= e
; ++i
)
706 assert(Sequence
.size() - Noops
== ScheduledNodes
&&
707 "The number of nodes scheduled doesn't match the expected number!");
711 /// ProcessSDDbgValues - Process SDDbgValues associated with this node.
713 ProcessSDDbgValues(SDNode
*N
, SelectionDAG
*DAG
, InstrEmitter
&Emitter
,
714 SmallVectorImpl
<std::pair
<unsigned, MachineInstr
*> > &Orders
,
715 DenseMap
<SDValue
, unsigned> &VRBaseMap
, unsigned Order
) {
716 if (!N
->getHasDebugValue())
719 // Opportunistically insert immediate dbg_value uses, i.e. those with the same
720 // source order number as N.
721 MachineBasicBlock
*BB
= Emitter
.getBlock();
722 MachineBasicBlock::iterator InsertPos
= Emitter
.getInsertPos();
723 for (auto DV
: DAG
->GetDbgValues(N
)) {
726 unsigned DVOrder
= DV
->getOrder();
727 if (!Order
|| DVOrder
== Order
) {
728 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(DV
, VRBaseMap
);
730 Orders
.push_back({DVOrder
, DbgMI
});
731 BB
->insert(InsertPos
, DbgMI
);
737 // ProcessSourceNode - Process nodes with source order numbers. These are added
738 // to a vector which EmitSchedule uses to determine how to insert dbg_value
739 // instructions in the right order.
741 ProcessSourceNode(SDNode
*N
, SelectionDAG
*DAG
, InstrEmitter
&Emitter
,
742 DenseMap
<SDValue
, unsigned> &VRBaseMap
,
743 SmallVectorImpl
<std::pair
<unsigned, MachineInstr
*>> &Orders
,
744 SmallSet
<unsigned, 8> &Seen
, MachineInstr
*NewInsn
) {
745 unsigned Order
= N
->getIROrder();
746 if (!Order
|| Seen
.count(Order
)) {
747 // Process any valid SDDbgValues even if node does not have any order
749 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, 0);
753 // If a new instruction was generated for this Order number, record it.
754 // Otherwise, leave this order number unseen: we will either find later
755 // instructions for it, or leave it unseen if there were no instructions at
759 Orders
.push_back({Order
, NewInsn
});
762 // Even if no instruction was generated, a Value may have become defined via
763 // earlier nodes. Try to process them now.
764 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, Order
);
767 void ScheduleDAGSDNodes::
768 EmitPhysRegCopy(SUnit
*SU
, DenseMap
<SUnit
*, unsigned> &VRBaseMap
,
769 MachineBasicBlock::iterator InsertPos
) {
770 for (SUnit::const_pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
772 if (I
->isCtrl()) continue; // ignore chain preds
773 if (I
->getSUnit()->CopyDstRC
) {
774 // Copy to physical register.
775 DenseMap
<SUnit
*, unsigned>::iterator VRI
= VRBaseMap
.find(I
->getSUnit());
776 assert(VRI
!= VRBaseMap
.end() && "Node emitted out of order - late");
777 // Find the destination physical register.
779 for (SUnit::const_succ_iterator II
= SU
->Succs
.begin(),
780 EE
= SU
->Succs
.end(); II
!= EE
; ++II
) {
781 if (II
->isCtrl()) continue; // ignore chain preds
787 BuildMI(*BB
, InsertPos
, DebugLoc(), TII
->get(TargetOpcode::COPY
), Reg
)
788 .addReg(VRI
->second
);
790 // Copy from physical register.
791 assert(I
->getReg() && "Unknown physical register!");
792 unsigned VRBase
= MRI
.createVirtualRegister(SU
->CopyDstRC
);
793 bool isNew
= VRBaseMap
.insert(std::make_pair(SU
, VRBase
)).second
;
794 (void)isNew
; // Silence compiler warning.
795 assert(isNew
&& "Node emitted out of order - early");
796 BuildMI(*BB
, InsertPos
, DebugLoc(), TII
->get(TargetOpcode::COPY
), VRBase
)
797 .addReg(I
->getReg());
803 /// EmitSchedule - Emit the machine code in scheduled order. Return the new
804 /// InsertPos and MachineBasicBlock that contains this insertion
805 /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
806 /// not necessarily refer to returned BB. The emitter may split blocks.
807 MachineBasicBlock
*ScheduleDAGSDNodes::
808 EmitSchedule(MachineBasicBlock::iterator
&InsertPos
) {
809 InstrEmitter
Emitter(BB
, InsertPos
);
810 DenseMap
<SDValue
, unsigned> VRBaseMap
;
811 DenseMap
<SUnit
*, unsigned> CopyVRBaseMap
;
812 SmallVector
<std::pair
<unsigned, MachineInstr
*>, 32> Orders
;
813 SmallSet
<unsigned, 8> Seen
;
814 bool HasDbg
= DAG
->hasDebugValues();
816 // Emit a node, and determine where its first instruction is for debuginfo.
817 // Zero, one, or multiple instructions can be created when emitting a node.
819 [&](SDNode
*Node
, bool IsClone
, bool IsCloned
,
820 DenseMap
<SDValue
, unsigned> &VRBaseMap
) -> MachineInstr
* {
821 // Fetch instruction prior to this, or end() if nonexistant.
822 auto GetPrevInsn
= [&](MachineBasicBlock::iterator I
) {
823 if (I
== BB
->begin())
826 return std::prev(Emitter
.getInsertPos());
829 MachineBasicBlock::iterator Before
= GetPrevInsn(Emitter
.getInsertPos());
830 Emitter
.EmitNode(Node
, IsClone
, IsCloned
, VRBaseMap
);
831 MachineBasicBlock::iterator After
= GetPrevInsn(Emitter
.getInsertPos());
833 // If the iterator did not change, no instructions were inserted.
837 if (Before
== BB
->end()) {
838 // There were no prior instructions; the new ones must start at the
839 // beginning of the block.
840 return &Emitter
.getBlock()->instr_front();
842 // Return first instruction after the pre-existing instructions.
843 return &*std::next(Before
);
847 // If this is the first BB, emit byval parameter dbg_value's.
848 if (HasDbg
&& BB
->getParent()->begin() == MachineFunction::iterator(BB
)) {
849 SDDbgInfo::DbgIterator PDI
= DAG
->ByvalParmDbgBegin();
850 SDDbgInfo::DbgIterator PDE
= DAG
->ByvalParmDbgEnd();
851 for (; PDI
!= PDE
; ++PDI
) {
852 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*PDI
, VRBaseMap
);
854 BB
->insert(InsertPos
, DbgMI
);
855 // We re-emit the dbg_value closer to its use, too, after instructions
856 // are emitted to the BB.
857 (*PDI
)->clearIsEmitted();
862 for (unsigned i
= 0, e
= Sequence
.size(); i
!= e
; i
++) {
863 SUnit
*SU
= Sequence
[i
];
865 // Null SUnit* is a noop.
866 TII
->insertNoop(*Emitter
.getBlock(), InsertPos
);
870 // For pre-regalloc scheduling, create instructions corresponding to the
871 // SDNode and any glued SDNodes and append them to the block.
872 if (!SU
->getNode()) {
874 EmitPhysRegCopy(SU
, CopyVRBaseMap
, InsertPos
);
878 SmallVector
<SDNode
*, 4> GluedNodes
;
879 for (SDNode
*N
= SU
->getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
880 GluedNodes
.push_back(N
);
881 while (!GluedNodes
.empty()) {
882 SDNode
*N
= GluedNodes
.back();
883 auto NewInsn
= EmitNode(N
, SU
->OrigNode
!= SU
, SU
->isCloned
, VRBaseMap
);
884 // Remember the source order of the inserted instruction.
886 ProcessSourceNode(N
, DAG
, Emitter
, VRBaseMap
, Orders
, Seen
, NewInsn
);
887 GluedNodes
.pop_back();
890 EmitNode(SU
->getNode(), SU
->OrigNode
!= SU
, SU
->isCloned
, VRBaseMap
);
891 // Remember the source order of the inserted instruction.
893 ProcessSourceNode(SU
->getNode(), DAG
, Emitter
, VRBaseMap
, Orders
, Seen
,
897 // Insert all the dbg_values which have not already been inserted in source
900 MachineBasicBlock::iterator BBBegin
= BB
->getFirstNonPHI();
902 // Sort the source order instructions and use the order to insert debug
903 // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
904 // regardless of the host's implementation fo std::sort.
905 std::stable_sort(Orders
.begin(), Orders
.end(), less_first());
906 std::stable_sort(DAG
->DbgBegin(), DAG
->DbgEnd(),
907 [](const SDDbgValue
*LHS
, const SDDbgValue
*RHS
) {
908 return LHS
->getOrder() < RHS
->getOrder();
911 SDDbgInfo::DbgIterator DI
= DAG
->DbgBegin();
912 SDDbgInfo::DbgIterator DE
= DAG
->DbgEnd();
913 // Now emit the rest according to source order.
914 unsigned LastOrder
= 0;
915 for (unsigned i
= 0, e
= Orders
.size(); i
!= e
&& DI
!= DE
; ++i
) {
916 unsigned Order
= Orders
[i
].first
;
917 MachineInstr
*MI
= Orders
[i
].second
;
918 // Insert all SDDbgValue's whose order(s) are before "Order".
920 for (; DI
!= DE
; ++DI
) {
921 if ((*DI
)->getOrder() < LastOrder
|| (*DI
)->getOrder() >= Order
)
923 if ((*DI
)->isEmitted())
926 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
);
929 // Insert to start of the BB (after PHIs).
930 BB
->insert(BBBegin
, DbgMI
);
932 // Insert at the instruction, which may be in a different
933 // block, if the block was split by a custom inserter.
934 MachineBasicBlock::iterator Pos
= MI
;
935 MI
->getParent()->insert(Pos
, DbgMI
);
941 // Add trailing DbgValue's before the terminator. FIXME: May want to add
942 // some of them before one or more conditional branches?
943 SmallVector
<MachineInstr
*, 8> DbgMIs
;
944 for (; DI
!= DE
; ++DI
) {
945 if ((*DI
)->isEmitted())
947 assert((*DI
)->getOrder() >= LastOrder
&&
948 "emitting DBG_VALUE out of order");
949 if (MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
))
950 DbgMIs
.push_back(DbgMI
);
953 MachineBasicBlock
*InsertBB
= Emitter
.getBlock();
954 MachineBasicBlock::iterator Pos
= InsertBB
->getFirstTerminator();
955 InsertBB
->insert(Pos
, DbgMIs
.begin(), DbgMIs
.end());
957 SDDbgInfo::DbgLabelIterator DLI
= DAG
->DbgLabelBegin();
958 SDDbgInfo::DbgLabelIterator DLE
= DAG
->DbgLabelEnd();
959 // Now emit the rest according to source order.
961 for (const auto &InstrOrder
: Orders
) {
962 unsigned Order
= InstrOrder
.first
;
963 MachineInstr
*MI
= InstrOrder
.second
;
967 // Insert all SDDbgLabel's whose order(s) are before "Order".
969 (*DLI
)->getOrder() >= LastOrder
&& (*DLI
)->getOrder() < Order
;
971 MachineInstr
*DbgMI
= Emitter
.EmitDbgLabel(*DLI
);
974 // Insert to start of the BB (after PHIs).
975 BB
->insert(BBBegin
, DbgMI
);
977 // Insert at the instruction, which may be in a different
978 // block, if the block was split by a custom inserter.
979 MachineBasicBlock::iterator Pos
= MI
;
980 MI
->getParent()->insert(Pos
, DbgMI
);
991 InsertPos
= Emitter
.getInsertPos();
992 return Emitter
.getBlock();
995 /// Return the basic block label.
996 std::string
ScheduleDAGSDNodes::getDAGName() const {
997 return "sunit-dag." + BB
->getFullName();