1 //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This implements the ScheduleDAG class, which is a base class used by
10 // scheduling implementation classes.
12 //===----------------------------------------------------------------------===//
14 #include "ScheduleDAGSDNodes.h"
15 #include "InstrEmitter.h"
16 #include "SDNodeDbgValue.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSubtargetInfo.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/MC/MCInstrItineraries.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/raw_ostream.h"
36 #define DEBUG_TYPE "pre-RA-sched"
38 STATISTIC(LoadsClustered
, "Number of loads clustered together");
40 // This allows the latency-based scheduler to notice high latency instructions
41 // without a target itinerary. The choice of number here has more to do with
42 // balancing scheduler heuristics than with the actual machine latency.
43 static cl::opt
<int> HighLatencyCycles(
44 "sched-high-latency-cycles", cl::Hidden
, cl::init(10),
45 cl::desc("Roughly estimate the number of cycles that 'long latency'"
46 "instructions take for targets with no itinerary"));
48 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction
&mf
)
49 : ScheduleDAG(mf
), BB(nullptr), DAG(nullptr),
50 InstrItins(mf
.getSubtarget().getInstrItineraryData()) {}
52 /// Run - perform scheduling.
54 void ScheduleDAGSDNodes::Run(SelectionDAG
*dag
, MachineBasicBlock
*bb
) {
58 // Clear the scheduler's SUnit DAG.
59 ScheduleDAG::clearDAG();
62 // Invoke the target's selection of scheduler.
66 /// NewSUnit - Creates a new SUnit and return a ptr to it.
68 SUnit
*ScheduleDAGSDNodes::newSUnit(SDNode
*N
) {
70 const SUnit
*Addr
= nullptr;
74 SUnits
.emplace_back(N
, (unsigned)SUnits
.size());
75 assert((Addr
== nullptr || Addr
== &SUnits
[0]) &&
76 "SUnits std::vector reallocated on the fly!");
77 SUnits
.back().OrigNode
= &SUnits
.back();
78 SUnit
*SU
= &SUnits
.back();
79 const TargetLowering
&TLI
= DAG
->getTargetLoweringInfo();
81 (N
->isMachineOpcode() &&
82 N
->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF
))
83 SU
->SchedulingPref
= Sched::None
;
85 SU
->SchedulingPref
= TLI
.getSchedulingPreference(N
);
89 SUnit
*ScheduleDAGSDNodes::Clone(SUnit
*Old
) {
90 SUnit
*SU
= newSUnit(Old
->getNode());
91 SU
->OrigNode
= Old
->OrigNode
;
92 SU
->Latency
= Old
->Latency
;
93 SU
->isVRegCycle
= Old
->isVRegCycle
;
94 SU
->isCall
= Old
->isCall
;
95 SU
->isCallOp
= Old
->isCallOp
;
96 SU
->isTwoAddress
= Old
->isTwoAddress
;
97 SU
->isCommutable
= Old
->isCommutable
;
98 SU
->hasPhysRegDefs
= Old
->hasPhysRegDefs
;
99 SU
->hasPhysRegClobbers
= Old
->hasPhysRegClobbers
;
100 SU
->isScheduleHigh
= Old
->isScheduleHigh
;
101 SU
->isScheduleLow
= Old
->isScheduleLow
;
102 SU
->SchedulingPref
= Old
->SchedulingPref
;
103 Old
->isCloned
= true;
107 /// CheckForPhysRegDependency - Check if the dependency between def and use of
108 /// a specified operand is a physical register dependency. If so, returns the
109 /// register and the cost of copying the register.
110 static void CheckForPhysRegDependency(SDNode
*Def
, SDNode
*User
, unsigned Op
,
111 const TargetRegisterInfo
*TRI
,
112 const TargetInstrInfo
*TII
,
113 unsigned &PhysReg
, int &Cost
) {
114 if (Op
!= 2 || User
->getOpcode() != ISD::CopyToReg
)
117 unsigned Reg
= cast
<RegisterSDNode
>(User
->getOperand(1))->getReg();
118 if (Register::isVirtualRegister(Reg
))
121 unsigned ResNo
= User
->getOperand(2).getResNo();
122 if (Def
->getOpcode() == ISD::CopyFromReg
&&
123 cast
<RegisterSDNode
>(Def
->getOperand(1))->getReg() == Reg
) {
125 } else if (Def
->isMachineOpcode()) {
126 const MCInstrDesc
&II
= TII
->get(Def
->getMachineOpcode());
127 if (ResNo
>= II
.getNumDefs() &&
128 II
.ImplicitDefs
[ResNo
- II
.getNumDefs()] == Reg
)
133 const TargetRegisterClass
*RC
=
134 TRI
->getMinimalPhysRegClass(Reg
, Def
->getSimpleValueType(ResNo
));
135 Cost
= RC
->getCopyCost();
139 // Helper for AddGlue to clone node operands.
140 static void CloneNodeWithValues(SDNode
*N
, SelectionDAG
*DAG
, ArrayRef
<EVT
> VTs
,
141 SDValue ExtraOper
= SDValue()) {
142 SmallVector
<SDValue
, 8> Ops(N
->op_begin(), N
->op_end());
143 if (ExtraOper
.getNode())
144 Ops
.push_back(ExtraOper
);
146 SDVTList VTList
= DAG
->getVTList(VTs
);
147 MachineSDNode
*MN
= dyn_cast
<MachineSDNode
>(N
);
149 // Store memory references.
150 SmallVector
<MachineMemOperand
*, 2> MMOs
;
152 MMOs
.assign(MN
->memoperands_begin(), MN
->memoperands_end());
154 DAG
->MorphNodeTo(N
, N
->getOpcode(), VTList
, Ops
);
156 // Reset the memory references
158 DAG
->setNodeMemRefs(MN
, MMOs
);
161 static bool AddGlue(SDNode
*N
, SDValue Glue
, bool AddGlue
, SelectionDAG
*DAG
) {
162 SDNode
*GlueDestNode
= Glue
.getNode();
164 // Don't add glue from a node to itself.
165 if (GlueDestNode
== N
) return false;
167 // Don't add a glue operand to something that already uses glue.
169 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
172 // Don't add glue to something that already has a glue value.
173 if (N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
) return false;
175 SmallVector
<EVT
, 4> VTs(N
->value_begin(), N
->value_end());
177 VTs
.push_back(MVT::Glue
);
179 CloneNodeWithValues(N
, DAG
, VTs
, Glue
);
184 // Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
185 // node even though simply shrinking the value list is sufficient.
186 static void RemoveUnusedGlue(SDNode
*N
, SelectionDAG
*DAG
) {
187 assert((N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
&&
188 !N
->hasAnyUseOfValue(N
->getNumValues() - 1)) &&
189 "expected an unused glue value");
191 CloneNodeWithValues(N
, DAG
,
192 makeArrayRef(N
->value_begin(), N
->getNumValues() - 1));
195 /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
196 /// This function finds loads of the same base and different offsets. If the
197 /// offsets are not far apart (target specific), it add MVT::Glue inputs and
198 /// outputs to ensure they are scheduled together and in order. This
199 /// optimization may benefit some targets by improving cache locality.
200 void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode
*Node
) {
201 SDNode
*Chain
= nullptr;
202 unsigned NumOps
= Node
->getNumOperands();
203 if (Node
->getOperand(NumOps
-1).getValueType() == MVT::Other
)
204 Chain
= Node
->getOperand(NumOps
-1).getNode();
208 // Skip any load instruction that has a tied input. There may be an additional
209 // dependency requiring a different order than by increasing offsets, and the
210 // added glue may introduce a cycle.
211 auto hasTiedInput
= [this](const SDNode
*N
) {
212 const MCInstrDesc
&MCID
= TII
->get(N
->getMachineOpcode());
213 for (unsigned I
= 0; I
!= MCID
.getNumOperands(); ++I
) {
214 if (MCID
.getOperandConstraint(I
, MCOI::TIED_TO
) != -1)
221 // Look for other loads of the same chain. Find loads that are loading from
222 // the same base pointer and different offsets.
223 SmallPtrSet
<SDNode
*, 16> Visited
;
224 SmallVector
<int64_t, 4> Offsets
;
225 DenseMap
<long long, SDNode
*> O2SMap
; // Map from offset to SDNode.
226 bool Cluster
= false;
229 if (hasTiedInput(Base
))
232 // This algorithm requires a reasonably low use count before finding a match
233 // to avoid uselessly blowing up compile time in large blocks.
234 unsigned UseCount
= 0;
235 for (SDNode::use_iterator I
= Chain
->use_begin(), E
= Chain
->use_end();
236 I
!= E
&& UseCount
< 100; ++I
, ++UseCount
) {
238 if (User
== Node
|| !Visited
.insert(User
).second
)
240 int64_t Offset1
, Offset2
;
241 if (!TII
->areLoadsFromSameBasePtr(Base
, User
, Offset1
, Offset2
) ||
242 Offset1
== Offset2
||
243 hasTiedInput(User
)) {
244 // FIXME: Should be ok if they addresses are identical. But earlier
245 // optimizations really should have eliminated one of the loads.
248 if (O2SMap
.insert(std::make_pair(Offset1
, Base
)).second
)
249 Offsets
.push_back(Offset1
);
250 O2SMap
.insert(std::make_pair(Offset2
, User
));
251 Offsets
.push_back(Offset2
);
252 if (Offset2
< Offset1
)
255 // Reset UseCount to allow more matches.
262 // Sort them in increasing order.
265 // Check if the loads are close enough.
266 SmallVector
<SDNode
*, 4> Loads
;
267 unsigned NumLoads
= 0;
268 int64_t BaseOff
= Offsets
[0];
269 SDNode
*BaseLoad
= O2SMap
[BaseOff
];
270 Loads
.push_back(BaseLoad
);
271 for (unsigned i
= 1, e
= Offsets
.size(); i
!= e
; ++i
) {
272 int64_t Offset
= Offsets
[i
];
273 SDNode
*Load
= O2SMap
[Offset
];
274 if (!TII
->shouldScheduleLoadsNear(BaseLoad
, Load
, BaseOff
, Offset
,NumLoads
))
275 break; // Stop right here. Ignore loads that are further away.
276 Loads
.push_back(Load
);
283 // Cluster loads by adding MVT::Glue outputs and inputs. This also
284 // ensure they are scheduled in order of increasing addresses.
285 SDNode
*Lead
= Loads
[0];
286 SDValue InGlue
= SDValue(nullptr, 0);
287 if (AddGlue(Lead
, InGlue
, true, DAG
))
288 InGlue
= SDValue(Lead
, Lead
->getNumValues() - 1);
289 for (unsigned I
= 1, E
= Loads
.size(); I
!= E
; ++I
) {
290 bool OutGlue
= I
< E
- 1;
291 SDNode
*Load
= Loads
[I
];
293 // If AddGlue fails, we could leave an unsused glue value. This should not
295 if (AddGlue(Load
, InGlue
, OutGlue
, DAG
)) {
297 InGlue
= SDValue(Load
, Load
->getNumValues() - 1);
301 else if (!OutGlue
&& InGlue
.getNode())
302 RemoveUnusedGlue(InGlue
.getNode(), DAG
);
306 /// ClusterNodes - Cluster certain nodes which should be scheduled together.
308 void ScheduleDAGSDNodes::ClusterNodes() {
309 for (SDNode
&NI
: DAG
->allnodes()) {
311 if (!Node
|| !Node
->isMachineOpcode())
314 unsigned Opc
= Node
->getMachineOpcode();
315 const MCInstrDesc
&MCID
= TII
->get(Opc
);
317 // Cluster loads from "near" addresses into combined SUnits.
318 ClusterNeighboringLoads(Node
);
322 void ScheduleDAGSDNodes::BuildSchedUnits() {
323 // During scheduling, the NodeId field of SDNode is used to map SDNodes
324 // to their associated SUnits by holding SUnits table indices. A value
325 // of -1 means the SDNode does not yet have an associated SUnit.
326 unsigned NumNodes
= 0;
327 for (SDNode
&NI
: DAG
->allnodes()) {
332 // Reserve entries in the vector for each of the SUnits we are creating. This
333 // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
335 // FIXME: Multiply by 2 because we may clone nodes during scheduling.
336 // This is a temporary workaround.
337 SUnits
.reserve(NumNodes
* 2);
339 // Add all nodes in depth first order.
340 SmallVector
<SDNode
*, 64> Worklist
;
341 SmallPtrSet
<SDNode
*, 32> Visited
;
342 Worklist
.push_back(DAG
->getRoot().getNode());
343 Visited
.insert(DAG
->getRoot().getNode());
345 SmallVector
<SUnit
*, 8> CallSUnits
;
346 while (!Worklist
.empty()) {
347 SDNode
*NI
= Worklist
.pop_back_val();
349 // Add all operands to the worklist unless they've already been added.
350 for (const SDValue
&Op
: NI
->op_values())
351 if (Visited
.insert(Op
.getNode()).second
)
352 Worklist
.push_back(Op
.getNode());
354 if (isPassiveNode(NI
)) // Leaf node, e.g. a TargetImmediate.
357 // If this node has already been processed, stop now.
358 if (NI
->getNodeId() != -1) continue;
360 SUnit
*NodeSUnit
= newSUnit(NI
);
362 // See if anything is glued to this node, if so, add them to glued
363 // nodes. Nodes can have at most one glue input and one glue output. Glue
364 // is required to be the last operand and result of a node.
366 // Scan up to find glued preds.
368 while (N
->getNumOperands() &&
369 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
370 N
= N
->getOperand(N
->getNumOperands()-1).getNode();
371 assert(N
->getNodeId() == -1 && "Node already inserted!");
372 N
->setNodeId(NodeSUnit
->NodeNum
);
373 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
374 NodeSUnit
->isCall
= true;
377 // Scan down to find any glued succs.
379 while (N
->getValueType(N
->getNumValues()-1) == MVT::Glue
) {
380 SDValue
GlueVal(N
, N
->getNumValues()-1);
382 // There are either zero or one users of the Glue result.
383 bool HasGlueUse
= false;
384 for (SDNode::use_iterator UI
= N
->use_begin(), E
= N
->use_end();
386 if (GlueVal
.isOperandOf(*UI
)) {
388 assert(N
->getNodeId() == -1 && "Node already inserted!");
389 N
->setNodeId(NodeSUnit
->NodeNum
);
391 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
392 NodeSUnit
->isCall
= true;
395 if (!HasGlueUse
) break;
398 if (NodeSUnit
->isCall
)
399 CallSUnits
.push_back(NodeSUnit
);
401 // Schedule zero-latency TokenFactor below any nodes that may increase the
402 // schedule height. Otherwise, ancestors of the TokenFactor may appear to
403 // have false stalls.
404 if (NI
->getOpcode() == ISD::TokenFactor
)
405 NodeSUnit
->isScheduleLow
= true;
407 // If there are glue operands involved, N is now the bottom-most node
408 // of the sequence of nodes that are glued together.
410 NodeSUnit
->setNode(N
);
411 assert(N
->getNodeId() == -1 && "Node already inserted!");
412 N
->setNodeId(NodeSUnit
->NodeNum
);
414 // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
415 InitNumRegDefsLeft(NodeSUnit
);
417 // Assign the Latency field of NodeSUnit using target-provided information.
418 computeLatency(NodeSUnit
);
421 // Find all call operands.
422 while (!CallSUnits
.empty()) {
423 SUnit
*SU
= CallSUnits
.pop_back_val();
424 for (const SDNode
*SUNode
= SU
->getNode(); SUNode
;
425 SUNode
= SUNode
->getGluedNode()) {
426 if (SUNode
->getOpcode() != ISD::CopyToReg
)
428 SDNode
*SrcN
= SUNode
->getOperand(2).getNode();
429 if (isPassiveNode(SrcN
)) continue; // Not scheduled.
430 SUnit
*SrcSU
= &SUnits
[SrcN
->getNodeId()];
431 SrcSU
->isCallOp
= true;
436 void ScheduleDAGSDNodes::AddSchedEdges() {
437 const TargetSubtargetInfo
&ST
= MF
.getSubtarget();
439 // Check to see if the scheduler cares about latencies.
440 bool UnitLatencies
= forceUnitLatencies();
442 // Pass 2: add the preds, succs, etc.
443 for (unsigned su
= 0, e
= SUnits
.size(); su
!= e
; ++su
) {
444 SUnit
*SU
= &SUnits
[su
];
445 SDNode
*MainNode
= SU
->getNode();
447 if (MainNode
->isMachineOpcode()) {
448 unsigned Opc
= MainNode
->getMachineOpcode();
449 const MCInstrDesc
&MCID
= TII
->get(Opc
);
450 for (unsigned i
= 0; i
!= MCID
.getNumOperands(); ++i
) {
451 if (MCID
.getOperandConstraint(i
, MCOI::TIED_TO
) != -1) {
452 SU
->isTwoAddress
= true;
456 if (MCID
.isCommutable())
457 SU
->isCommutable
= true;
460 // Find all predecessors and successors of the group.
461 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode()) {
462 if (N
->isMachineOpcode() &&
463 TII
->get(N
->getMachineOpcode()).getImplicitDefs()) {
464 SU
->hasPhysRegClobbers
= true;
465 unsigned NumUsed
= InstrEmitter::CountResults(N
);
466 while (NumUsed
!= 0 && !N
->hasAnyUseOfValue(NumUsed
- 1))
467 --NumUsed
; // Skip over unused values at the end.
468 if (NumUsed
> TII
->get(N
->getMachineOpcode()).getNumDefs())
469 SU
->hasPhysRegDefs
= true;
472 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
473 SDNode
*OpN
= N
->getOperand(i
).getNode();
474 if (isPassiveNode(OpN
)) continue; // Not scheduled.
475 SUnit
*OpSU
= &SUnits
[OpN
->getNodeId()];
476 assert(OpSU
&& "Node has no SUnit!");
477 if (OpSU
== SU
) continue; // In the same group.
479 EVT OpVT
= N
->getOperand(i
).getValueType();
480 assert(OpVT
!= MVT::Glue
&& "Glued nodes should be in same sunit!");
481 bool isChain
= OpVT
== MVT::Other
;
483 unsigned PhysReg
= 0;
485 // Determine if this is a physical register dependency.
486 CheckForPhysRegDependency(OpN
, N
, i
, TRI
, TII
, PhysReg
, Cost
);
487 assert((PhysReg
== 0 || !isChain
) &&
488 "Chain dependence via physreg data?");
489 // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
490 // emits a copy from the physical register to a virtual register unless
491 // it requires a cross class copy (cost < 0). That means we are only
492 // treating "expensive to copy" register dependency as physical register
493 // dependency. This may change in the future though.
494 if (Cost
>= 0 && !StressSched
)
497 // If this is a ctrl dep, latency is 1.
498 unsigned OpLatency
= isChain
? 1 : OpSU
->Latency
;
499 // Special-case TokenFactor chains as zero-latency.
500 if(isChain
&& OpN
->getOpcode() == ISD::TokenFactor
)
503 SDep Dep
= isChain
? SDep(OpSU
, SDep::Barrier
)
504 : SDep(OpSU
, SDep::Data
, PhysReg
);
505 Dep
.setLatency(OpLatency
);
506 if (!isChain
&& !UnitLatencies
) {
507 computeOperandLatency(OpN
, N
, i
, Dep
);
508 ST
.adjustSchedDependency(OpSU
, SU
, Dep
);
511 if (!SU
->addPred(Dep
) && !Dep
.isCtrl() && OpSU
->NumRegDefsLeft
> 1) {
512 // Multiple register uses are combined in the same SUnit. For example,
513 // we could have a set of glued nodes with all their defs consumed by
514 // another set of glued nodes. Register pressure tracking sees this as
515 // a single use, so to keep pressure balanced we reduce the defs.
517 // We can't tell (without more book-keeping) if this results from
518 // glued nodes or duplicate operands. As long as we don't reduce
519 // NumRegDefsLeft to zero, we handle the common cases well.
520 --OpSU
->NumRegDefsLeft
;
527 /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
528 /// are input. This SUnit graph is similar to the SelectionDAG, but
529 /// excludes nodes that aren't interesting to scheduling, and represents
530 /// glued together nodes with a single SUnit.
531 void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis
*AA
) {
532 // Cluster certain nodes which should be scheduled together.
534 // Populate the SUnits array.
536 // Compute all the scheduling dependencies between nodes.
540 // Initialize NumNodeDefs for the current Node's opcode.
541 void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
542 // Check for phys reg copy.
546 if (!Node
->isMachineOpcode()) {
547 if (Node
->getOpcode() == ISD::CopyFromReg
)
553 unsigned POpc
= Node
->getMachineOpcode();
554 if (POpc
== TargetOpcode::IMPLICIT_DEF
) {
555 // No register need be allocated for this.
559 if (POpc
== TargetOpcode::PATCHPOINT
&&
560 Node
->getValueType(0) == MVT::Other
) {
561 // PATCHPOINT is defined to have one result, but it might really have none
562 // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
567 unsigned NRegDefs
= SchedDAG
->TII
->get(Node
->getMachineOpcode()).getNumDefs();
568 // Some instructions define regs that are not represented in the selection DAG
569 // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
570 NodeNumDefs
= std::min(Node
->getNumValues(), NRegDefs
);
574 // Construct a RegDefIter for this SUnit and find the first valid value.
575 ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit
*SU
,
576 const ScheduleDAGSDNodes
*SD
)
577 : SchedDAG(SD
), Node(SU
->getNode()), DefIdx(0), NodeNumDefs(0) {
582 // Advance to the next valid value defined by the SUnit.
583 void ScheduleDAGSDNodes::RegDefIter::Advance() {
584 for (;Node
;) { // Visit all glued nodes.
585 for (;DefIdx
< NodeNumDefs
; ++DefIdx
) {
586 if (!Node
->hasAnyUseOfValue(DefIdx
))
588 ValueType
= Node
->getSimpleValueType(DefIdx
);
590 return; // Found a normal regdef.
592 Node
= Node
->getGluedNode();
594 return; // No values left to visit.
600 void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit
*SU
) {
601 assert(SU
->NumRegDefsLeft
== 0 && "expect a new node");
602 for (RegDefIter
I(SU
, this); I
.IsValid(); I
.Advance()) {
603 assert(SU
->NumRegDefsLeft
< USHRT_MAX
&& "overflow is ok but unexpected");
604 ++SU
->NumRegDefsLeft
;
608 void ScheduleDAGSDNodes::computeLatency(SUnit
*SU
) {
609 SDNode
*N
= SU
->getNode();
611 // TokenFactor operands are considered zero latency, and some schedulers
612 // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
613 // whenever node latency is nonzero.
614 if (N
&& N
->getOpcode() == ISD::TokenFactor
) {
619 // Check to see if the scheduler cares about latencies.
620 if (forceUnitLatencies()) {
625 if (!InstrItins
|| InstrItins
->isEmpty()) {
626 if (N
&& N
->isMachineOpcode() &&
627 TII
->isHighLatencyDef(N
->getMachineOpcode()))
628 SU
->Latency
= HighLatencyCycles
;
634 // Compute the latency for the node. We use the sum of the latencies for
635 // all nodes glued together into this SUnit.
637 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode())
638 if (N
->isMachineOpcode())
639 SU
->Latency
+= TII
->getInstrLatency(InstrItins
, N
);
642 void ScheduleDAGSDNodes::computeOperandLatency(SDNode
*Def
, SDNode
*Use
,
643 unsigned OpIdx
, SDep
& dep
) const{
644 // Check to see if the scheduler cares about latencies.
645 if (forceUnitLatencies())
648 if (dep
.getKind() != SDep::Data
)
651 unsigned DefIdx
= Use
->getOperand(OpIdx
).getResNo();
652 if (Use
->isMachineOpcode())
653 // Adjust the use operand index by num of defs.
654 OpIdx
+= TII
->get(Use
->getMachineOpcode()).getNumDefs();
655 int Latency
= TII
->getOperandLatency(InstrItins
, Def
, DefIdx
, Use
, OpIdx
);
656 if (Latency
> 1 && Use
->getOpcode() == ISD::CopyToReg
&&
658 unsigned Reg
= cast
<RegisterSDNode
>(Use
->getOperand(1))->getReg();
659 if (Register::isVirtualRegister(Reg
))
660 // This copy is a liveout value. It is likely coalesced, so reduce the
661 // latency so not to penalize the def.
662 // FIXME: need target specific adjustment here?
663 Latency
= (Latency
> 1) ? Latency
- 1 : 1;
666 dep
.setLatency(Latency
);
669 void ScheduleDAGSDNodes::dumpNode(const SUnit
&SU
) const {
670 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
675 dbgs() << "PHYS REG COPY\n";
679 SU
.getNode()->dump(DAG
);
681 SmallVector
<SDNode
*, 4> GluedNodes
;
682 for (SDNode
*N
= SU
.getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
683 GluedNodes
.push_back(N
);
684 while (!GluedNodes
.empty()) {
686 GluedNodes
.back()->dump(DAG
);
688 GluedNodes
.pop_back();
693 void ScheduleDAGSDNodes::dump() const {
694 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
695 if (EntrySU
.getNode() != nullptr)
696 dumpNodeAll(EntrySU
);
697 for (const SUnit
&SU
: SUnits
)
699 if (ExitSU
.getNode() != nullptr)
704 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
705 void ScheduleDAGSDNodes::dumpSchedule() const {
706 for (unsigned i
= 0, e
= Sequence
.size(); i
!= e
; i
++) {
707 if (SUnit
*SU
= Sequence
[i
])
710 dbgs() << "**** NOOP ****\n";
716 /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
717 /// their state is consistent with the nodes listed in Sequence.
719 void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp
) {
720 unsigned ScheduledNodes
= ScheduleDAG::VerifyScheduledDAG(isBottomUp
);
722 for (unsigned i
= 0, e
= Sequence
.size(); i
!= e
; ++i
)
725 assert(Sequence
.size() - Noops
== ScheduledNodes
&&
726 "The number of nodes scheduled doesn't match the expected number!");
730 /// ProcessSDDbgValues - Process SDDbgValues associated with this node.
732 ProcessSDDbgValues(SDNode
*N
, SelectionDAG
*DAG
, InstrEmitter
&Emitter
,
733 SmallVectorImpl
<std::pair
<unsigned, MachineInstr
*> > &Orders
,
734 DenseMap
<SDValue
, unsigned> &VRBaseMap
, unsigned Order
) {
735 if (!N
->getHasDebugValue())
738 // Opportunistically insert immediate dbg_value uses, i.e. those with the same
739 // source order number as N.
740 MachineBasicBlock
*BB
= Emitter
.getBlock();
741 MachineBasicBlock::iterator InsertPos
= Emitter
.getInsertPos();
742 for (auto DV
: DAG
->GetDbgValues(N
)) {
745 unsigned DVOrder
= DV
->getOrder();
746 if (!Order
|| DVOrder
== Order
) {
747 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(DV
, VRBaseMap
);
749 Orders
.push_back({DVOrder
, DbgMI
});
750 BB
->insert(InsertPos
, DbgMI
);
756 // ProcessSourceNode - Process nodes with source order numbers. These are added
757 // to a vector which EmitSchedule uses to determine how to insert dbg_value
758 // instructions in the right order.
760 ProcessSourceNode(SDNode
*N
, SelectionDAG
*DAG
, InstrEmitter
&Emitter
,
761 DenseMap
<SDValue
, unsigned> &VRBaseMap
,
762 SmallVectorImpl
<std::pair
<unsigned, MachineInstr
*>> &Orders
,
763 SmallSet
<unsigned, 8> &Seen
, MachineInstr
*NewInsn
) {
764 unsigned Order
= N
->getIROrder();
765 if (!Order
|| Seen
.count(Order
)) {
766 // Process any valid SDDbgValues even if node does not have any order
768 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, 0);
772 // If a new instruction was generated for this Order number, record it.
773 // Otherwise, leave this order number unseen: we will either find later
774 // instructions for it, or leave it unseen if there were no instructions at
778 Orders
.push_back({Order
, NewInsn
});
781 // Even if no instruction was generated, a Value may have become defined via
782 // earlier nodes. Try to process them now.
783 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, Order
);
786 void ScheduleDAGSDNodes::
787 EmitPhysRegCopy(SUnit
*SU
, DenseMap
<SUnit
*, unsigned> &VRBaseMap
,
788 MachineBasicBlock::iterator InsertPos
) {
789 for (SUnit::const_pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
791 if (I
->isCtrl()) continue; // ignore chain preds
792 if (I
->getSUnit()->CopyDstRC
) {
793 // Copy to physical register.
794 DenseMap
<SUnit
*, unsigned>::iterator VRI
= VRBaseMap
.find(I
->getSUnit());
795 assert(VRI
!= VRBaseMap
.end() && "Node emitted out of order - late");
796 // Find the destination physical register.
798 for (SUnit::const_succ_iterator II
= SU
->Succs
.begin(),
799 EE
= SU
->Succs
.end(); II
!= EE
; ++II
) {
800 if (II
->isCtrl()) continue; // ignore chain preds
806 BuildMI(*BB
, InsertPos
, DebugLoc(), TII
->get(TargetOpcode::COPY
), Reg
)
807 .addReg(VRI
->second
);
809 // Copy from physical register.
810 assert(I
->getReg() && "Unknown physical register!");
811 Register VRBase
= MRI
.createVirtualRegister(SU
->CopyDstRC
);
812 bool isNew
= VRBaseMap
.insert(std::make_pair(SU
, VRBase
)).second
;
813 (void)isNew
; // Silence compiler warning.
814 assert(isNew
&& "Node emitted out of order - early");
815 BuildMI(*BB
, InsertPos
, DebugLoc(), TII
->get(TargetOpcode::COPY
), VRBase
)
816 .addReg(I
->getReg());
822 /// EmitSchedule - Emit the machine code in scheduled order. Return the new
823 /// InsertPos and MachineBasicBlock that contains this insertion
824 /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
825 /// not necessarily refer to returned BB. The emitter may split blocks.
826 MachineBasicBlock
*ScheduleDAGSDNodes::
827 EmitSchedule(MachineBasicBlock::iterator
&InsertPos
) {
828 InstrEmitter
Emitter(BB
, InsertPos
);
829 DenseMap
<SDValue
, unsigned> VRBaseMap
;
830 DenseMap
<SUnit
*, unsigned> CopyVRBaseMap
;
831 SmallVector
<std::pair
<unsigned, MachineInstr
*>, 32> Orders
;
832 SmallSet
<unsigned, 8> Seen
;
833 bool HasDbg
= DAG
->hasDebugValues();
835 // Emit a node, and determine where its first instruction is for debuginfo.
836 // Zero, one, or multiple instructions can be created when emitting a node.
838 [&](SDNode
*Node
, bool IsClone
, bool IsCloned
,
839 DenseMap
<SDValue
, unsigned> &VRBaseMap
) -> MachineInstr
* {
840 // Fetch instruction prior to this, or end() if nonexistant.
841 auto GetPrevInsn
= [&](MachineBasicBlock::iterator I
) {
842 if (I
== BB
->begin())
845 return std::prev(Emitter
.getInsertPos());
848 MachineBasicBlock::iterator Before
= GetPrevInsn(Emitter
.getInsertPos());
849 Emitter
.EmitNode(Node
, IsClone
, IsCloned
, VRBaseMap
);
850 MachineBasicBlock::iterator After
= GetPrevInsn(Emitter
.getInsertPos());
852 // If the iterator did not change, no instructions were inserted.
857 if (Before
== BB
->end()) {
858 // There were no prior instructions; the new ones must start at the
859 // beginning of the block.
860 MI
= &Emitter
.getBlock()->instr_front();
862 // Return first instruction after the pre-existing instructions.
863 MI
= &*std::next(Before
);
866 if (MI
->isCall() && DAG
->getTarget().Options
.EnableDebugEntryValues
)
867 MF
.addCallArgsForwardingRegs(MI
, DAG
->getSDCallSiteInfo(Node
));
872 // If this is the first BB, emit byval parameter dbg_value's.
873 if (HasDbg
&& BB
->getParent()->begin() == MachineFunction::iterator(BB
)) {
874 SDDbgInfo::DbgIterator PDI
= DAG
->ByvalParmDbgBegin();
875 SDDbgInfo::DbgIterator PDE
= DAG
->ByvalParmDbgEnd();
876 for (; PDI
!= PDE
; ++PDI
) {
877 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*PDI
, VRBaseMap
);
879 BB
->insert(InsertPos
, DbgMI
);
880 // We re-emit the dbg_value closer to its use, too, after instructions
881 // are emitted to the BB.
882 (*PDI
)->clearIsEmitted();
887 for (unsigned i
= 0, e
= Sequence
.size(); i
!= e
; i
++) {
888 SUnit
*SU
= Sequence
[i
];
890 // Null SUnit* is a noop.
891 TII
->insertNoop(*Emitter
.getBlock(), InsertPos
);
895 // For pre-regalloc scheduling, create instructions corresponding to the
896 // SDNode and any glued SDNodes and append them to the block.
897 if (!SU
->getNode()) {
899 EmitPhysRegCopy(SU
, CopyVRBaseMap
, InsertPos
);
903 SmallVector
<SDNode
*, 4> GluedNodes
;
904 for (SDNode
*N
= SU
->getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
905 GluedNodes
.push_back(N
);
906 while (!GluedNodes
.empty()) {
907 SDNode
*N
= GluedNodes
.back();
908 auto NewInsn
= EmitNode(N
, SU
->OrigNode
!= SU
, SU
->isCloned
, VRBaseMap
);
909 // Remember the source order of the inserted instruction.
911 ProcessSourceNode(N
, DAG
, Emitter
, VRBaseMap
, Orders
, Seen
, NewInsn
);
913 if (MDNode
*MD
= DAG
->getHeapAllocSite(N
)) {
914 if (NewInsn
&& NewInsn
->isCall())
915 MF
.addCodeViewHeapAllocSite(NewInsn
, MD
);
918 GluedNodes
.pop_back();
921 EmitNode(SU
->getNode(), SU
->OrigNode
!= SU
, SU
->isCloned
, VRBaseMap
);
922 // Remember the source order of the inserted instruction.
924 ProcessSourceNode(SU
->getNode(), DAG
, Emitter
, VRBaseMap
, Orders
, Seen
,
926 if (MDNode
*MD
= DAG
->getHeapAllocSite(SU
->getNode())) {
927 if (NewInsn
&& NewInsn
->isCall())
928 MF
.addCodeViewHeapAllocSite(NewInsn
, MD
);
932 // Insert all the dbg_values which have not already been inserted in source
935 MachineBasicBlock::iterator BBBegin
= BB
->getFirstNonPHI();
937 // Sort the source order instructions and use the order to insert debug
938 // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
939 // regardless of the host's implementation fo std::sort.
940 llvm::stable_sort(Orders
, less_first());
941 std::stable_sort(DAG
->DbgBegin(), DAG
->DbgEnd(),
942 [](const SDDbgValue
*LHS
, const SDDbgValue
*RHS
) {
943 return LHS
->getOrder() < RHS
->getOrder();
946 SDDbgInfo::DbgIterator DI
= DAG
->DbgBegin();
947 SDDbgInfo::DbgIterator DE
= DAG
->DbgEnd();
948 // Now emit the rest according to source order.
949 unsigned LastOrder
= 0;
950 for (unsigned i
= 0, e
= Orders
.size(); i
!= e
&& DI
!= DE
; ++i
) {
951 unsigned Order
= Orders
[i
].first
;
952 MachineInstr
*MI
= Orders
[i
].second
;
953 // Insert all SDDbgValue's whose order(s) are before "Order".
955 for (; DI
!= DE
; ++DI
) {
956 if ((*DI
)->getOrder() < LastOrder
|| (*DI
)->getOrder() >= Order
)
958 if ((*DI
)->isEmitted())
961 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
);
964 // Insert to start of the BB (after PHIs).
965 BB
->insert(BBBegin
, DbgMI
);
967 // Insert at the instruction, which may be in a different
968 // block, if the block was split by a custom inserter.
969 MachineBasicBlock::iterator Pos
= MI
;
970 MI
->getParent()->insert(Pos
, DbgMI
);
976 // Add trailing DbgValue's before the terminator. FIXME: May want to add
977 // some of them before one or more conditional branches?
978 SmallVector
<MachineInstr
*, 8> DbgMIs
;
979 for (; DI
!= DE
; ++DI
) {
980 if ((*DI
)->isEmitted())
982 assert((*DI
)->getOrder() >= LastOrder
&&
983 "emitting DBG_VALUE out of order");
984 if (MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
))
985 DbgMIs
.push_back(DbgMI
);
988 MachineBasicBlock
*InsertBB
= Emitter
.getBlock();
989 MachineBasicBlock::iterator Pos
= InsertBB
->getFirstTerminator();
990 InsertBB
->insert(Pos
, DbgMIs
.begin(), DbgMIs
.end());
992 SDDbgInfo::DbgLabelIterator DLI
= DAG
->DbgLabelBegin();
993 SDDbgInfo::DbgLabelIterator DLE
= DAG
->DbgLabelEnd();
994 // Now emit the rest according to source order.
996 for (const auto &InstrOrder
: Orders
) {
997 unsigned Order
= InstrOrder
.first
;
998 MachineInstr
*MI
= InstrOrder
.second
;
1002 // Insert all SDDbgLabel's whose order(s) are before "Order".
1003 for (; DLI
!= DLE
&&
1004 (*DLI
)->getOrder() >= LastOrder
&& (*DLI
)->getOrder() < Order
;
1006 MachineInstr
*DbgMI
= Emitter
.EmitDbgLabel(*DLI
);
1009 // Insert to start of the BB (after PHIs).
1010 BB
->insert(BBBegin
, DbgMI
);
1012 // Insert at the instruction, which may be in a different
1013 // block, if the block was split by a custom inserter.
1014 MachineBasicBlock::iterator Pos
= MI
;
1015 MI
->getParent()->insert(Pos
, DbgMI
);
1026 InsertPos
= Emitter
.getInsertPos();
1027 return Emitter
.getBlock();
1030 /// Return the basic block label.
1031 std::string
ScheduleDAGSDNodes::getDAGName() const {
1032 return "sunit-dag." + BB
->getFullName();