1 //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This implements the ScheduleDAG class, which is a base class used by
10 // scheduling implementation classes.
12 //===----------------------------------------------------------------------===//
14 #include "ScheduleDAGSDNodes.h"
15 #include "InstrEmitter.h"
16 #include "SDNodeDbgValue.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSubtargetInfo.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/MC/MCInstrItineraries.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include "llvm/Target/TargetMachine.h"
37 #define DEBUG_TYPE "pre-RA-sched"
39 STATISTIC(LoadsClustered
, "Number of loads clustered together");
41 // This allows the latency-based scheduler to notice high latency instructions
42 // without a target itinerary. The choice of number here has more to do with
43 // balancing scheduler heuristics than with the actual machine latency.
44 static cl::opt
<int> HighLatencyCycles(
45 "sched-high-latency-cycles", cl::Hidden
, cl::init(10),
46 cl::desc("Roughly estimate the number of cycles that 'long latency'"
47 "instructions take for targets with no itinerary"));
49 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction
&mf
)
50 : ScheduleDAG(mf
), InstrItins(mf
.getSubtarget().getInstrItineraryData()) {}
52 /// Run - perform scheduling.
54 void ScheduleDAGSDNodes::Run(SelectionDAG
*dag
, MachineBasicBlock
*bb
) {
58 // Clear the scheduler's SUnit DAG.
59 ScheduleDAG::clearDAG();
62 // Invoke the target's selection of scheduler.
66 /// NewSUnit - Creates a new SUnit and return a ptr to it.
68 SUnit
*ScheduleDAGSDNodes::newSUnit(SDNode
*N
) {
70 const SUnit
*Addr
= nullptr;
74 SUnits
.emplace_back(N
, (unsigned)SUnits
.size());
75 assert((Addr
== nullptr || Addr
== &SUnits
[0]) &&
76 "SUnits std::vector reallocated on the fly!");
77 SUnits
.back().OrigNode
= &SUnits
.back();
78 SUnit
*SU
= &SUnits
.back();
79 const TargetLowering
&TLI
= DAG
->getTargetLoweringInfo();
81 (N
->isMachineOpcode() &&
82 N
->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF
))
83 SU
->SchedulingPref
= Sched::None
;
85 SU
->SchedulingPref
= TLI
.getSchedulingPreference(N
);
89 SUnit
*ScheduleDAGSDNodes::Clone(SUnit
*Old
) {
90 SUnit
*SU
= newSUnit(Old
->getNode());
91 SU
->OrigNode
= Old
->OrigNode
;
92 SU
->Latency
= Old
->Latency
;
93 SU
->isVRegCycle
= Old
->isVRegCycle
;
94 SU
->isCall
= Old
->isCall
;
95 SU
->isCallOp
= Old
->isCallOp
;
96 SU
->isTwoAddress
= Old
->isTwoAddress
;
97 SU
->isCommutable
= Old
->isCommutable
;
98 SU
->hasPhysRegDefs
= Old
->hasPhysRegDefs
;
99 SU
->hasPhysRegClobbers
= Old
->hasPhysRegClobbers
;
100 SU
->isScheduleHigh
= Old
->isScheduleHigh
;
101 SU
->isScheduleLow
= Old
->isScheduleLow
;
102 SU
->SchedulingPref
= Old
->SchedulingPref
;
103 Old
->isCloned
= true;
107 /// CheckForPhysRegDependency - Check if the dependency between def and use of
108 /// a specified operand is a physical register dependency. If so, returns the
109 /// register and the cost of copying the register.
110 static void CheckForPhysRegDependency(SDNode
*Def
, SDNode
*User
, unsigned Op
,
111 const TargetRegisterInfo
*TRI
,
112 const TargetInstrInfo
*TII
,
113 const TargetLowering
&TLI
,
114 unsigned &PhysReg
, int &Cost
) {
115 if (Op
!= 2 || User
->getOpcode() != ISD::CopyToReg
)
118 unsigned Reg
= cast
<RegisterSDNode
>(User
->getOperand(1))->getReg();
119 if (TLI
.checkForPhysRegDependency(Def
, User
, Op
, TRI
, TII
, PhysReg
, Cost
))
122 if (Register::isVirtualRegister(Reg
))
125 unsigned ResNo
= User
->getOperand(2).getResNo();
126 if (Def
->getOpcode() == ISD::CopyFromReg
&&
127 cast
<RegisterSDNode
>(Def
->getOperand(1))->getReg() == Reg
) {
129 } else if (Def
->isMachineOpcode()) {
130 const MCInstrDesc
&II
= TII
->get(Def
->getMachineOpcode());
131 if (ResNo
>= II
.getNumDefs() && II
.hasImplicitDefOfPhysReg(Reg
))
136 const TargetRegisterClass
*RC
=
137 TRI
->getMinimalPhysRegClass(Reg
, Def
->getSimpleValueType(ResNo
));
138 Cost
= RC
->getCopyCost();
142 // Helper for AddGlue to clone node operands.
143 static void CloneNodeWithValues(SDNode
*N
, SelectionDAG
*DAG
, ArrayRef
<EVT
> VTs
,
144 SDValue ExtraOper
= SDValue()) {
145 SmallVector
<SDValue
, 8> Ops(N
->op_begin(), N
->op_end());
146 if (ExtraOper
.getNode())
147 Ops
.push_back(ExtraOper
);
149 SDVTList VTList
= DAG
->getVTList(VTs
);
150 MachineSDNode
*MN
= dyn_cast
<MachineSDNode
>(N
);
152 // Store memory references.
153 SmallVector
<MachineMemOperand
*, 2> MMOs
;
155 MMOs
.assign(MN
->memoperands_begin(), MN
->memoperands_end());
157 DAG
->MorphNodeTo(N
, N
->getOpcode(), VTList
, Ops
);
159 // Reset the memory references
161 DAG
->setNodeMemRefs(MN
, MMOs
);
164 static bool AddGlue(SDNode
*N
, SDValue Glue
, bool AddGlue
, SelectionDAG
*DAG
) {
165 SDNode
*GlueDestNode
= Glue
.getNode();
167 // Don't add glue from a node to itself.
168 if (GlueDestNode
== N
) return false;
170 // Don't add a glue operand to something that already uses glue.
172 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
175 // Don't add glue to something that already has a glue value.
176 if (N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
) return false;
178 SmallVector
<EVT
, 4> VTs(N
->values());
180 VTs
.push_back(MVT::Glue
);
182 CloneNodeWithValues(N
, DAG
, VTs
, Glue
);
187 // Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
188 // node even though simply shrinking the value list is sufficient.
189 static void RemoveUnusedGlue(SDNode
*N
, SelectionDAG
*DAG
) {
190 assert((N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
&&
191 !N
->hasAnyUseOfValue(N
->getNumValues() - 1)) &&
192 "expected an unused glue value");
194 CloneNodeWithValues(N
, DAG
,
195 ArrayRef(N
->value_begin(), N
->getNumValues() - 1));
198 /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
199 /// This function finds loads of the same base and different offsets. If the
200 /// offsets are not far apart (target specific), it add MVT::Glue inputs and
201 /// outputs to ensure they are scheduled together and in order. This
202 /// optimization may benefit some targets by improving cache locality.
203 void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode
*Node
) {
205 unsigned NumOps
= Node
->getNumOperands();
206 if (Node
->getOperand(NumOps
-1).getValueType() == MVT::Other
)
207 Chain
= Node
->getOperand(NumOps
-1);
211 // Skip any load instruction that has a tied input. There may be an additional
212 // dependency requiring a different order than by increasing offsets, and the
213 // added glue may introduce a cycle.
214 auto hasTiedInput
= [this](const SDNode
*N
) {
215 const MCInstrDesc
&MCID
= TII
->get(N
->getMachineOpcode());
216 for (unsigned I
= 0; I
!= MCID
.getNumOperands(); ++I
) {
217 if (MCID
.getOperandConstraint(I
, MCOI::TIED_TO
) != -1)
224 // Look for other loads of the same chain. Find loads that are loading from
225 // the same base pointer and different offsets.
226 SmallPtrSet
<SDNode
*, 16> Visited
;
227 SmallVector
<int64_t, 4> Offsets
;
228 DenseMap
<long long, SDNode
*> O2SMap
; // Map from offset to SDNode.
229 bool Cluster
= false;
232 if (hasTiedInput(Base
))
235 // This algorithm requires a reasonably low use count before finding a match
236 // to avoid uselessly blowing up compile time in large blocks.
237 unsigned UseCount
= 0;
238 for (SDNode::use_iterator I
= Chain
->use_begin(), E
= Chain
->use_end();
239 I
!= E
&& UseCount
< 100; ++I
, ++UseCount
) {
240 if (I
.getUse().getResNo() != Chain
.getResNo())
244 if (User
== Node
|| !Visited
.insert(User
).second
)
246 int64_t Offset1
, Offset2
;
247 if (!TII
->areLoadsFromSameBasePtr(Base
, User
, Offset1
, Offset2
) ||
248 Offset1
== Offset2
||
249 hasTiedInput(User
)) {
250 // FIXME: Should be ok if they addresses are identical. But earlier
251 // optimizations really should have eliminated one of the loads.
254 if (O2SMap
.insert(std::make_pair(Offset1
, Base
)).second
)
255 Offsets
.push_back(Offset1
);
256 O2SMap
.insert(std::make_pair(Offset2
, User
));
257 Offsets
.push_back(Offset2
);
258 if (Offset2
< Offset1
)
261 // Reset UseCount to allow more matches.
268 // Sort them in increasing order.
271 // Check if the loads are close enough.
272 SmallVector
<SDNode
*, 4> Loads
;
273 unsigned NumLoads
= 0;
274 int64_t BaseOff
= Offsets
[0];
275 SDNode
*BaseLoad
= O2SMap
[BaseOff
];
276 Loads
.push_back(BaseLoad
);
277 for (unsigned i
= 1, e
= Offsets
.size(); i
!= e
; ++i
) {
278 int64_t Offset
= Offsets
[i
];
279 SDNode
*Load
= O2SMap
[Offset
];
280 if (!TII
->shouldScheduleLoadsNear(BaseLoad
, Load
, BaseOff
, Offset
,NumLoads
))
281 break; // Stop right here. Ignore loads that are further away.
282 Loads
.push_back(Load
);
289 // Cluster loads by adding MVT::Glue outputs and inputs. This also
290 // ensure they are scheduled in order of increasing addresses.
291 SDNode
*Lead
= Loads
[0];
293 if (AddGlue(Lead
, InGlue
, true, DAG
))
294 InGlue
= SDValue(Lead
, Lead
->getNumValues() - 1);
295 for (unsigned I
= 1, E
= Loads
.size(); I
!= E
; ++I
) {
296 bool OutGlue
= I
< E
- 1;
297 SDNode
*Load
= Loads
[I
];
299 // If AddGlue fails, we could leave an unsused glue value. This should not
301 if (AddGlue(Load
, InGlue
, OutGlue
, DAG
)) {
303 InGlue
= SDValue(Load
, Load
->getNumValues() - 1);
307 else if (!OutGlue
&& InGlue
.getNode())
308 RemoveUnusedGlue(InGlue
.getNode(), DAG
);
312 /// ClusterNodes - Cluster certain nodes which should be scheduled together.
314 void ScheduleDAGSDNodes::ClusterNodes() {
315 for (SDNode
&NI
: DAG
->allnodes()) {
317 if (!Node
|| !Node
->isMachineOpcode())
320 unsigned Opc
= Node
->getMachineOpcode();
321 const MCInstrDesc
&MCID
= TII
->get(Opc
);
323 // Cluster loads from "near" addresses into combined SUnits.
324 ClusterNeighboringLoads(Node
);
328 void ScheduleDAGSDNodes::BuildSchedUnits() {
329 // During scheduling, the NodeId field of SDNode is used to map SDNodes
330 // to their associated SUnits by holding SUnits table indices. A value
331 // of -1 means the SDNode does not yet have an associated SUnit.
332 unsigned NumNodes
= 0;
333 for (SDNode
&NI
: DAG
->allnodes()) {
338 // Reserve entries in the vector for each of the SUnits we are creating. This
339 // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
341 // FIXME: Multiply by 2 because we may clone nodes during scheduling.
342 // This is a temporary workaround.
343 SUnits
.reserve(NumNodes
* 2);
345 // Add all nodes in depth first order.
346 SmallVector
<SDNode
*, 64> Worklist
;
347 SmallPtrSet
<SDNode
*, 32> Visited
;
348 Worklist
.push_back(DAG
->getRoot().getNode());
349 Visited
.insert(DAG
->getRoot().getNode());
351 SmallVector
<SUnit
*, 8> CallSUnits
;
352 while (!Worklist
.empty()) {
353 SDNode
*NI
= Worklist
.pop_back_val();
355 // Add all operands to the worklist unless they've already been added.
356 for (const SDValue
&Op
: NI
->op_values())
357 if (Visited
.insert(Op
.getNode()).second
)
358 Worklist
.push_back(Op
.getNode());
360 if (isPassiveNode(NI
)) // Leaf node, e.g. a TargetImmediate.
363 // If this node has already been processed, stop now.
364 if (NI
->getNodeId() != -1) continue;
366 SUnit
*NodeSUnit
= newSUnit(NI
);
368 // See if anything is glued to this node, if so, add them to glued
369 // nodes. Nodes can have at most one glue input and one glue output. Glue
370 // is required to be the last operand and result of a node.
372 // Scan up to find glued preds.
374 while (N
->getNumOperands() &&
375 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
376 N
= N
->getOperand(N
->getNumOperands()-1).getNode();
377 assert(N
->getNodeId() == -1 && "Node already inserted!");
378 N
->setNodeId(NodeSUnit
->NodeNum
);
379 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
380 NodeSUnit
->isCall
= true;
383 // Scan down to find any glued succs.
385 while (N
->getValueType(N
->getNumValues()-1) == MVT::Glue
) {
386 SDValue
GlueVal(N
, N
->getNumValues()-1);
388 // There are either zero or one users of the Glue result.
389 bool HasGlueUse
= false;
390 for (SDNode
*U
: N
->uses())
391 if (GlueVal
.isOperandOf(U
)) {
393 assert(N
->getNodeId() == -1 && "Node already inserted!");
394 N
->setNodeId(NodeSUnit
->NodeNum
);
396 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
397 NodeSUnit
->isCall
= true;
400 if (!HasGlueUse
) break;
403 if (NodeSUnit
->isCall
)
404 CallSUnits
.push_back(NodeSUnit
);
406 // Schedule zero-latency TokenFactor below any nodes that may increase the
407 // schedule height. Otherwise, ancestors of the TokenFactor may appear to
408 // have false stalls.
409 if (NI
->getOpcode() == ISD::TokenFactor
)
410 NodeSUnit
->isScheduleLow
= true;
412 // If there are glue operands involved, N is now the bottom-most node
413 // of the sequence of nodes that are glued together.
415 NodeSUnit
->setNode(N
);
416 assert(N
->getNodeId() == -1 && "Node already inserted!");
417 N
->setNodeId(NodeSUnit
->NodeNum
);
419 // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
420 InitNumRegDefsLeft(NodeSUnit
);
422 // Assign the Latency field of NodeSUnit using target-provided information.
423 computeLatency(NodeSUnit
);
426 // Find all call operands.
427 while (!CallSUnits
.empty()) {
428 SUnit
*SU
= CallSUnits
.pop_back_val();
429 for (const SDNode
*SUNode
= SU
->getNode(); SUNode
;
430 SUNode
= SUNode
->getGluedNode()) {
431 if (SUNode
->getOpcode() != ISD::CopyToReg
)
433 SDNode
*SrcN
= SUNode
->getOperand(2).getNode();
434 if (isPassiveNode(SrcN
)) continue; // Not scheduled.
435 SUnit
*SrcSU
= &SUnits
[SrcN
->getNodeId()];
436 SrcSU
->isCallOp
= true;
441 void ScheduleDAGSDNodes::AddSchedEdges() {
442 const TargetSubtargetInfo
&ST
= MF
.getSubtarget();
444 // Check to see if the scheduler cares about latencies.
445 bool UnitLatencies
= forceUnitLatencies();
447 // Pass 2: add the preds, succs, etc.
448 for (SUnit
&SU
: SUnits
) {
449 SDNode
*MainNode
= SU
.getNode();
451 if (MainNode
->isMachineOpcode()) {
452 unsigned Opc
= MainNode
->getMachineOpcode();
453 const MCInstrDesc
&MCID
= TII
->get(Opc
);
454 for (unsigned i
= 0; i
!= MCID
.getNumOperands(); ++i
) {
455 if (MCID
.getOperandConstraint(i
, MCOI::TIED_TO
) != -1) {
456 SU
.isTwoAddress
= true;
460 if (MCID
.isCommutable())
461 SU
.isCommutable
= true;
464 // Find all predecessors and successors of the group.
465 for (SDNode
*N
= SU
.getNode(); N
; N
= N
->getGluedNode()) {
466 if (N
->isMachineOpcode() &&
467 !TII
->get(N
->getMachineOpcode()).implicit_defs().empty()) {
468 SU
.hasPhysRegClobbers
= true;
469 unsigned NumUsed
= InstrEmitter::CountResults(N
);
470 while (NumUsed
!= 0 && !N
->hasAnyUseOfValue(NumUsed
- 1))
471 --NumUsed
; // Skip over unused values at the end.
472 if (NumUsed
> TII
->get(N
->getMachineOpcode()).getNumDefs())
473 SU
.hasPhysRegDefs
= true;
476 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
477 SDNode
*OpN
= N
->getOperand(i
).getNode();
478 unsigned DefIdx
= N
->getOperand(i
).getResNo();
479 if (isPassiveNode(OpN
)) continue; // Not scheduled.
480 SUnit
*OpSU
= &SUnits
[OpN
->getNodeId()];
481 assert(OpSU
&& "Node has no SUnit!");
483 continue; // In the same group.
485 EVT OpVT
= N
->getOperand(i
).getValueType();
486 assert(OpVT
!= MVT::Glue
&& "Glued nodes should be in same sunit!");
487 bool isChain
= OpVT
== MVT::Other
;
489 unsigned PhysReg
= 0;
491 // Determine if this is a physical register dependency.
492 const TargetLowering
&TLI
= DAG
->getTargetLoweringInfo();
493 CheckForPhysRegDependency(OpN
, N
, i
, TRI
, TII
, TLI
, PhysReg
, Cost
);
494 assert((PhysReg
== 0 || !isChain
) &&
495 "Chain dependence via physreg data?");
496 // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
497 // emits a copy from the physical register to a virtual register unless
498 // it requires a cross class copy (cost < 0). That means we are only
499 // treating "expensive to copy" register dependency as physical register
500 // dependency. This may change in the future though.
501 if (Cost
>= 0 && !StressSched
)
504 // If this is a ctrl dep, latency is 1.
505 unsigned OpLatency
= isChain
? 1 : OpSU
->Latency
;
506 // Special-case TokenFactor chains as zero-latency.
507 if(isChain
&& OpN
->getOpcode() == ISD::TokenFactor
)
510 SDep Dep
= isChain
? SDep(OpSU
, SDep::Barrier
)
511 : SDep(OpSU
, SDep::Data
, PhysReg
);
512 Dep
.setLatency(OpLatency
);
513 if (!isChain
&& !UnitLatencies
) {
514 computeOperandLatency(OpN
, N
, i
, Dep
);
515 ST
.adjustSchedDependency(OpSU
, DefIdx
, &SU
, i
, Dep
);
518 if (!SU
.addPred(Dep
) && !Dep
.isCtrl() && OpSU
->NumRegDefsLeft
> 1) {
519 // Multiple register uses are combined in the same SUnit. For example,
520 // we could have a set of glued nodes with all their defs consumed by
521 // another set of glued nodes. Register pressure tracking sees this as
522 // a single use, so to keep pressure balanced we reduce the defs.
524 // We can't tell (without more book-keeping) if this results from
525 // glued nodes or duplicate operands. As long as we don't reduce
526 // NumRegDefsLeft to zero, we handle the common cases well.
527 --OpSU
->NumRegDefsLeft
;
534 /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
535 /// are input. This SUnit graph is similar to the SelectionDAG, but
536 /// excludes nodes that aren't interesting to scheduling, and represents
537 /// glued together nodes with a single SUnit.
538 void ScheduleDAGSDNodes::BuildSchedGraph(AAResults
*AA
) {
539 // Cluster certain nodes which should be scheduled together.
541 // Populate the SUnits array.
543 // Compute all the scheduling dependencies between nodes.
547 // Initialize NumNodeDefs for the current Node's opcode.
548 void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
549 // Check for phys reg copy.
553 if (!Node
->isMachineOpcode()) {
554 if (Node
->getOpcode() == ISD::CopyFromReg
)
560 unsigned POpc
= Node
->getMachineOpcode();
561 if (POpc
== TargetOpcode::IMPLICIT_DEF
) {
562 // No register need be allocated for this.
566 if (POpc
== TargetOpcode::PATCHPOINT
&&
567 Node
->getValueType(0) == MVT::Other
) {
568 // PATCHPOINT is defined to have one result, but it might really have none
569 // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
574 unsigned NRegDefs
= SchedDAG
->TII
->get(Node
->getMachineOpcode()).getNumDefs();
575 // Some instructions define regs that are not represented in the selection DAG
576 // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
577 NodeNumDefs
= std::min(Node
->getNumValues(), NRegDefs
);
581 // Construct a RegDefIter for this SUnit and find the first valid value.
582 ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit
*SU
,
583 const ScheduleDAGSDNodes
*SD
)
584 : SchedDAG(SD
), Node(SU
->getNode()) {
589 // Advance to the next valid value defined by the SUnit.
590 void ScheduleDAGSDNodes::RegDefIter::Advance() {
591 for (;Node
;) { // Visit all glued nodes.
592 for (;DefIdx
< NodeNumDefs
; ++DefIdx
) {
593 if (!Node
->hasAnyUseOfValue(DefIdx
))
595 ValueType
= Node
->getSimpleValueType(DefIdx
);
597 return; // Found a normal regdef.
599 Node
= Node
->getGluedNode();
601 return; // No values left to visit.
607 void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit
*SU
) {
608 assert(SU
->NumRegDefsLeft
== 0 && "expect a new node");
609 for (RegDefIter
I(SU
, this); I
.IsValid(); I
.Advance()) {
610 assert(SU
->NumRegDefsLeft
< USHRT_MAX
&& "overflow is ok but unexpected");
611 ++SU
->NumRegDefsLeft
;
615 void ScheduleDAGSDNodes::computeLatency(SUnit
*SU
) {
616 SDNode
*N
= SU
->getNode();
618 // TokenFactor operands are considered zero latency, and some schedulers
619 // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
620 // whenever node latency is nonzero.
621 if (N
&& N
->getOpcode() == ISD::TokenFactor
) {
626 // Check to see if the scheduler cares about latencies.
627 if (forceUnitLatencies()) {
632 if (!InstrItins
|| InstrItins
->isEmpty()) {
633 if (N
&& N
->isMachineOpcode() &&
634 TII
->isHighLatencyDef(N
->getMachineOpcode()))
635 SU
->Latency
= HighLatencyCycles
;
641 // Compute the latency for the node. We use the sum of the latencies for
642 // all nodes glued together into this SUnit.
644 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode())
645 if (N
->isMachineOpcode())
646 SU
->Latency
+= TII
->getInstrLatency(InstrItins
, N
);
649 void ScheduleDAGSDNodes::computeOperandLatency(SDNode
*Def
, SDNode
*Use
,
650 unsigned OpIdx
, SDep
& dep
) const{
651 // Check to see if the scheduler cares about latencies.
652 if (forceUnitLatencies())
655 if (dep
.getKind() != SDep::Data
)
658 unsigned DefIdx
= Use
->getOperand(OpIdx
).getResNo();
659 if (Use
->isMachineOpcode())
660 // Adjust the use operand index by num of defs.
661 OpIdx
+= TII
->get(Use
->getMachineOpcode()).getNumDefs();
662 int Latency
= TII
->getOperandLatency(InstrItins
, Def
, DefIdx
, Use
, OpIdx
);
663 if (Latency
> 1 && Use
->getOpcode() == ISD::CopyToReg
&&
665 unsigned Reg
= cast
<RegisterSDNode
>(Use
->getOperand(1))->getReg();
666 if (Register::isVirtualRegister(Reg
))
667 // This copy is a liveout value. It is likely coalesced, so reduce the
668 // latency so not to penalize the def.
669 // FIXME: need target specific adjustment here?
670 Latency
= Latency
- 1;
673 dep
.setLatency(Latency
);
676 void ScheduleDAGSDNodes::dumpNode(const SUnit
&SU
) const {
677 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
682 dbgs() << "PHYS REG COPY\n";
686 SU
.getNode()->dump(DAG
);
688 SmallVector
<SDNode
*, 4> GluedNodes
;
689 for (SDNode
*N
= SU
.getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
690 GluedNodes
.push_back(N
);
691 while (!GluedNodes
.empty()) {
693 GluedNodes
.back()->dump(DAG
);
695 GluedNodes
.pop_back();
700 void ScheduleDAGSDNodes::dump() const {
701 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
702 if (EntrySU
.getNode() != nullptr)
703 dumpNodeAll(EntrySU
);
704 for (const SUnit
&SU
: SUnits
)
706 if (ExitSU
.getNode() != nullptr)
711 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
712 void ScheduleDAGSDNodes::dumpSchedule() const {
713 for (const SUnit
*SU
: Sequence
) {
717 dbgs() << "**** NOOP ****\n";
723 /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
724 /// their state is consistent with the nodes listed in Sequence.
726 void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp
) {
727 unsigned ScheduledNodes
= ScheduleDAG::VerifyScheduledDAG(isBottomUp
);
728 unsigned Noops
= llvm::count(Sequence
, nullptr);
729 assert(Sequence
.size() - Noops
== ScheduledNodes
&&
730 "The number of nodes scheduled doesn't match the expected number!");
734 /// ProcessSDDbgValues - Process SDDbgValues associated with this node.
736 ProcessSDDbgValues(SDNode
*N
, SelectionDAG
*DAG
, InstrEmitter
&Emitter
,
737 SmallVectorImpl
<std::pair
<unsigned, MachineInstr
*> > &Orders
,
738 DenseMap
<SDValue
, Register
> &VRBaseMap
, unsigned Order
) {
739 if (!N
->getHasDebugValue())
742 /// Returns true if \p DV has any VReg operand locations which don't exist in
744 auto HasUnknownVReg
= [&VRBaseMap
](SDDbgValue
*DV
) {
745 for (const SDDbgOperand
&L
: DV
->getLocationOps()) {
746 if (L
.getKind() == SDDbgOperand::SDNODE
&&
747 VRBaseMap
.count({L
.getSDNode(), L
.getResNo()}) == 0)
753 // Opportunistically insert immediate dbg_value uses, i.e. those with the same
754 // source order number as N.
755 MachineBasicBlock
*BB
= Emitter
.getBlock();
756 MachineBasicBlock::iterator InsertPos
= Emitter
.getInsertPos();
757 for (auto *DV
: DAG
->GetDbgValues(N
)) {
760 unsigned DVOrder
= DV
->getOrder();
761 if (Order
!= 0 && DVOrder
!= Order
)
763 // If DV has any VReg location operands which haven't been mapped then
764 // either that node is no longer available or we just haven't visited the
765 // node yet. In the former case we should emit an undef dbg_value, but we
766 // can do it later. And for the latter we'll want to wait until all
767 // dependent nodes have been visited.
768 if (!DV
->isInvalidated() && HasUnknownVReg(DV
))
770 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(DV
, VRBaseMap
);
773 Orders
.push_back({DVOrder
, DbgMI
});
774 BB
->insert(InsertPos
, DbgMI
);
778 // ProcessSourceNode - Process nodes with source order numbers. These are added
779 // to a vector which EmitSchedule uses to determine how to insert dbg_value
780 // instructions in the right order.
782 ProcessSourceNode(SDNode
*N
, SelectionDAG
*DAG
, InstrEmitter
&Emitter
,
783 DenseMap
<SDValue
, Register
> &VRBaseMap
,
784 SmallVectorImpl
<std::pair
<unsigned, MachineInstr
*>> &Orders
,
785 SmallSet
<Register
, 8> &Seen
, MachineInstr
*NewInsn
) {
786 unsigned Order
= N
->getIROrder();
787 if (!Order
|| Seen
.count(Order
)) {
788 // Process any valid SDDbgValues even if node does not have any order
790 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, 0);
794 // If a new instruction was generated for this Order number, record it.
795 // Otherwise, leave this order number unseen: we will either find later
796 // instructions for it, or leave it unseen if there were no instructions at
800 Orders
.push_back({Order
, NewInsn
});
803 // Even if no instruction was generated, a Value may have become defined via
804 // earlier nodes. Try to process them now.
805 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, Order
);
808 void ScheduleDAGSDNodes::
809 EmitPhysRegCopy(SUnit
*SU
, DenseMap
<SUnit
*, Register
> &VRBaseMap
,
810 MachineBasicBlock::iterator InsertPos
) {
811 for (const SDep
&Pred
: SU
->Preds
) {
813 continue; // ignore chain preds
814 if (Pred
.getSUnit()->CopyDstRC
) {
815 // Copy to physical register.
816 DenseMap
<SUnit
*, Register
>::iterator VRI
=
817 VRBaseMap
.find(Pred
.getSUnit());
818 assert(VRI
!= VRBaseMap
.end() && "Node emitted out of order - late");
819 // Find the destination physical register.
821 for (const SDep
&Succ
: SU
->Succs
) {
823 continue; // ignore chain preds
829 BuildMI(*BB
, InsertPos
, DebugLoc(), TII
->get(TargetOpcode::COPY
), Reg
)
830 .addReg(VRI
->second
);
832 // Copy from physical register.
833 assert(Pred
.getReg() && "Unknown physical register!");
834 Register VRBase
= MRI
.createVirtualRegister(SU
->CopyDstRC
);
835 bool isNew
= VRBaseMap
.insert(std::make_pair(SU
, VRBase
)).second
;
836 (void)isNew
; // Silence compiler warning.
837 assert(isNew
&& "Node emitted out of order - early");
838 BuildMI(*BB
, InsertPos
, DebugLoc(), TII
->get(TargetOpcode::COPY
), VRBase
)
839 .addReg(Pred
.getReg());
845 /// EmitSchedule - Emit the machine code in scheduled order. Return the new
846 /// InsertPos and MachineBasicBlock that contains this insertion
847 /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
848 /// not necessarily refer to returned BB. The emitter may split blocks.
849 MachineBasicBlock
*ScheduleDAGSDNodes::
850 EmitSchedule(MachineBasicBlock::iterator
&InsertPos
) {
851 InstrEmitter
Emitter(DAG
->getTarget(), BB
, InsertPos
);
852 DenseMap
<SDValue
, Register
> VRBaseMap
;
853 DenseMap
<SUnit
*, Register
> CopyVRBaseMap
;
854 SmallVector
<std::pair
<unsigned, MachineInstr
*>, 32> Orders
;
855 SmallSet
<Register
, 8> Seen
;
856 bool HasDbg
= DAG
->hasDebugValues();
858 // Emit a node, and determine where its first instruction is for debuginfo.
859 // Zero, one, or multiple instructions can be created when emitting a node.
861 [&](SDNode
*Node
, bool IsClone
, bool IsCloned
,
862 DenseMap
<SDValue
, Register
> &VRBaseMap
) -> MachineInstr
* {
863 // Fetch instruction prior to this, or end() if nonexistant.
864 auto GetPrevInsn
= [&](MachineBasicBlock::iterator I
) {
865 if (I
== BB
->begin())
868 return std::prev(Emitter
.getInsertPos());
871 MachineBasicBlock::iterator Before
= GetPrevInsn(Emitter
.getInsertPos());
872 Emitter
.EmitNode(Node
, IsClone
, IsCloned
, VRBaseMap
);
873 MachineBasicBlock::iterator After
= GetPrevInsn(Emitter
.getInsertPos());
875 // If the iterator did not change, no instructions were inserted.
880 if (Before
== BB
->end()) {
881 // There were no prior instructions; the new ones must start at the
882 // beginning of the block.
883 MI
= &Emitter
.getBlock()->instr_front();
885 // Return first instruction after the pre-existing instructions.
886 MI
= &*std::next(Before
);
889 if (MI
->isCandidateForCallSiteEntry() &&
890 DAG
->getTarget().Options
.EmitCallSiteInfo
)
891 MF
.addCallArgsForwardingRegs(MI
, DAG
->getCallSiteInfo(Node
));
893 if (DAG
->getNoMergeSiteInfo(Node
)) {
894 MI
->setFlag(MachineInstr::MIFlag::NoMerge
);
897 if (MDNode
*MD
= DAG
->getPCSections(Node
))
898 MI
->setPCSections(MF
, MD
);
903 // If this is the first BB, emit byval parameter dbg_value's.
904 if (HasDbg
&& BB
->getParent()->begin() == MachineFunction::iterator(BB
)) {
905 SDDbgInfo::DbgIterator PDI
= DAG
->ByvalParmDbgBegin();
906 SDDbgInfo::DbgIterator PDE
= DAG
->ByvalParmDbgEnd();
907 for (; PDI
!= PDE
; ++PDI
) {
908 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*PDI
, VRBaseMap
);
910 BB
->insert(InsertPos
, DbgMI
);
911 // We re-emit the dbg_value closer to its use, too, after instructions
912 // are emitted to the BB.
913 (*PDI
)->clearIsEmitted();
918 for (SUnit
*SU
: Sequence
) {
920 // Null SUnit* is a noop.
921 TII
->insertNoop(*Emitter
.getBlock(), InsertPos
);
925 // For pre-regalloc scheduling, create instructions corresponding to the
926 // SDNode and any glued SDNodes and append them to the block.
927 if (!SU
->getNode()) {
929 EmitPhysRegCopy(SU
, CopyVRBaseMap
, InsertPos
);
933 SmallVector
<SDNode
*, 4> GluedNodes
;
934 for (SDNode
*N
= SU
->getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
935 GluedNodes
.push_back(N
);
936 while (!GluedNodes
.empty()) {
937 SDNode
*N
= GluedNodes
.back();
938 auto NewInsn
= EmitNode(N
, SU
->OrigNode
!= SU
, SU
->isCloned
, VRBaseMap
);
939 // Remember the source order of the inserted instruction.
941 ProcessSourceNode(N
, DAG
, Emitter
, VRBaseMap
, Orders
, Seen
, NewInsn
);
943 if (MDNode
*MD
= DAG
->getHeapAllocSite(N
))
944 if (NewInsn
&& NewInsn
->isCall())
945 NewInsn
->setHeapAllocMarker(MF
, MD
);
947 GluedNodes
.pop_back();
950 EmitNode(SU
->getNode(), SU
->OrigNode
!= SU
, SU
->isCloned
, VRBaseMap
);
951 // Remember the source order of the inserted instruction.
953 ProcessSourceNode(SU
->getNode(), DAG
, Emitter
, VRBaseMap
, Orders
, Seen
,
956 if (MDNode
*MD
= DAG
->getHeapAllocSite(SU
->getNode())) {
957 if (NewInsn
&& NewInsn
->isCall())
958 NewInsn
->setHeapAllocMarker(MF
, MD
);
962 // Insert all the dbg_values which have not already been inserted in source
965 MachineBasicBlock::iterator BBBegin
= BB
->getFirstNonPHI();
967 // Sort the source order instructions and use the order to insert debug
968 // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
969 // regardless of the host's implementation fo std::sort.
970 llvm::stable_sort(Orders
, less_first());
971 std::stable_sort(DAG
->DbgBegin(), DAG
->DbgEnd(),
972 [](const SDDbgValue
*LHS
, const SDDbgValue
*RHS
) {
973 return LHS
->getOrder() < RHS
->getOrder();
976 SDDbgInfo::DbgIterator DI
= DAG
->DbgBegin();
977 SDDbgInfo::DbgIterator DE
= DAG
->DbgEnd();
978 // Now emit the rest according to source order.
979 unsigned LastOrder
= 0;
980 for (unsigned i
= 0, e
= Orders
.size(); i
!= e
&& DI
!= DE
; ++i
) {
981 unsigned Order
= Orders
[i
].first
;
982 MachineInstr
*MI
= Orders
[i
].second
;
983 // Insert all SDDbgValue's whose order(s) are before "Order".
985 for (; DI
!= DE
; ++DI
) {
986 if ((*DI
)->getOrder() < LastOrder
|| (*DI
)->getOrder() >= Order
)
988 if ((*DI
)->isEmitted())
991 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
);
994 // Insert to start of the BB (after PHIs).
995 BB
->insert(BBBegin
, DbgMI
);
997 // Insert at the instruction, which may be in a different
998 // block, if the block was split by a custom inserter.
999 MachineBasicBlock::iterator Pos
= MI
;
1000 MI
->getParent()->insert(Pos
, DbgMI
);
1006 // Add trailing DbgValue's before the terminator. FIXME: May want to add
1007 // some of them before one or more conditional branches?
1008 SmallVector
<MachineInstr
*, 8> DbgMIs
;
1009 for (; DI
!= DE
; ++DI
) {
1010 if ((*DI
)->isEmitted())
1012 assert((*DI
)->getOrder() >= LastOrder
&&
1013 "emitting DBG_VALUE out of order");
1014 if (MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
))
1015 DbgMIs
.push_back(DbgMI
);
1018 MachineBasicBlock
*InsertBB
= Emitter
.getBlock();
1019 MachineBasicBlock::iterator Pos
= InsertBB
->getFirstTerminator();
1020 InsertBB
->insert(Pos
, DbgMIs
.begin(), DbgMIs
.end());
1022 SDDbgInfo::DbgLabelIterator DLI
= DAG
->DbgLabelBegin();
1023 SDDbgInfo::DbgLabelIterator DLE
= DAG
->DbgLabelEnd();
1024 // Now emit the rest according to source order.
1026 for (const auto &InstrOrder
: Orders
) {
1027 unsigned Order
= InstrOrder
.first
;
1028 MachineInstr
*MI
= InstrOrder
.second
;
1032 // Insert all SDDbgLabel's whose order(s) are before "Order".
1033 for (; DLI
!= DLE
&&
1034 (*DLI
)->getOrder() >= LastOrder
&& (*DLI
)->getOrder() < Order
;
1036 MachineInstr
*DbgMI
= Emitter
.EmitDbgLabel(*DLI
);
1039 // Insert to start of the BB (after PHIs).
1040 BB
->insert(BBBegin
, DbgMI
);
1042 // Insert at the instruction, which may be in a different
1043 // block, if the block was split by a custom inserter.
1044 MachineBasicBlock::iterator Pos
= MI
;
1045 MI
->getParent()->insert(Pos
, DbgMI
);
1056 InsertPos
= Emitter
.getInsertPos();
1057 // In some cases, DBG_VALUEs might be inserted after the first terminator,
1058 // which results in an invalid MBB. If that happens, move the DBG_VALUEs
1059 // before the first terminator.
1060 MachineBasicBlock
*InsertBB
= Emitter
.getBlock();
1061 auto FirstTerm
= InsertBB
->getFirstTerminator();
1062 if (FirstTerm
!= InsertBB
->end()) {
1063 assert(!FirstTerm
->isDebugValue() &&
1064 "first terminator cannot be a debug value");
1065 for (MachineInstr
&MI
: make_early_inc_range(
1066 make_range(std::next(FirstTerm
), InsertBB
->end()))) {
1067 // Only scan up to insertion point.
1068 if (&MI
== InsertPos
)
1071 if (!MI
.isDebugValue())
1074 // The DBG_VALUE was referencing a value produced by a terminator. By
1075 // moving the DBG_VALUE, the referenced value also needs invalidating.
1076 MI
.getOperand(0).ChangeToRegister(0, false);
1077 MI
.moveBefore(&*FirstTerm
);
1083 /// Return the basic block label.
1084 std::string
ScheduleDAGSDNodes::getDAGName() const {
1085 return "sunit-dag." + BB
->getFullName();