1 //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This implements the ScheduleDAG class, which is a base class used by
10 // scheduling implementation classes.
12 //===----------------------------------------------------------------------===//
14 #include "ScheduleDAGSDNodes.h"
15 #include "InstrEmitter.h"
16 #include "SDNodeDbgValue.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSubtargetInfo.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
31 #include "llvm/MC/MCInstrItineraries.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Target/TargetMachine.h"
38 #define DEBUG_TYPE "pre-RA-sched"
40 STATISTIC(LoadsClustered
, "Number of loads clustered together");
42 // This allows the latency-based scheduler to notice high latency instructions
43 // without a target itinerary. The choice of number here has more to do with
44 // balancing scheduler heuristics than with the actual machine latency.
45 static cl::opt
<int> HighLatencyCycles(
46 "sched-high-latency-cycles", cl::Hidden
, cl::init(10),
47 cl::desc("Roughly estimate the number of cycles that 'long latency' "
48 "instructions take for targets with no itinerary"));
50 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction
&mf
)
51 : ScheduleDAG(mf
), InstrItins(mf
.getSubtarget().getInstrItineraryData()) {}
53 /// Run - perform scheduling.
55 void ScheduleDAGSDNodes::Run(SelectionDAG
*dag
, MachineBasicBlock
*bb
) {
59 // Clear the scheduler's SUnit DAG.
60 ScheduleDAG::clearDAG();
63 // Invoke the target's selection of scheduler.
67 /// NewSUnit - Creates a new SUnit and return a ptr to it.
69 SUnit
*ScheduleDAGSDNodes::newSUnit(SDNode
*N
) {
71 const SUnit
*Addr
= nullptr;
75 SUnits
.emplace_back(N
, (unsigned)SUnits
.size());
76 assert((Addr
== nullptr || Addr
== &SUnits
[0]) &&
77 "SUnits std::vector reallocated on the fly!");
78 SUnits
.back().OrigNode
= &SUnits
.back();
79 SUnit
*SU
= &SUnits
.back();
80 const TargetLowering
&TLI
= DAG
->getTargetLoweringInfo();
82 (N
->isMachineOpcode() &&
83 N
->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF
))
84 SU
->SchedulingPref
= Sched::None
;
86 SU
->SchedulingPref
= TLI
.getSchedulingPreference(N
);
90 SUnit
*ScheduleDAGSDNodes::Clone(SUnit
*Old
) {
91 SUnit
*SU
= newSUnit(Old
->getNode());
92 SU
->OrigNode
= Old
->OrigNode
;
93 SU
->Latency
= Old
->Latency
;
94 SU
->isVRegCycle
= Old
->isVRegCycle
;
95 SU
->isCall
= Old
->isCall
;
96 SU
->isCallOp
= Old
->isCallOp
;
97 SU
->isTwoAddress
= Old
->isTwoAddress
;
98 SU
->isCommutable
= Old
->isCommutable
;
99 SU
->hasPhysRegDefs
= Old
->hasPhysRegDefs
;
100 SU
->hasPhysRegClobbers
= Old
->hasPhysRegClobbers
;
101 SU
->isScheduleHigh
= Old
->isScheduleHigh
;
102 SU
->isScheduleLow
= Old
->isScheduleLow
;
103 SU
->SchedulingPref
= Old
->SchedulingPref
;
104 Old
->isCloned
= true;
108 /// CheckForPhysRegDependency - Check if the dependency between def and use of
109 /// a specified operand is a physical register dependency. If so, returns the
110 /// register and the cost of copying the register.
111 static void CheckForPhysRegDependency(SDNode
*Def
, SDNode
*User
, unsigned Op
,
112 const TargetRegisterInfo
*TRI
,
113 const TargetInstrInfo
*TII
,
114 const TargetLowering
&TLI
,
115 unsigned &PhysReg
, int &Cost
) {
116 if (Op
!= 2 || User
->getOpcode() != ISD::CopyToReg
)
119 unsigned Reg
= cast
<RegisterSDNode
>(User
->getOperand(1))->getReg();
120 if (TLI
.checkForPhysRegDependency(Def
, User
, Op
, TRI
, TII
, PhysReg
, Cost
))
123 if (Register::isVirtualRegister(Reg
))
126 unsigned ResNo
= User
->getOperand(2).getResNo();
127 if (Def
->getOpcode() == ISD::CopyFromReg
&&
128 cast
<RegisterSDNode
>(Def
->getOperand(1))->getReg() == Reg
) {
130 } else if (Def
->isMachineOpcode()) {
131 const MCInstrDesc
&II
= TII
->get(Def
->getMachineOpcode());
132 if (ResNo
>= II
.getNumDefs() && II
.hasImplicitDefOfPhysReg(Reg
))
137 const TargetRegisterClass
*RC
=
138 TRI
->getMinimalPhysRegClass(Reg
, Def
->getSimpleValueType(ResNo
));
139 Cost
= RC
->getCopyCost();
143 // Helper for AddGlue to clone node operands.
144 static void CloneNodeWithValues(SDNode
*N
, SelectionDAG
*DAG
, ArrayRef
<EVT
> VTs
,
145 SDValue ExtraOper
= SDValue()) {
146 SmallVector
<SDValue
, 8> Ops(N
->ops());
147 if (ExtraOper
.getNode())
148 Ops
.push_back(ExtraOper
);
150 SDVTList VTList
= DAG
->getVTList(VTs
);
151 MachineSDNode
*MN
= dyn_cast
<MachineSDNode
>(N
);
153 // Store memory references.
154 SmallVector
<MachineMemOperand
*, 2> MMOs
;
156 MMOs
.assign(MN
->memoperands_begin(), MN
->memoperands_end());
158 DAG
->MorphNodeTo(N
, N
->getOpcode(), VTList
, Ops
);
160 // Reset the memory references
162 DAG
->setNodeMemRefs(MN
, MMOs
);
165 static bool AddGlue(SDNode
*N
, SDValue Glue
, bool AddGlue
, SelectionDAG
*DAG
) {
166 SDNode
*GlueDestNode
= Glue
.getNode();
168 // Don't add glue from a node to itself.
169 if (GlueDestNode
== N
) return false;
171 // Don't add a glue operand to something that already uses glue.
173 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
176 // Don't add glue to something that already has a glue value.
177 if (N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
) return false;
179 SmallVector
<EVT
, 4> VTs(N
->values());
181 VTs
.push_back(MVT::Glue
);
183 CloneNodeWithValues(N
, DAG
, VTs
, Glue
);
188 // Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
189 // node even though simply shrinking the value list is sufficient.
190 static void RemoveUnusedGlue(SDNode
*N
, SelectionDAG
*DAG
) {
191 assert((N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
&&
192 !N
->hasAnyUseOfValue(N
->getNumValues() - 1)) &&
193 "expected an unused glue value");
195 CloneNodeWithValues(N
, DAG
,
196 ArrayRef(N
->value_begin(), N
->getNumValues() - 1));
199 /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
200 /// This function finds loads of the same base and different offsets. If the
201 /// offsets are not far apart (target specific), it add MVT::Glue inputs and
202 /// outputs to ensure they are scheduled together and in order. This
203 /// optimization may benefit some targets by improving cache locality.
204 void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode
*Node
) {
206 unsigned NumOps
= Node
->getNumOperands();
207 if (Node
->getOperand(NumOps
-1).getValueType() == MVT::Other
)
208 Chain
= Node
->getOperand(NumOps
-1);
212 // Skip any load instruction that has a tied input. There may be an additional
213 // dependency requiring a different order than by increasing offsets, and the
214 // added glue may introduce a cycle.
215 auto hasTiedInput
= [this](const SDNode
*N
) {
216 const MCInstrDesc
&MCID
= TII
->get(N
->getMachineOpcode());
217 for (unsigned I
= 0; I
!= MCID
.getNumOperands(); ++I
) {
218 if (MCID
.getOperandConstraint(I
, MCOI::TIED_TO
) != -1)
225 // Look for other loads of the same chain. Find loads that are loading from
226 // the same base pointer and different offsets.
227 SmallPtrSet
<SDNode
*, 16> Visited
;
228 SmallVector
<int64_t, 4> Offsets
;
229 DenseMap
<long long, SDNode
*> O2SMap
; // Map from offset to SDNode.
230 bool Cluster
= false;
233 if (hasTiedInput(Base
))
236 // This algorithm requires a reasonably low use count before finding a match
237 // to avoid uselessly blowing up compile time in large blocks.
238 unsigned UseCount
= 0;
239 for (SDNode::user_iterator I
= Chain
->user_begin(), E
= Chain
->user_end();
240 I
!= E
&& UseCount
< 100; ++I
, ++UseCount
) {
241 if (I
.getUse().getResNo() != Chain
.getResNo())
245 if (User
== Node
|| !Visited
.insert(User
).second
)
247 int64_t Offset1
, Offset2
;
248 if (!TII
->areLoadsFromSameBasePtr(Base
, User
, Offset1
, Offset2
) ||
249 Offset1
== Offset2
||
250 hasTiedInput(User
)) {
251 // FIXME: Should be ok if they addresses are identical. But earlier
252 // optimizations really should have eliminated one of the loads.
255 if (O2SMap
.insert(std::make_pair(Offset1
, Base
)).second
)
256 Offsets
.push_back(Offset1
);
257 O2SMap
.insert(std::make_pair(Offset2
, User
));
258 Offsets
.push_back(Offset2
);
259 if (Offset2
< Offset1
)
262 // Reset UseCount to allow more matches.
269 // Sort them in increasing order.
272 // Check if the loads are close enough.
273 SmallVector
<SDNode
*, 4> Loads
;
274 unsigned NumLoads
= 0;
275 int64_t BaseOff
= Offsets
[0];
276 SDNode
*BaseLoad
= O2SMap
[BaseOff
];
277 Loads
.push_back(BaseLoad
);
278 for (unsigned i
= 1, e
= Offsets
.size(); i
!= e
; ++i
) {
279 int64_t Offset
= Offsets
[i
];
280 SDNode
*Load
= O2SMap
[Offset
];
281 if (!TII
->shouldScheduleLoadsNear(BaseLoad
, Load
, BaseOff
, Offset
,NumLoads
))
282 break; // Stop right here. Ignore loads that are further away.
283 Loads
.push_back(Load
);
290 // Cluster loads by adding MVT::Glue outputs and inputs. This also
291 // ensure they are scheduled in order of increasing addresses.
292 SDNode
*Lead
= Loads
[0];
294 if (AddGlue(Lead
, InGlue
, true, DAG
))
295 InGlue
= SDValue(Lead
, Lead
->getNumValues() - 1);
296 for (unsigned I
= 1, E
= Loads
.size(); I
!= E
; ++I
) {
297 bool OutGlue
= I
< E
- 1;
298 SDNode
*Load
= Loads
[I
];
300 // If AddGlue fails, we could leave an unsused glue value. This should not
302 if (AddGlue(Load
, InGlue
, OutGlue
, DAG
)) {
304 InGlue
= SDValue(Load
, Load
->getNumValues() - 1);
308 else if (!OutGlue
&& InGlue
.getNode())
309 RemoveUnusedGlue(InGlue
.getNode(), DAG
);
313 /// ClusterNodes - Cluster certain nodes which should be scheduled together.
315 void ScheduleDAGSDNodes::ClusterNodes() {
316 for (SDNode
&NI
: DAG
->allnodes()) {
318 if (!Node
|| !Node
->isMachineOpcode())
321 unsigned Opc
= Node
->getMachineOpcode();
322 const MCInstrDesc
&MCID
= TII
->get(Opc
);
324 // Cluster loads from "near" addresses into combined SUnits.
325 ClusterNeighboringLoads(Node
);
329 void ScheduleDAGSDNodes::BuildSchedUnits() {
330 // During scheduling, the NodeId field of SDNode is used to map SDNodes
331 // to their associated SUnits by holding SUnits table indices. A value
332 // of -1 means the SDNode does not yet have an associated SUnit.
333 unsigned NumNodes
= 0;
334 for (SDNode
&NI
: DAG
->allnodes()) {
339 // Reserve entries in the vector for each of the SUnits we are creating. This
340 // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
342 // FIXME: Multiply by 2 because we may clone nodes during scheduling.
343 // This is a temporary workaround.
344 SUnits
.reserve(NumNodes
* 2);
346 // Add all nodes in depth first order.
347 SmallVector
<SDNode
*, 64> Worklist
;
348 SmallPtrSet
<SDNode
*, 32> Visited
;
349 Worklist
.push_back(DAG
->getRoot().getNode());
350 Visited
.insert(DAG
->getRoot().getNode());
352 SmallVector
<SUnit
*, 8> CallSUnits
;
353 while (!Worklist
.empty()) {
354 SDNode
*NI
= Worklist
.pop_back_val();
356 // Add all operands to the worklist unless they've already been added.
357 for (const SDValue
&Op
: NI
->op_values())
358 if (Visited
.insert(Op
.getNode()).second
)
359 Worklist
.push_back(Op
.getNode());
361 if (isPassiveNode(NI
)) // Leaf node, e.g. a TargetImmediate.
364 // If this node has already been processed, stop now.
365 if (NI
->getNodeId() != -1) continue;
367 SUnit
*NodeSUnit
= newSUnit(NI
);
369 // See if anything is glued to this node, if so, add them to glued
370 // nodes. Nodes can have at most one glue input and one glue output. Glue
371 // is required to be the last operand and result of a node.
373 // Scan up to find glued preds.
375 while (N
->getNumOperands() &&
376 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
377 N
= N
->getOperand(N
->getNumOperands()-1).getNode();
378 assert(N
->getNodeId() == -1 && "Node already inserted!");
379 N
->setNodeId(NodeSUnit
->NodeNum
);
380 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
381 NodeSUnit
->isCall
= true;
384 // Scan down to find any glued succs.
386 while (N
->getValueType(N
->getNumValues()-1) == MVT::Glue
) {
387 SDValue
GlueVal(N
, N
->getNumValues()-1);
389 // There are either zero or one users of the Glue result.
390 bool HasGlueUse
= false;
391 for (SDNode
*U
: N
->users())
392 if (GlueVal
.isOperandOf(U
)) {
394 assert(N
->getNodeId() == -1 && "Node already inserted!");
395 N
->setNodeId(NodeSUnit
->NodeNum
);
397 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
398 NodeSUnit
->isCall
= true;
401 if (!HasGlueUse
) break;
404 if (NodeSUnit
->isCall
)
405 CallSUnits
.push_back(NodeSUnit
);
407 // Schedule zero-latency TokenFactor below any nodes that may increase the
408 // schedule height. Otherwise, ancestors of the TokenFactor may appear to
409 // have false stalls.
410 if (NI
->getOpcode() == ISD::TokenFactor
)
411 NodeSUnit
->isScheduleLow
= true;
413 // If there are glue operands involved, N is now the bottom-most node
414 // of the sequence of nodes that are glued together.
416 NodeSUnit
->setNode(N
);
417 assert(N
->getNodeId() == -1 && "Node already inserted!");
418 N
->setNodeId(NodeSUnit
->NodeNum
);
420 // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
421 InitNumRegDefsLeft(NodeSUnit
);
423 // Assign the Latency field of NodeSUnit using target-provided information.
424 computeLatency(NodeSUnit
);
427 // Find all call operands.
428 while (!CallSUnits
.empty()) {
429 SUnit
*SU
= CallSUnits
.pop_back_val();
430 for (const SDNode
*SUNode
= SU
->getNode(); SUNode
;
431 SUNode
= SUNode
->getGluedNode()) {
432 if (SUNode
->getOpcode() != ISD::CopyToReg
)
434 SDNode
*SrcN
= SUNode
->getOperand(2).getNode();
435 if (isPassiveNode(SrcN
)) continue; // Not scheduled.
436 SUnit
*SrcSU
= &SUnits
[SrcN
->getNodeId()];
437 SrcSU
->isCallOp
= true;
442 void ScheduleDAGSDNodes::AddSchedEdges() {
443 const TargetSubtargetInfo
&ST
= MF
.getSubtarget();
445 // Check to see if the scheduler cares about latencies.
446 bool UnitLatencies
= forceUnitLatencies();
448 // Pass 2: add the preds, succs, etc.
449 for (SUnit
&SU
: SUnits
) {
450 SDNode
*MainNode
= SU
.getNode();
452 if (MainNode
->isMachineOpcode()) {
453 unsigned Opc
= MainNode
->getMachineOpcode();
454 const MCInstrDesc
&MCID
= TII
->get(Opc
);
455 for (unsigned i
= 0; i
!= MCID
.getNumOperands(); ++i
) {
456 if (MCID
.getOperandConstraint(i
, MCOI::TIED_TO
) != -1) {
457 SU
.isTwoAddress
= true;
461 if (MCID
.isCommutable())
462 SU
.isCommutable
= true;
465 // Find all predecessors and successors of the group.
466 for (SDNode
*N
= SU
.getNode(); N
; N
= N
->getGluedNode()) {
467 if (N
->isMachineOpcode() &&
468 !TII
->get(N
->getMachineOpcode()).implicit_defs().empty()) {
469 SU
.hasPhysRegClobbers
= true;
470 unsigned NumUsed
= InstrEmitter::CountResults(N
);
471 while (NumUsed
!= 0 && !N
->hasAnyUseOfValue(NumUsed
- 1))
472 --NumUsed
; // Skip over unused values at the end.
473 if (NumUsed
> TII
->get(N
->getMachineOpcode()).getNumDefs())
474 SU
.hasPhysRegDefs
= true;
477 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
478 SDNode
*OpN
= N
->getOperand(i
).getNode();
479 unsigned DefIdx
= N
->getOperand(i
).getResNo();
480 if (isPassiveNode(OpN
)) continue; // Not scheduled.
481 SUnit
*OpSU
= &SUnits
[OpN
->getNodeId()];
482 assert(OpSU
&& "Node has no SUnit!");
484 continue; // In the same group.
486 EVT OpVT
= N
->getOperand(i
).getValueType();
487 assert(OpVT
!= MVT::Glue
&& "Glued nodes should be in same sunit!");
488 bool isChain
= OpVT
== MVT::Other
;
490 unsigned PhysReg
= 0;
492 // Determine if this is a physical register dependency.
493 const TargetLowering
&TLI
= DAG
->getTargetLoweringInfo();
494 CheckForPhysRegDependency(OpN
, N
, i
, TRI
, TII
, TLI
, PhysReg
, Cost
);
495 assert((PhysReg
== 0 || !isChain
) &&
496 "Chain dependence via physreg data?");
497 // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
498 // emits a copy from the physical register to a virtual register unless
499 // it requires a cross class copy (cost < 0). That means we are only
500 // treating "expensive to copy" register dependency as physical register
501 // dependency. This may change in the future though.
502 if (Cost
>= 0 && !StressSched
)
505 // If this is a ctrl dep, latency is 1.
506 unsigned OpLatency
= isChain
? 1 : OpSU
->Latency
;
507 // Special-case TokenFactor chains as zero-latency.
508 if(isChain
&& OpN
->getOpcode() == ISD::TokenFactor
)
511 SDep Dep
= isChain
? SDep(OpSU
, SDep::Barrier
)
512 : SDep(OpSU
, SDep::Data
, PhysReg
);
513 Dep
.setLatency(OpLatency
);
514 if (!isChain
&& !UnitLatencies
) {
515 computeOperandLatency(OpN
, N
, i
, Dep
);
516 ST
.adjustSchedDependency(OpSU
, DefIdx
, &SU
, i
, Dep
, nullptr);
519 if (!SU
.addPred(Dep
) && !Dep
.isCtrl() && OpSU
->NumRegDefsLeft
> 1) {
520 // Multiple register uses are combined in the same SUnit. For example,
521 // we could have a set of glued nodes with all their defs consumed by
522 // another set of glued nodes. Register pressure tracking sees this as
523 // a single use, so to keep pressure balanced we reduce the defs.
525 // We can't tell (without more book-keeping) if this results from
526 // glued nodes or duplicate operands. As long as we don't reduce
527 // NumRegDefsLeft to zero, we handle the common cases well.
528 --OpSU
->NumRegDefsLeft
;
535 /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
536 /// are input. This SUnit graph is similar to the SelectionDAG, but
537 /// excludes nodes that aren't interesting to scheduling, and represents
538 /// glued together nodes with a single SUnit.
539 void ScheduleDAGSDNodes::BuildSchedGraph(AAResults
*AA
) {
540 // Cluster certain nodes which should be scheduled together.
542 // Populate the SUnits array.
544 // Compute all the scheduling dependencies between nodes.
548 // Initialize NumNodeDefs for the current Node's opcode.
549 void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
550 // Check for phys reg copy.
554 if (!Node
->isMachineOpcode()) {
555 if (Node
->getOpcode() == ISD::CopyFromReg
)
561 unsigned POpc
= Node
->getMachineOpcode();
562 if (POpc
== TargetOpcode::IMPLICIT_DEF
) {
563 // No register need be allocated for this.
567 if (POpc
== TargetOpcode::PATCHPOINT
&&
568 Node
->getValueType(0) == MVT::Other
) {
569 // PATCHPOINT is defined to have one result, but it might really have none
570 // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
575 unsigned NRegDefs
= SchedDAG
->TII
->get(Node
->getMachineOpcode()).getNumDefs();
576 // Some instructions define regs that are not represented in the selection DAG
577 // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
578 NodeNumDefs
= std::min(Node
->getNumValues(), NRegDefs
);
582 // Construct a RegDefIter for this SUnit and find the first valid value.
583 ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit
*SU
,
584 const ScheduleDAGSDNodes
*SD
)
585 : SchedDAG(SD
), Node(SU
->getNode()) {
590 // Advance to the next valid value defined by the SUnit.
591 void ScheduleDAGSDNodes::RegDefIter::Advance() {
592 for (;Node
;) { // Visit all glued nodes.
593 for (;DefIdx
< NodeNumDefs
; ++DefIdx
) {
594 if (!Node
->hasAnyUseOfValue(DefIdx
))
596 ValueType
= Node
->getSimpleValueType(DefIdx
);
598 return; // Found a normal regdef.
600 Node
= Node
->getGluedNode();
602 return; // No values left to visit.
608 void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit
*SU
) {
609 assert(SU
->NumRegDefsLeft
== 0 && "expect a new node");
610 for (RegDefIter
I(SU
, this); I
.IsValid(); I
.Advance()) {
611 assert(SU
->NumRegDefsLeft
< USHRT_MAX
&& "overflow is ok but unexpected");
612 ++SU
->NumRegDefsLeft
;
616 void ScheduleDAGSDNodes::computeLatency(SUnit
*SU
) {
617 SDNode
*N
= SU
->getNode();
619 // TokenFactor operands are considered zero latency, and some schedulers
620 // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
621 // whenever node latency is nonzero.
622 if (N
&& N
->getOpcode() == ISD::TokenFactor
) {
627 // Check to see if the scheduler cares about latencies.
628 if (forceUnitLatencies()) {
633 if (!InstrItins
|| InstrItins
->isEmpty()) {
634 if (N
&& N
->isMachineOpcode() &&
635 TII
->isHighLatencyDef(N
->getMachineOpcode()))
636 SU
->Latency
= HighLatencyCycles
;
642 // Compute the latency for the node. We use the sum of the latencies for
643 // all nodes glued together into this SUnit.
645 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode())
646 if (N
->isMachineOpcode())
647 SU
->Latency
+= TII
->getInstrLatency(InstrItins
, N
);
650 void ScheduleDAGSDNodes::computeOperandLatency(SDNode
*Def
, SDNode
*Use
,
651 unsigned OpIdx
, SDep
& dep
) const{
652 // Check to see if the scheduler cares about latencies.
653 if (forceUnitLatencies())
656 if (dep
.getKind() != SDep::Data
)
659 unsigned DefIdx
= Use
->getOperand(OpIdx
).getResNo();
660 if (Use
->isMachineOpcode())
661 // Adjust the use operand index by num of defs.
662 OpIdx
+= TII
->get(Use
->getMachineOpcode()).getNumDefs();
663 std::optional
<unsigned> Latency
=
664 TII
->getOperandLatency(InstrItins
, Def
, DefIdx
, Use
, OpIdx
);
665 if (Latency
> 1U && Use
->getOpcode() == ISD::CopyToReg
&&
667 unsigned Reg
= cast
<RegisterSDNode
>(Use
->getOperand(1))->getReg();
668 if (Register::isVirtualRegister(Reg
))
669 // This copy is a liveout value. It is likely coalesced, so reduce the
670 // latency so not to penalize the def.
671 // FIXME: need target specific adjustment here?
672 Latency
= *Latency
- 1;
675 dep
.setLatency(*Latency
);
678 void ScheduleDAGSDNodes::dumpNode(const SUnit
&SU
) const {
679 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
684 dbgs() << "PHYS REG COPY\n";
688 SU
.getNode()->dump(DAG
);
690 SmallVector
<SDNode
*, 4> GluedNodes
;
691 for (SDNode
*N
= SU
.getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
692 GluedNodes
.push_back(N
);
693 while (!GluedNodes
.empty()) {
695 GluedNodes
.back()->dump(DAG
);
697 GluedNodes
.pop_back();
702 void ScheduleDAGSDNodes::dump() const {
703 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
704 if (EntrySU
.getNode() != nullptr)
705 dumpNodeAll(EntrySU
);
706 for (const SUnit
&SU
: SUnits
)
708 if (ExitSU
.getNode() != nullptr)
713 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
714 void ScheduleDAGSDNodes::dumpSchedule() const {
715 for (const SUnit
*SU
: Sequence
) {
719 dbgs() << "**** NOOP ****\n";
725 /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
726 /// their state is consistent with the nodes listed in Sequence.
728 void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp
) {
729 unsigned ScheduledNodes
= ScheduleDAG::VerifyScheduledDAG(isBottomUp
);
730 unsigned Noops
= llvm::count(Sequence
, nullptr);
731 assert(Sequence
.size() - Noops
== ScheduledNodes
&&
732 "The number of nodes scheduled doesn't match the expected number!");
736 /// ProcessSDDbgValues - Process SDDbgValues associated with this node.
738 ProcessSDDbgValues(SDNode
*N
, SelectionDAG
*DAG
, InstrEmitter
&Emitter
,
739 SmallVectorImpl
<std::pair
<unsigned, MachineInstr
*> > &Orders
,
740 InstrEmitter::VRBaseMapType
&VRBaseMap
, unsigned Order
) {
741 if (!N
->getHasDebugValue())
744 /// Returns true if \p DV has any VReg operand locations which don't exist in
746 auto HasUnknownVReg
= [&VRBaseMap
](SDDbgValue
*DV
) {
747 for (const SDDbgOperand
&L
: DV
->getLocationOps()) {
748 if (L
.getKind() == SDDbgOperand::SDNODE
&&
749 VRBaseMap
.count({L
.getSDNode(), L
.getResNo()}) == 0)
755 // Opportunistically insert immediate dbg_value uses, i.e. those with the same
756 // source order number as N.
757 MachineBasicBlock
*BB
= Emitter
.getBlock();
758 MachineBasicBlock::iterator InsertPos
= Emitter
.getInsertPos();
759 for (auto *DV
: DAG
->GetDbgValues(N
)) {
762 unsigned DVOrder
= DV
->getOrder();
763 if (Order
!= 0 && DVOrder
!= Order
)
765 // If DV has any VReg location operands which haven't been mapped then
766 // either that node is no longer available or we just haven't visited the
767 // node yet. In the former case we should emit an undef dbg_value, but we
768 // can do it later. And for the latter we'll want to wait until all
769 // dependent nodes have been visited.
770 if (!DV
->isInvalidated() && HasUnknownVReg(DV
))
772 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(DV
, VRBaseMap
);
775 Orders
.push_back({DVOrder
, DbgMI
});
776 BB
->insert(InsertPos
, DbgMI
);
780 // ProcessSourceNode - Process nodes with source order numbers. These are added
781 // to a vector which EmitSchedule uses to determine how to insert dbg_value
782 // instructions in the right order.
784 ProcessSourceNode(SDNode
*N
, SelectionDAG
*DAG
, InstrEmitter
&Emitter
,
785 InstrEmitter::VRBaseMapType
&VRBaseMap
,
786 SmallVectorImpl
<std::pair
<unsigned, MachineInstr
*>> &Orders
,
787 SmallSet
<Register
, 8> &Seen
, MachineInstr
*NewInsn
) {
788 unsigned Order
= N
->getIROrder();
789 if (!Order
|| Seen
.count(Order
)) {
790 // Process any valid SDDbgValues even if node does not have any order
792 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, 0);
796 // If a new instruction was generated for this Order number, record it.
797 // Otherwise, leave this order number unseen: we will either find later
798 // instructions for it, or leave it unseen if there were no instructions at
802 Orders
.push_back({Order
, NewInsn
});
805 // Even if no instruction was generated, a Value may have become defined via
806 // earlier nodes. Try to process them now.
807 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, Order
);
810 void ScheduleDAGSDNodes::
811 EmitPhysRegCopy(SUnit
*SU
, SmallDenseMap
<SUnit
*, Register
, 16> &VRBaseMap
,
812 MachineBasicBlock::iterator InsertPos
) {
813 for (const SDep
&Pred
: SU
->Preds
) {
815 continue; // ignore chain preds
816 if (Pred
.getSUnit()->CopyDstRC
) {
817 // Copy to physical register.
818 DenseMap
<SUnit
*, Register
>::iterator VRI
=
819 VRBaseMap
.find(Pred
.getSUnit());
820 assert(VRI
!= VRBaseMap
.end() && "Node emitted out of order - late");
821 // Find the destination physical register.
823 for (const SDep
&Succ
: SU
->Succs
) {
825 continue; // ignore chain preds
831 BuildMI(*BB
, InsertPos
, DebugLoc(), TII
->get(TargetOpcode::COPY
), Reg
)
832 .addReg(VRI
->second
);
834 // Copy from physical register.
835 assert(Pred
.getReg() && "Unknown physical register!");
836 Register VRBase
= MRI
.createVirtualRegister(SU
->CopyDstRC
);
837 bool isNew
= VRBaseMap
.insert(std::make_pair(SU
, VRBase
)).second
;
838 (void)isNew
; // Silence compiler warning.
839 assert(isNew
&& "Node emitted out of order - early");
840 BuildMI(*BB
, InsertPos
, DebugLoc(), TII
->get(TargetOpcode::COPY
), VRBase
)
841 .addReg(Pred
.getReg());
847 /// EmitSchedule - Emit the machine code in scheduled order. Return the new
848 /// InsertPos and MachineBasicBlock that contains this insertion
849 /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
850 /// not necessarily refer to returned BB. The emitter may split blocks.
851 MachineBasicBlock
*ScheduleDAGSDNodes::
852 EmitSchedule(MachineBasicBlock::iterator
&InsertPos
) {
853 InstrEmitter
Emitter(DAG
->getTarget(), BB
, InsertPos
);
854 InstrEmitter::VRBaseMapType VRBaseMap
;
855 SmallDenseMap
<SUnit
*, Register
, 16> CopyVRBaseMap
;
856 SmallVector
<std::pair
<unsigned, MachineInstr
*>, 32> Orders
;
857 SmallSet
<Register
, 8> Seen
;
858 bool HasDbg
= DAG
->hasDebugValues();
860 // Emit a node, and determine where its first instruction is for debuginfo.
861 // Zero, one, or multiple instructions can be created when emitting a node.
863 [&](SDNode
*Node
, bool IsClone
, bool IsCloned
,
864 InstrEmitter::VRBaseMapType
&VRBaseMap
) -> MachineInstr
* {
865 // Fetch instruction prior to this, or end() if nonexistant.
866 auto GetPrevInsn
= [&](MachineBasicBlock::iterator I
) {
867 if (I
== BB
->begin())
870 return std::prev(Emitter
.getInsertPos());
873 MachineBasicBlock::iterator Before
= GetPrevInsn(Emitter
.getInsertPos());
874 Emitter
.EmitNode(Node
, IsClone
, IsCloned
, VRBaseMap
);
875 MachineBasicBlock::iterator After
= GetPrevInsn(Emitter
.getInsertPos());
877 // If the iterator did not change, no instructions were inserted.
882 if (Before
== BB
->end()) {
883 // There were no prior instructions; the new ones must start at the
884 // beginning of the block.
885 MI
= &Emitter
.getBlock()->instr_front();
887 // Return first instruction after the pre-existing instructions.
888 MI
= &*std::next(Before
);
891 if (MI
->isCandidateForAdditionalCallInfo()) {
892 if (DAG
->getTarget().Options
.EmitCallSiteInfo
)
893 MF
.addCallSiteInfo(MI
, DAG
->getCallSiteInfo(Node
));
895 if (auto CalledGlobal
= DAG
->getCalledGlobal(Node
))
896 if (CalledGlobal
->Callee
)
897 MF
.addCalledGlobal(MI
, *CalledGlobal
);
900 if (DAG
->getNoMergeSiteInfo(Node
)) {
901 MI
->setFlag(MachineInstr::MIFlag::NoMerge
);
904 if (MDNode
*MD
= DAG
->getPCSections(Node
))
905 MI
->setPCSections(MF
, MD
);
907 // Set MMRAs on _all_ added instructions.
908 if (MDNode
*MMRA
= DAG
->getMMRAMetadata(Node
)) {
909 for (MachineBasicBlock::iterator It
= MI
->getIterator(),
910 End
= std::next(After
);
912 It
->setMMRAMetadata(MF
, MMRA
);
918 // If this is the first BB, emit byval parameter dbg_value's.
919 if (HasDbg
&& BB
->getParent()->begin() == MachineFunction::iterator(BB
)) {
920 SDDbgInfo::DbgIterator PDI
= DAG
->ByvalParmDbgBegin();
921 SDDbgInfo::DbgIterator PDE
= DAG
->ByvalParmDbgEnd();
922 for (; PDI
!= PDE
; ++PDI
) {
923 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*PDI
, VRBaseMap
);
925 BB
->insert(InsertPos
, DbgMI
);
926 // We re-emit the dbg_value closer to its use, too, after instructions
927 // are emitted to the BB.
928 (*PDI
)->clearIsEmitted();
933 for (SUnit
*SU
: Sequence
) {
935 // Null SUnit* is a noop.
936 TII
->insertNoop(*Emitter
.getBlock(), InsertPos
);
940 // For pre-regalloc scheduling, create instructions corresponding to the
941 // SDNode and any glued SDNodes and append them to the block.
942 if (!SU
->getNode()) {
944 EmitPhysRegCopy(SU
, CopyVRBaseMap
, InsertPos
);
948 SmallVector
<SDNode
*, 4> GluedNodes
;
949 for (SDNode
*N
= SU
->getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
950 GluedNodes
.push_back(N
);
951 while (!GluedNodes
.empty()) {
952 SDNode
*N
= GluedNodes
.back();
953 auto NewInsn
= EmitNode(N
, SU
->OrigNode
!= SU
, SU
->isCloned
, VRBaseMap
);
954 // Remember the source order of the inserted instruction.
956 ProcessSourceNode(N
, DAG
, Emitter
, VRBaseMap
, Orders
, Seen
, NewInsn
);
958 if (MDNode
*MD
= DAG
->getHeapAllocSite(N
))
959 if (NewInsn
&& NewInsn
->isCall())
960 NewInsn
->setHeapAllocMarker(MF
, MD
);
962 GluedNodes
.pop_back();
965 EmitNode(SU
->getNode(), SU
->OrigNode
!= SU
, SU
->isCloned
, VRBaseMap
);
966 // Remember the source order of the inserted instruction.
968 ProcessSourceNode(SU
->getNode(), DAG
, Emitter
, VRBaseMap
, Orders
, Seen
,
971 if (MDNode
*MD
= DAG
->getHeapAllocSite(SU
->getNode())) {
972 if (NewInsn
&& NewInsn
->isCall())
973 NewInsn
->setHeapAllocMarker(MF
, MD
);
977 // Insert all the dbg_values which have not already been inserted in source
980 MachineBasicBlock::iterator BBBegin
= BB
->getFirstNonPHI();
982 // Sort the source order instructions and use the order to insert debug
983 // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
984 // regardless of the host's implementation fo std::sort.
985 llvm::stable_sort(Orders
, less_first());
986 std::stable_sort(DAG
->DbgBegin(), DAG
->DbgEnd(),
987 [](const SDDbgValue
*LHS
, const SDDbgValue
*RHS
) {
988 return LHS
->getOrder() < RHS
->getOrder();
991 SDDbgInfo::DbgIterator DI
= DAG
->DbgBegin();
992 SDDbgInfo::DbgIterator DE
= DAG
->DbgEnd();
993 // Now emit the rest according to source order.
994 unsigned LastOrder
= 0;
995 for (unsigned i
= 0, e
= Orders
.size(); i
!= e
&& DI
!= DE
; ++i
) {
996 unsigned Order
= Orders
[i
].first
;
997 MachineInstr
*MI
= Orders
[i
].second
;
998 // Insert all SDDbgValue's whose order(s) are before "Order".
1000 for (; DI
!= DE
; ++DI
) {
1001 if ((*DI
)->getOrder() < LastOrder
|| (*DI
)->getOrder() >= Order
)
1003 if ((*DI
)->isEmitted())
1006 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
);
1009 // Insert to start of the BB (after PHIs).
1010 BB
->insert(BBBegin
, DbgMI
);
1012 // Insert at the instruction, which may be in a different
1013 // block, if the block was split by a custom inserter.
1014 MachineBasicBlock::iterator Pos
= MI
;
1015 MI
->getParent()->insert(Pos
, DbgMI
);
1021 // Add trailing DbgValue's before the terminator. FIXME: May want to add
1022 // some of them before one or more conditional branches?
1023 SmallVector
<MachineInstr
*, 8> DbgMIs
;
1024 for (; DI
!= DE
; ++DI
) {
1025 if ((*DI
)->isEmitted())
1027 assert((*DI
)->getOrder() >= LastOrder
&&
1028 "emitting DBG_VALUE out of order");
1029 if (MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
))
1030 DbgMIs
.push_back(DbgMI
);
1033 MachineBasicBlock
*InsertBB
= Emitter
.getBlock();
1034 MachineBasicBlock::iterator Pos
= InsertBB
->getFirstTerminator();
1035 InsertBB
->insert(Pos
, DbgMIs
.begin(), DbgMIs
.end());
1037 SDDbgInfo::DbgLabelIterator DLI
= DAG
->DbgLabelBegin();
1038 SDDbgInfo::DbgLabelIterator DLE
= DAG
->DbgLabelEnd();
1039 // Now emit the rest according to source order.
1041 for (const auto &InstrOrder
: Orders
) {
1042 unsigned Order
= InstrOrder
.first
;
1043 MachineInstr
*MI
= InstrOrder
.second
;
1047 // Insert all SDDbgLabel's whose order(s) are before "Order".
1048 for (; DLI
!= DLE
&&
1049 (*DLI
)->getOrder() >= LastOrder
&& (*DLI
)->getOrder() < Order
;
1051 MachineInstr
*DbgMI
= Emitter
.EmitDbgLabel(*DLI
);
1054 // Insert to start of the BB (after PHIs).
1055 BB
->insert(BBBegin
, DbgMI
);
1057 // Insert at the instruction, which may be in a different
1058 // block, if the block was split by a custom inserter.
1059 MachineBasicBlock::iterator Pos
= MI
;
1060 MI
->getParent()->insert(Pos
, DbgMI
);
1071 InsertPos
= Emitter
.getInsertPos();
1072 // In some cases, DBG_VALUEs might be inserted after the first terminator,
1073 // which results in an invalid MBB. If that happens, move the DBG_VALUEs
1074 // before the first terminator.
1075 MachineBasicBlock
*InsertBB
= Emitter
.getBlock();
1076 auto FirstTerm
= InsertBB
->getFirstTerminator();
1077 if (FirstTerm
!= InsertBB
->end()) {
1078 assert(!FirstTerm
->isDebugValue() &&
1079 "first terminator cannot be a debug value");
1080 for (MachineInstr
&MI
: make_early_inc_range(
1081 make_range(std::next(FirstTerm
), InsertBB
->end()))) {
1082 // Only scan up to insertion point.
1083 if (&MI
== InsertPos
)
1086 if (!MI
.isDebugValue())
1089 // The DBG_VALUE was referencing a value produced by a terminator. By
1090 // moving the DBG_VALUE, the referenced value also needs invalidating.
1091 MI
.getOperand(0).ChangeToRegister(0, false);
1092 MI
.moveBefore(&*FirstTerm
);
1098 /// Return the basic block label.
1099 std::string
ScheduleDAGSDNodes::getDAGName() const {
1100 return "sunit-dag." + BB
->getFullName();