1 //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This implements the ScheduleDAG class, which is a base class used by
10 // scheduling implementation classes.
12 //===----------------------------------------------------------------------===//
14 #include "ScheduleDAGSDNodes.h"
15 #include "InstrEmitter.h"
16 #include "SDNodeDbgValue.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSubtargetInfo.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/MC/MCInstrItineraries.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include "llvm/Target/TargetMachine.h"
37 #define DEBUG_TYPE "pre-RA-sched"
39 STATISTIC(LoadsClustered
, "Number of loads clustered together");
41 // This allows the latency-based scheduler to notice high latency instructions
42 // without a target itinerary. The choice of number here has more to do with
43 // balancing scheduler heuristics than with the actual machine latency.
44 static cl::opt
<int> HighLatencyCycles(
45 "sched-high-latency-cycles", cl::Hidden
, cl::init(10),
46 cl::desc("Roughly estimate the number of cycles that 'long latency'"
47 "instructions take for targets with no itinerary"));
49 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction
&mf
)
50 : ScheduleDAG(mf
), BB(nullptr), DAG(nullptr),
51 InstrItins(mf
.getSubtarget().getInstrItineraryData()) {}
53 /// Run - perform scheduling.
55 void ScheduleDAGSDNodes::Run(SelectionDAG
*dag
, MachineBasicBlock
*bb
) {
59 // Clear the scheduler's SUnit DAG.
60 ScheduleDAG::clearDAG();
63 // Invoke the target's selection of scheduler.
67 /// NewSUnit - Creates a new SUnit and return a ptr to it.
69 SUnit
*ScheduleDAGSDNodes::newSUnit(SDNode
*N
) {
71 const SUnit
*Addr
= nullptr;
75 SUnits
.emplace_back(N
, (unsigned)SUnits
.size());
76 assert((Addr
== nullptr || Addr
== &SUnits
[0]) &&
77 "SUnits std::vector reallocated on the fly!");
78 SUnits
.back().OrigNode
= &SUnits
.back();
79 SUnit
*SU
= &SUnits
.back();
80 const TargetLowering
&TLI
= DAG
->getTargetLoweringInfo();
82 (N
->isMachineOpcode() &&
83 N
->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF
))
84 SU
->SchedulingPref
= Sched::None
;
86 SU
->SchedulingPref
= TLI
.getSchedulingPreference(N
);
90 SUnit
*ScheduleDAGSDNodes::Clone(SUnit
*Old
) {
91 SUnit
*SU
= newSUnit(Old
->getNode());
92 SU
->OrigNode
= Old
->OrigNode
;
93 SU
->Latency
= Old
->Latency
;
94 SU
->isVRegCycle
= Old
->isVRegCycle
;
95 SU
->isCall
= Old
->isCall
;
96 SU
->isCallOp
= Old
->isCallOp
;
97 SU
->isTwoAddress
= Old
->isTwoAddress
;
98 SU
->isCommutable
= Old
->isCommutable
;
99 SU
->hasPhysRegDefs
= Old
->hasPhysRegDefs
;
100 SU
->hasPhysRegClobbers
= Old
->hasPhysRegClobbers
;
101 SU
->isScheduleHigh
= Old
->isScheduleHigh
;
102 SU
->isScheduleLow
= Old
->isScheduleLow
;
103 SU
->SchedulingPref
= Old
->SchedulingPref
;
104 Old
->isCloned
= true;
108 /// CheckForPhysRegDependency - Check if the dependency between def and use of
109 /// a specified operand is a physical register dependency. If so, returns the
110 /// register and the cost of copying the register.
111 static void CheckForPhysRegDependency(SDNode
*Def
, SDNode
*User
, unsigned Op
,
112 const TargetRegisterInfo
*TRI
,
113 const TargetInstrInfo
*TII
,
114 unsigned &PhysReg
, int &Cost
) {
115 if (Op
!= 2 || User
->getOpcode() != ISD::CopyToReg
)
118 unsigned Reg
= cast
<RegisterSDNode
>(User
->getOperand(1))->getReg();
119 if (Register::isVirtualRegister(Reg
))
122 unsigned ResNo
= User
->getOperand(2).getResNo();
123 if (Def
->getOpcode() == ISD::CopyFromReg
&&
124 cast
<RegisterSDNode
>(Def
->getOperand(1))->getReg() == Reg
) {
126 } else if (Def
->isMachineOpcode()) {
127 const MCInstrDesc
&II
= TII
->get(Def
->getMachineOpcode());
128 if (ResNo
>= II
.getNumDefs() && II
.hasImplicitDefOfPhysReg(Reg
))
133 const TargetRegisterClass
*RC
=
134 TRI
->getMinimalPhysRegClass(Reg
, Def
->getSimpleValueType(ResNo
));
135 Cost
= RC
->getCopyCost();
139 // Helper for AddGlue to clone node operands.
140 static void CloneNodeWithValues(SDNode
*N
, SelectionDAG
*DAG
, ArrayRef
<EVT
> VTs
,
141 SDValue ExtraOper
= SDValue()) {
142 SmallVector
<SDValue
, 8> Ops(N
->op_begin(), N
->op_end());
143 if (ExtraOper
.getNode())
144 Ops
.push_back(ExtraOper
);
146 SDVTList VTList
= DAG
->getVTList(VTs
);
147 MachineSDNode
*MN
= dyn_cast
<MachineSDNode
>(N
);
149 // Store memory references.
150 SmallVector
<MachineMemOperand
*, 2> MMOs
;
152 MMOs
.assign(MN
->memoperands_begin(), MN
->memoperands_end());
154 DAG
->MorphNodeTo(N
, N
->getOpcode(), VTList
, Ops
);
156 // Reset the memory references
158 DAG
->setNodeMemRefs(MN
, MMOs
);
161 static bool AddGlue(SDNode
*N
, SDValue Glue
, bool AddGlue
, SelectionDAG
*DAG
) {
162 SDNode
*GlueDestNode
= Glue
.getNode();
164 // Don't add glue from a node to itself.
165 if (GlueDestNode
== N
) return false;
167 // Don't add a glue operand to something that already uses glue.
169 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
172 // Don't add glue to something that already has a glue value.
173 if (N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
) return false;
175 SmallVector
<EVT
, 4> VTs(N
->values());
177 VTs
.push_back(MVT::Glue
);
179 CloneNodeWithValues(N
, DAG
, VTs
, Glue
);
184 // Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
185 // node even though simply shrinking the value list is sufficient.
186 static void RemoveUnusedGlue(SDNode
*N
, SelectionDAG
*DAG
) {
187 assert((N
->getValueType(N
->getNumValues() - 1) == MVT::Glue
&&
188 !N
->hasAnyUseOfValue(N
->getNumValues() - 1)) &&
189 "expected an unused glue value");
191 CloneNodeWithValues(N
, DAG
,
192 makeArrayRef(N
->value_begin(), N
->getNumValues() - 1));
195 /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
196 /// This function finds loads of the same base and different offsets. If the
197 /// offsets are not far apart (target specific), it add MVT::Glue inputs and
198 /// outputs to ensure they are scheduled together and in order. This
199 /// optimization may benefit some targets by improving cache locality.
200 void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode
*Node
) {
202 unsigned NumOps
= Node
->getNumOperands();
203 if (Node
->getOperand(NumOps
-1).getValueType() == MVT::Other
)
204 Chain
= Node
->getOperand(NumOps
-1);
208 // Skip any load instruction that has a tied input. There may be an additional
209 // dependency requiring a different order than by increasing offsets, and the
210 // added glue may introduce a cycle.
211 auto hasTiedInput
= [this](const SDNode
*N
) {
212 const MCInstrDesc
&MCID
= TII
->get(N
->getMachineOpcode());
213 for (unsigned I
= 0; I
!= MCID
.getNumOperands(); ++I
) {
214 if (MCID
.getOperandConstraint(I
, MCOI::TIED_TO
) != -1)
221 // Look for other loads of the same chain. Find loads that are loading from
222 // the same base pointer and different offsets.
223 SmallPtrSet
<SDNode
*, 16> Visited
;
224 SmallVector
<int64_t, 4> Offsets
;
225 DenseMap
<long long, SDNode
*> O2SMap
; // Map from offset to SDNode.
226 bool Cluster
= false;
229 if (hasTiedInput(Base
))
232 // This algorithm requires a reasonably low use count before finding a match
233 // to avoid uselessly blowing up compile time in large blocks.
234 unsigned UseCount
= 0;
235 for (SDNode::use_iterator I
= Chain
->use_begin(), E
= Chain
->use_end();
236 I
!= E
&& UseCount
< 100; ++I
, ++UseCount
) {
237 if (I
.getUse().getResNo() != Chain
.getResNo())
241 if (User
== Node
|| !Visited
.insert(User
).second
)
243 int64_t Offset1
, Offset2
;
244 if (!TII
->areLoadsFromSameBasePtr(Base
, User
, Offset1
, Offset2
) ||
245 Offset1
== Offset2
||
246 hasTiedInput(User
)) {
247 // FIXME: Should be ok if they addresses are identical. But earlier
248 // optimizations really should have eliminated one of the loads.
251 if (O2SMap
.insert(std::make_pair(Offset1
, Base
)).second
)
252 Offsets
.push_back(Offset1
);
253 O2SMap
.insert(std::make_pair(Offset2
, User
));
254 Offsets
.push_back(Offset2
);
255 if (Offset2
< Offset1
)
258 // Reset UseCount to allow more matches.
265 // Sort them in increasing order.
268 // Check if the loads are close enough.
269 SmallVector
<SDNode
*, 4> Loads
;
270 unsigned NumLoads
= 0;
271 int64_t BaseOff
= Offsets
[0];
272 SDNode
*BaseLoad
= O2SMap
[BaseOff
];
273 Loads
.push_back(BaseLoad
);
274 for (unsigned i
= 1, e
= Offsets
.size(); i
!= e
; ++i
) {
275 int64_t Offset
= Offsets
[i
];
276 SDNode
*Load
= O2SMap
[Offset
];
277 if (!TII
->shouldScheduleLoadsNear(BaseLoad
, Load
, BaseOff
, Offset
,NumLoads
))
278 break; // Stop right here. Ignore loads that are further away.
279 Loads
.push_back(Load
);
286 // Cluster loads by adding MVT::Glue outputs and inputs. This also
287 // ensure they are scheduled in order of increasing addresses.
288 SDNode
*Lead
= Loads
[0];
290 if (AddGlue(Lead
, InGlue
, true, DAG
))
291 InGlue
= SDValue(Lead
, Lead
->getNumValues() - 1);
292 for (unsigned I
= 1, E
= Loads
.size(); I
!= E
; ++I
) {
293 bool OutGlue
= I
< E
- 1;
294 SDNode
*Load
= Loads
[I
];
296 // If AddGlue fails, we could leave an unsused glue value. This should not
298 if (AddGlue(Load
, InGlue
, OutGlue
, DAG
)) {
300 InGlue
= SDValue(Load
, Load
->getNumValues() - 1);
304 else if (!OutGlue
&& InGlue
.getNode())
305 RemoveUnusedGlue(InGlue
.getNode(), DAG
);
309 /// ClusterNodes - Cluster certain nodes which should be scheduled together.
311 void ScheduleDAGSDNodes::ClusterNodes() {
312 for (SDNode
&NI
: DAG
->allnodes()) {
314 if (!Node
|| !Node
->isMachineOpcode())
317 unsigned Opc
= Node
->getMachineOpcode();
318 const MCInstrDesc
&MCID
= TII
->get(Opc
);
320 // Cluster loads from "near" addresses into combined SUnits.
321 ClusterNeighboringLoads(Node
);
325 void ScheduleDAGSDNodes::BuildSchedUnits() {
326 // During scheduling, the NodeId field of SDNode is used to map SDNodes
327 // to their associated SUnits by holding SUnits table indices. A value
328 // of -1 means the SDNode does not yet have an associated SUnit.
329 unsigned NumNodes
= 0;
330 for (SDNode
&NI
: DAG
->allnodes()) {
335 // Reserve entries in the vector for each of the SUnits we are creating. This
336 // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
338 // FIXME: Multiply by 2 because we may clone nodes during scheduling.
339 // This is a temporary workaround.
340 SUnits
.reserve(NumNodes
* 2);
342 // Add all nodes in depth first order.
343 SmallVector
<SDNode
*, 64> Worklist
;
344 SmallPtrSet
<SDNode
*, 32> Visited
;
345 Worklist
.push_back(DAG
->getRoot().getNode());
346 Visited
.insert(DAG
->getRoot().getNode());
348 SmallVector
<SUnit
*, 8> CallSUnits
;
349 while (!Worklist
.empty()) {
350 SDNode
*NI
= Worklist
.pop_back_val();
352 // Add all operands to the worklist unless they've already been added.
353 for (const SDValue
&Op
: NI
->op_values())
354 if (Visited
.insert(Op
.getNode()).second
)
355 Worklist
.push_back(Op
.getNode());
357 if (isPassiveNode(NI
)) // Leaf node, e.g. a TargetImmediate.
360 // If this node has already been processed, stop now.
361 if (NI
->getNodeId() != -1) continue;
363 SUnit
*NodeSUnit
= newSUnit(NI
);
365 // See if anything is glued to this node, if so, add them to glued
366 // nodes. Nodes can have at most one glue input and one glue output. Glue
367 // is required to be the last operand and result of a node.
369 // Scan up to find glued preds.
371 while (N
->getNumOperands() &&
372 N
->getOperand(N
->getNumOperands()-1).getValueType() == MVT::Glue
) {
373 N
= N
->getOperand(N
->getNumOperands()-1).getNode();
374 assert(N
->getNodeId() == -1 && "Node already inserted!");
375 N
->setNodeId(NodeSUnit
->NodeNum
);
376 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
377 NodeSUnit
->isCall
= true;
380 // Scan down to find any glued succs.
382 while (N
->getValueType(N
->getNumValues()-1) == MVT::Glue
) {
383 SDValue
GlueVal(N
, N
->getNumValues()-1);
385 // There are either zero or one users of the Glue result.
386 bool HasGlueUse
= false;
387 for (SDNode
*U
: N
->uses())
388 if (GlueVal
.isOperandOf(U
)) {
390 assert(N
->getNodeId() == -1 && "Node already inserted!");
391 N
->setNodeId(NodeSUnit
->NodeNum
);
393 if (N
->isMachineOpcode() && TII
->get(N
->getMachineOpcode()).isCall())
394 NodeSUnit
->isCall
= true;
397 if (!HasGlueUse
) break;
400 if (NodeSUnit
->isCall
)
401 CallSUnits
.push_back(NodeSUnit
);
403 // Schedule zero-latency TokenFactor below any nodes that may increase the
404 // schedule height. Otherwise, ancestors of the TokenFactor may appear to
405 // have false stalls.
406 if (NI
->getOpcode() == ISD::TokenFactor
)
407 NodeSUnit
->isScheduleLow
= true;
409 // If there are glue operands involved, N is now the bottom-most node
410 // of the sequence of nodes that are glued together.
412 NodeSUnit
->setNode(N
);
413 assert(N
->getNodeId() == -1 && "Node already inserted!");
414 N
->setNodeId(NodeSUnit
->NodeNum
);
416 // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
417 InitNumRegDefsLeft(NodeSUnit
);
419 // Assign the Latency field of NodeSUnit using target-provided information.
420 computeLatency(NodeSUnit
);
423 // Find all call operands.
424 while (!CallSUnits
.empty()) {
425 SUnit
*SU
= CallSUnits
.pop_back_val();
426 for (const SDNode
*SUNode
= SU
->getNode(); SUNode
;
427 SUNode
= SUNode
->getGluedNode()) {
428 if (SUNode
->getOpcode() != ISD::CopyToReg
)
430 SDNode
*SrcN
= SUNode
->getOperand(2).getNode();
431 if (isPassiveNode(SrcN
)) continue; // Not scheduled.
432 SUnit
*SrcSU
= &SUnits
[SrcN
->getNodeId()];
433 SrcSU
->isCallOp
= true;
438 void ScheduleDAGSDNodes::AddSchedEdges() {
439 const TargetSubtargetInfo
&ST
= MF
.getSubtarget();
441 // Check to see if the scheduler cares about latencies.
442 bool UnitLatencies
= forceUnitLatencies();
444 // Pass 2: add the preds, succs, etc.
445 for (SUnit
&SU
: SUnits
) {
446 SDNode
*MainNode
= SU
.getNode();
448 if (MainNode
->isMachineOpcode()) {
449 unsigned Opc
= MainNode
->getMachineOpcode();
450 const MCInstrDesc
&MCID
= TII
->get(Opc
);
451 for (unsigned i
= 0; i
!= MCID
.getNumOperands(); ++i
) {
452 if (MCID
.getOperandConstraint(i
, MCOI::TIED_TO
) != -1) {
453 SU
.isTwoAddress
= true;
457 if (MCID
.isCommutable())
458 SU
.isCommutable
= true;
461 // Find all predecessors and successors of the group.
462 for (SDNode
*N
= SU
.getNode(); N
; N
= N
->getGluedNode()) {
463 if (N
->isMachineOpcode() &&
464 TII
->get(N
->getMachineOpcode()).getImplicitDefs()) {
465 SU
.hasPhysRegClobbers
= true;
466 unsigned NumUsed
= InstrEmitter::CountResults(N
);
467 while (NumUsed
!= 0 && !N
->hasAnyUseOfValue(NumUsed
- 1))
468 --NumUsed
; // Skip over unused values at the end.
469 if (NumUsed
> TII
->get(N
->getMachineOpcode()).getNumDefs())
470 SU
.hasPhysRegDefs
= true;
473 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
474 SDNode
*OpN
= N
->getOperand(i
).getNode();
475 unsigned DefIdx
= N
->getOperand(i
).getResNo();
476 if (isPassiveNode(OpN
)) continue; // Not scheduled.
477 SUnit
*OpSU
= &SUnits
[OpN
->getNodeId()];
478 assert(OpSU
&& "Node has no SUnit!");
480 continue; // In the same group.
482 EVT OpVT
= N
->getOperand(i
).getValueType();
483 assert(OpVT
!= MVT::Glue
&& "Glued nodes should be in same sunit!");
484 bool isChain
= OpVT
== MVT::Other
;
486 unsigned PhysReg
= 0;
488 // Determine if this is a physical register dependency.
489 CheckForPhysRegDependency(OpN
, N
, i
, TRI
, TII
, PhysReg
, Cost
);
490 assert((PhysReg
== 0 || !isChain
) &&
491 "Chain dependence via physreg data?");
492 // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
493 // emits a copy from the physical register to a virtual register unless
494 // it requires a cross class copy (cost < 0). That means we are only
495 // treating "expensive to copy" register dependency as physical register
496 // dependency. This may change in the future though.
497 if (Cost
>= 0 && !StressSched
)
500 // If this is a ctrl dep, latency is 1.
501 unsigned OpLatency
= isChain
? 1 : OpSU
->Latency
;
502 // Special-case TokenFactor chains as zero-latency.
503 if(isChain
&& OpN
->getOpcode() == ISD::TokenFactor
)
506 SDep Dep
= isChain
? SDep(OpSU
, SDep::Barrier
)
507 : SDep(OpSU
, SDep::Data
, PhysReg
);
508 Dep
.setLatency(OpLatency
);
509 if (!isChain
&& !UnitLatencies
) {
510 computeOperandLatency(OpN
, N
, i
, Dep
);
511 ST
.adjustSchedDependency(OpSU
, DefIdx
, &SU
, i
, Dep
);
514 if (!SU
.addPred(Dep
) && !Dep
.isCtrl() && OpSU
->NumRegDefsLeft
> 1) {
515 // Multiple register uses are combined in the same SUnit. For example,
516 // we could have a set of glued nodes with all their defs consumed by
517 // another set of glued nodes. Register pressure tracking sees this as
518 // a single use, so to keep pressure balanced we reduce the defs.
520 // We can't tell (without more book-keeping) if this results from
521 // glued nodes or duplicate operands. As long as we don't reduce
522 // NumRegDefsLeft to zero, we handle the common cases well.
523 --OpSU
->NumRegDefsLeft
;
530 /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
531 /// are input. This SUnit graph is similar to the SelectionDAG, but
532 /// excludes nodes that aren't interesting to scheduling, and represents
533 /// glued together nodes with a single SUnit.
534 void ScheduleDAGSDNodes::BuildSchedGraph(AAResults
*AA
) {
535 // Cluster certain nodes which should be scheduled together.
537 // Populate the SUnits array.
539 // Compute all the scheduling dependencies between nodes.
543 // Initialize NumNodeDefs for the current Node's opcode.
544 void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
545 // Check for phys reg copy.
549 if (!Node
->isMachineOpcode()) {
550 if (Node
->getOpcode() == ISD::CopyFromReg
)
556 unsigned POpc
= Node
->getMachineOpcode();
557 if (POpc
== TargetOpcode::IMPLICIT_DEF
) {
558 // No register need be allocated for this.
562 if (POpc
== TargetOpcode::PATCHPOINT
&&
563 Node
->getValueType(0) == MVT::Other
) {
564 // PATCHPOINT is defined to have one result, but it might really have none
565 // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
570 unsigned NRegDefs
= SchedDAG
->TII
->get(Node
->getMachineOpcode()).getNumDefs();
571 // Some instructions define regs that are not represented in the selection DAG
572 // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
573 NodeNumDefs
= std::min(Node
->getNumValues(), NRegDefs
);
577 // Construct a RegDefIter for this SUnit and find the first valid value.
578 ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit
*SU
,
579 const ScheduleDAGSDNodes
*SD
)
580 : SchedDAG(SD
), Node(SU
->getNode()), DefIdx(0), NodeNumDefs(0) {
585 // Advance to the next valid value defined by the SUnit.
586 void ScheduleDAGSDNodes::RegDefIter::Advance() {
587 for (;Node
;) { // Visit all glued nodes.
588 for (;DefIdx
< NodeNumDefs
; ++DefIdx
) {
589 if (!Node
->hasAnyUseOfValue(DefIdx
))
591 ValueType
= Node
->getSimpleValueType(DefIdx
);
593 return; // Found a normal regdef.
595 Node
= Node
->getGluedNode();
597 return; // No values left to visit.
603 void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit
*SU
) {
604 assert(SU
->NumRegDefsLeft
== 0 && "expect a new node");
605 for (RegDefIter
I(SU
, this); I
.IsValid(); I
.Advance()) {
606 assert(SU
->NumRegDefsLeft
< USHRT_MAX
&& "overflow is ok but unexpected");
607 ++SU
->NumRegDefsLeft
;
611 void ScheduleDAGSDNodes::computeLatency(SUnit
*SU
) {
612 SDNode
*N
= SU
->getNode();
614 // TokenFactor operands are considered zero latency, and some schedulers
615 // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
616 // whenever node latency is nonzero.
617 if (N
&& N
->getOpcode() == ISD::TokenFactor
) {
622 // Check to see if the scheduler cares about latencies.
623 if (forceUnitLatencies()) {
628 if (!InstrItins
|| InstrItins
->isEmpty()) {
629 if (N
&& N
->isMachineOpcode() &&
630 TII
->isHighLatencyDef(N
->getMachineOpcode()))
631 SU
->Latency
= HighLatencyCycles
;
637 // Compute the latency for the node. We use the sum of the latencies for
638 // all nodes glued together into this SUnit.
640 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode())
641 if (N
->isMachineOpcode())
642 SU
->Latency
+= TII
->getInstrLatency(InstrItins
, N
);
645 void ScheduleDAGSDNodes::computeOperandLatency(SDNode
*Def
, SDNode
*Use
,
646 unsigned OpIdx
, SDep
& dep
) const{
647 // Check to see if the scheduler cares about latencies.
648 if (forceUnitLatencies())
651 if (dep
.getKind() != SDep::Data
)
654 unsigned DefIdx
= Use
->getOperand(OpIdx
).getResNo();
655 if (Use
->isMachineOpcode())
656 // Adjust the use operand index by num of defs.
657 OpIdx
+= TII
->get(Use
->getMachineOpcode()).getNumDefs();
658 int Latency
= TII
->getOperandLatency(InstrItins
, Def
, DefIdx
, Use
, OpIdx
);
659 if (Latency
> 1 && Use
->getOpcode() == ISD::CopyToReg
&&
661 unsigned Reg
= cast
<RegisterSDNode
>(Use
->getOperand(1))->getReg();
662 if (Register::isVirtualRegister(Reg
))
663 // This copy is a liveout value. It is likely coalesced, so reduce the
664 // latency so not to penalize the def.
665 // FIXME: need target specific adjustment here?
666 Latency
= (Latency
> 1) ? Latency
- 1 : 1;
669 dep
.setLatency(Latency
);
672 void ScheduleDAGSDNodes::dumpNode(const SUnit
&SU
) const {
673 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
678 dbgs() << "PHYS REG COPY\n";
682 SU
.getNode()->dump(DAG
);
684 SmallVector
<SDNode
*, 4> GluedNodes
;
685 for (SDNode
*N
= SU
.getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
686 GluedNodes
.push_back(N
);
687 while (!GluedNodes
.empty()) {
689 GluedNodes
.back()->dump(DAG
);
691 GluedNodes
.pop_back();
696 void ScheduleDAGSDNodes::dump() const {
697 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
698 if (EntrySU
.getNode() != nullptr)
699 dumpNodeAll(EntrySU
);
700 for (const SUnit
&SU
: SUnits
)
702 if (ExitSU
.getNode() != nullptr)
707 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
708 void ScheduleDAGSDNodes::dumpSchedule() const {
709 for (const SUnit
*SU
: Sequence
) {
713 dbgs() << "**** NOOP ****\n";
719 /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
720 /// their state is consistent with the nodes listed in Sequence.
722 void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp
) {
723 unsigned ScheduledNodes
= ScheduleDAG::VerifyScheduledDAG(isBottomUp
);
724 unsigned Noops
= llvm::count(Sequence
, nullptr);
725 assert(Sequence
.size() - Noops
== ScheduledNodes
&&
726 "The number of nodes scheduled doesn't match the expected number!");
730 /// ProcessSDDbgValues - Process SDDbgValues associated with this node.
732 ProcessSDDbgValues(SDNode
*N
, SelectionDAG
*DAG
, InstrEmitter
&Emitter
,
733 SmallVectorImpl
<std::pair
<unsigned, MachineInstr
*> > &Orders
,
734 DenseMap
<SDValue
, Register
> &VRBaseMap
, unsigned Order
) {
735 if (!N
->getHasDebugValue())
738 /// Returns true if \p DV has any VReg operand locations which don't exist in
740 auto HasUnknownVReg
= [&VRBaseMap
](SDDbgValue
*DV
) {
741 for (const SDDbgOperand
&L
: DV
->getLocationOps()) {
742 if (L
.getKind() == SDDbgOperand::SDNODE
&&
743 VRBaseMap
.count({L
.getSDNode(), L
.getResNo()}) == 0)
749 // Opportunistically insert immediate dbg_value uses, i.e. those with the same
750 // source order number as N.
751 MachineBasicBlock
*BB
= Emitter
.getBlock();
752 MachineBasicBlock::iterator InsertPos
= Emitter
.getInsertPos();
753 for (auto DV
: DAG
->GetDbgValues(N
)) {
756 unsigned DVOrder
= DV
->getOrder();
757 if (Order
!= 0 && DVOrder
!= Order
)
759 // If DV has any VReg location operands which haven't been mapped then
760 // either that node is no longer available or we just haven't visited the
761 // node yet. In the former case we should emit an undef dbg_value, but we
762 // can do it later. And for the latter we'll want to wait until all
763 // dependent nodes have been visited.
764 if (!DV
->isInvalidated() && HasUnknownVReg(DV
))
766 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(DV
, VRBaseMap
);
769 Orders
.push_back({DVOrder
, DbgMI
});
770 BB
->insert(InsertPos
, DbgMI
);
774 // ProcessSourceNode - Process nodes with source order numbers. These are added
775 // to a vector which EmitSchedule uses to determine how to insert dbg_value
776 // instructions in the right order.
778 ProcessSourceNode(SDNode
*N
, SelectionDAG
*DAG
, InstrEmitter
&Emitter
,
779 DenseMap
<SDValue
, Register
> &VRBaseMap
,
780 SmallVectorImpl
<std::pair
<unsigned, MachineInstr
*>> &Orders
,
781 SmallSet
<Register
, 8> &Seen
, MachineInstr
*NewInsn
) {
782 unsigned Order
= N
->getIROrder();
783 if (!Order
|| Seen
.count(Order
)) {
784 // Process any valid SDDbgValues even if node does not have any order
786 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, 0);
790 // If a new instruction was generated for this Order number, record it.
791 // Otherwise, leave this order number unseen: we will either find later
792 // instructions for it, or leave it unseen if there were no instructions at
796 Orders
.push_back({Order
, NewInsn
});
799 // Even if no instruction was generated, a Value may have become defined via
800 // earlier nodes. Try to process them now.
801 ProcessSDDbgValues(N
, DAG
, Emitter
, Orders
, VRBaseMap
, Order
);
804 void ScheduleDAGSDNodes::
805 EmitPhysRegCopy(SUnit
*SU
, DenseMap
<SUnit
*, Register
> &VRBaseMap
,
806 MachineBasicBlock::iterator InsertPos
) {
807 for (const SDep
&Pred
: SU
->Preds
) {
809 continue; // ignore chain preds
810 if (Pred
.getSUnit()->CopyDstRC
) {
811 // Copy to physical register.
812 DenseMap
<SUnit
*, Register
>::iterator VRI
=
813 VRBaseMap
.find(Pred
.getSUnit());
814 assert(VRI
!= VRBaseMap
.end() && "Node emitted out of order - late");
815 // Find the destination physical register.
817 for (const SDep
&Succ
: SU
->Succs
) {
819 continue; // ignore chain preds
825 BuildMI(*BB
, InsertPos
, DebugLoc(), TII
->get(TargetOpcode::COPY
), Reg
)
826 .addReg(VRI
->second
);
828 // Copy from physical register.
829 assert(Pred
.getReg() && "Unknown physical register!");
830 Register VRBase
= MRI
.createVirtualRegister(SU
->CopyDstRC
);
831 bool isNew
= VRBaseMap
.insert(std::make_pair(SU
, VRBase
)).second
;
832 (void)isNew
; // Silence compiler warning.
833 assert(isNew
&& "Node emitted out of order - early");
834 BuildMI(*BB
, InsertPos
, DebugLoc(), TII
->get(TargetOpcode::COPY
), VRBase
)
835 .addReg(Pred
.getReg());
841 /// EmitSchedule - Emit the machine code in scheduled order. Return the new
842 /// InsertPos and MachineBasicBlock that contains this insertion
843 /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
844 /// not necessarily refer to returned BB. The emitter may split blocks.
845 MachineBasicBlock
*ScheduleDAGSDNodes::
846 EmitSchedule(MachineBasicBlock::iterator
&InsertPos
) {
847 InstrEmitter
Emitter(DAG
->getTarget(), BB
, InsertPos
);
848 DenseMap
<SDValue
, Register
> VRBaseMap
;
849 DenseMap
<SUnit
*, Register
> CopyVRBaseMap
;
850 SmallVector
<std::pair
<unsigned, MachineInstr
*>, 32> Orders
;
851 SmallSet
<Register
, 8> Seen
;
852 bool HasDbg
= DAG
->hasDebugValues();
854 // Emit a node, and determine where its first instruction is for debuginfo.
855 // Zero, one, or multiple instructions can be created when emitting a node.
857 [&](SDNode
*Node
, bool IsClone
, bool IsCloned
,
858 DenseMap
<SDValue
, Register
> &VRBaseMap
) -> MachineInstr
* {
859 // Fetch instruction prior to this, or end() if nonexistant.
860 auto GetPrevInsn
= [&](MachineBasicBlock::iterator I
) {
861 if (I
== BB
->begin())
864 return std::prev(Emitter
.getInsertPos());
867 MachineBasicBlock::iterator Before
= GetPrevInsn(Emitter
.getInsertPos());
868 Emitter
.EmitNode(Node
, IsClone
, IsCloned
, VRBaseMap
);
869 MachineBasicBlock::iterator After
= GetPrevInsn(Emitter
.getInsertPos());
871 // If the iterator did not change, no instructions were inserted.
876 if (Before
== BB
->end()) {
877 // There were no prior instructions; the new ones must start at the
878 // beginning of the block.
879 MI
= &Emitter
.getBlock()->instr_front();
881 // Return first instruction after the pre-existing instructions.
882 MI
= &*std::next(Before
);
885 if (MI
->isCandidateForCallSiteEntry() &&
886 DAG
->getTarget().Options
.EmitCallSiteInfo
)
887 MF
.addCallArgsForwardingRegs(MI
, DAG
->getSDCallSiteInfo(Node
));
889 if (DAG
->getNoMergeSiteInfo(Node
)) {
890 MI
->setFlag(MachineInstr::MIFlag::NoMerge
);
896 // If this is the first BB, emit byval parameter dbg_value's.
897 if (HasDbg
&& BB
->getParent()->begin() == MachineFunction::iterator(BB
)) {
898 SDDbgInfo::DbgIterator PDI
= DAG
->ByvalParmDbgBegin();
899 SDDbgInfo::DbgIterator PDE
= DAG
->ByvalParmDbgEnd();
900 for (; PDI
!= PDE
; ++PDI
) {
901 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*PDI
, VRBaseMap
);
903 BB
->insert(InsertPos
, DbgMI
);
904 // We re-emit the dbg_value closer to its use, too, after instructions
905 // are emitted to the BB.
906 (*PDI
)->clearIsEmitted();
911 for (SUnit
*SU
: Sequence
) {
913 // Null SUnit* is a noop.
914 TII
->insertNoop(*Emitter
.getBlock(), InsertPos
);
918 // For pre-regalloc scheduling, create instructions corresponding to the
919 // SDNode and any glued SDNodes and append them to the block.
920 if (!SU
->getNode()) {
922 EmitPhysRegCopy(SU
, CopyVRBaseMap
, InsertPos
);
926 SmallVector
<SDNode
*, 4> GluedNodes
;
927 for (SDNode
*N
= SU
->getNode()->getGluedNode(); N
; N
= N
->getGluedNode())
928 GluedNodes
.push_back(N
);
929 while (!GluedNodes
.empty()) {
930 SDNode
*N
= GluedNodes
.back();
931 auto NewInsn
= EmitNode(N
, SU
->OrigNode
!= SU
, SU
->isCloned
, VRBaseMap
);
932 // Remember the source order of the inserted instruction.
934 ProcessSourceNode(N
, DAG
, Emitter
, VRBaseMap
, Orders
, Seen
, NewInsn
);
936 if (MDNode
*MD
= DAG
->getHeapAllocSite(N
))
937 if (NewInsn
&& NewInsn
->isCall())
938 NewInsn
->setHeapAllocMarker(MF
, MD
);
940 GluedNodes
.pop_back();
943 EmitNode(SU
->getNode(), SU
->OrigNode
!= SU
, SU
->isCloned
, VRBaseMap
);
944 // Remember the source order of the inserted instruction.
946 ProcessSourceNode(SU
->getNode(), DAG
, Emitter
, VRBaseMap
, Orders
, Seen
,
949 if (MDNode
*MD
= DAG
->getHeapAllocSite(SU
->getNode())) {
950 if (NewInsn
&& NewInsn
->isCall())
951 NewInsn
->setHeapAllocMarker(MF
, MD
);
955 // Insert all the dbg_values which have not already been inserted in source
958 MachineBasicBlock::iterator BBBegin
= BB
->getFirstNonPHI();
960 // Sort the source order instructions and use the order to insert debug
961 // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
962 // regardless of the host's implementation fo std::sort.
963 llvm::stable_sort(Orders
, less_first());
964 std::stable_sort(DAG
->DbgBegin(), DAG
->DbgEnd(),
965 [](const SDDbgValue
*LHS
, const SDDbgValue
*RHS
) {
966 return LHS
->getOrder() < RHS
->getOrder();
969 SDDbgInfo::DbgIterator DI
= DAG
->DbgBegin();
970 SDDbgInfo::DbgIterator DE
= DAG
->DbgEnd();
971 // Now emit the rest according to source order.
972 unsigned LastOrder
= 0;
973 for (unsigned i
= 0, e
= Orders
.size(); i
!= e
&& DI
!= DE
; ++i
) {
974 unsigned Order
= Orders
[i
].first
;
975 MachineInstr
*MI
= Orders
[i
].second
;
976 // Insert all SDDbgValue's whose order(s) are before "Order".
978 for (; DI
!= DE
; ++DI
) {
979 if ((*DI
)->getOrder() < LastOrder
|| (*DI
)->getOrder() >= Order
)
981 if ((*DI
)->isEmitted())
984 MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
);
987 // Insert to start of the BB (after PHIs).
988 BB
->insert(BBBegin
, DbgMI
);
990 // Insert at the instruction, which may be in a different
991 // block, if the block was split by a custom inserter.
992 MachineBasicBlock::iterator Pos
= MI
;
993 MI
->getParent()->insert(Pos
, DbgMI
);
999 // Add trailing DbgValue's before the terminator. FIXME: May want to add
1000 // some of them before one or more conditional branches?
1001 SmallVector
<MachineInstr
*, 8> DbgMIs
;
1002 for (; DI
!= DE
; ++DI
) {
1003 if ((*DI
)->isEmitted())
1005 assert((*DI
)->getOrder() >= LastOrder
&&
1006 "emitting DBG_VALUE out of order");
1007 if (MachineInstr
*DbgMI
= Emitter
.EmitDbgValue(*DI
, VRBaseMap
))
1008 DbgMIs
.push_back(DbgMI
);
1011 MachineBasicBlock
*InsertBB
= Emitter
.getBlock();
1012 MachineBasicBlock::iterator Pos
= InsertBB
->getFirstTerminator();
1013 InsertBB
->insert(Pos
, DbgMIs
.begin(), DbgMIs
.end());
1015 SDDbgInfo::DbgLabelIterator DLI
= DAG
->DbgLabelBegin();
1016 SDDbgInfo::DbgLabelIterator DLE
= DAG
->DbgLabelEnd();
1017 // Now emit the rest according to source order.
1019 for (const auto &InstrOrder
: Orders
) {
1020 unsigned Order
= InstrOrder
.first
;
1021 MachineInstr
*MI
= InstrOrder
.second
;
1025 // Insert all SDDbgLabel's whose order(s) are before "Order".
1026 for (; DLI
!= DLE
&&
1027 (*DLI
)->getOrder() >= LastOrder
&& (*DLI
)->getOrder() < Order
;
1029 MachineInstr
*DbgMI
= Emitter
.EmitDbgLabel(*DLI
);
1032 // Insert to start of the BB (after PHIs).
1033 BB
->insert(BBBegin
, DbgMI
);
1035 // Insert at the instruction, which may be in a different
1036 // block, if the block was split by a custom inserter.
1037 MachineBasicBlock::iterator Pos
= MI
;
1038 MI
->getParent()->insert(Pos
, DbgMI
);
1049 InsertPos
= Emitter
.getInsertPos();
1050 // In some cases, DBG_VALUEs might be inserted after the first terminator,
1051 // which results in an invalid MBB. If that happens, move the DBG_VALUEs
1052 // before the first terminator.
1053 MachineBasicBlock
*InsertBB
= Emitter
.getBlock();
1054 auto FirstTerm
= InsertBB
->getFirstTerminator();
1055 if (FirstTerm
!= InsertBB
->end()) {
1056 assert(!FirstTerm
->isDebugValue() &&
1057 "first terminator cannot be a debug value");
1058 for (MachineInstr
&MI
: make_early_inc_range(
1059 make_range(std::next(FirstTerm
), InsertBB
->end()))) {
1060 if (!MI
.isDebugValue())
1063 if (&MI
== InsertPos
)
1064 InsertPos
= std::prev(InsertPos
->getIterator());
1066 // The DBG_VALUE was referencing a value produced by a terminator. By
1067 // moving the DBG_VALUE, the referenced value also needs invalidating.
1068 MI
.getOperand(0).ChangeToRegister(0, false);
1069 MI
.moveBefore(&*FirstTerm
);
1075 /// Return the basic block label.
1076 std::string
ScheduleDAGSDNodes::getDAGName() const {
1077 return "sunit-dag." + BB
->getFullName();