//===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
//
// This SMS implementation is a target-independent back-end pass. When enabled,
// the pass runs just prior to the register allocation pass, while the machine
// IR is in SSA form. If software pipelining is successful, then the original
// loop is replaced by the optimized loop. The optimized loop contains one or
// more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
// the instructions cannot be scheduled in a given MII, we increase the MII by
// one and try again.
//
// The SMS implementation is an extension of the ScheduleDAGInstrs class. We
// represent loop carried dependences in the DAG as order edges to the Phi
// nodes. We also perform several passes over the DAG to eliminate unnecessary
// edges that inhibit the ability to pipeline. The implementation uses the
// DFAPacketizer class to compute the minimum initiation interval and to check
// where an instruction may be inserted in the pipelined schedule.
//
// In order for the SMS pass to work, several target specific hooks need to be
// implemented to get information about the loop structure and to rewrite
// instructions.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdint>
#include <functional>
#include <iterator>
#include <map>
#include <memory>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;
94 #define DEBUG_TYPE "pipeliner"
96 STATISTIC(NumTrytoPipeline
, "Number of loops that we attempt to pipeline");
97 STATISTIC(NumPipelined
, "Number of loops software pipelined");
98 STATISTIC(NumNodeOrderIssues
, "Number of node order issues found");
99 STATISTIC(NumFailBranch
, "Pipeliner abort due to unknown branch");
100 STATISTIC(NumFailLoop
, "Pipeliner abort due to unsupported loop");
101 STATISTIC(NumFailPreheader
, "Pipeliner abort due to missing preheader");
102 STATISTIC(NumFailLargeMaxMII
, "Pipeliner abort due to MaxMII too large");
103 STATISTIC(NumFailZeroMII
, "Pipeliner abort due to zero MII");
104 STATISTIC(NumFailNoSchedule
, "Pipeliner abort due to no schedule found");
105 STATISTIC(NumFailZeroStage
, "Pipeliner abort due to zero stage");
106 STATISTIC(NumFailLargeMaxStage
, "Pipeliner abort due to too many stages");
108 /// A command line option to turn software pipelining on or off.
109 static cl::opt
<bool> EnableSWP("enable-pipeliner", cl::Hidden
, cl::init(true),
111 cl::desc("Enable Software Pipelining"));
113 /// A command line option to enable SWP at -Os.
114 static cl::opt
<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
115 cl::desc("Enable SWP at Os."), cl::Hidden
,
118 /// A command line argument to limit minimum initial interval for pipelining.
119 static cl::opt
<int> SwpMaxMii("pipeliner-max-mii",
120 cl::desc("Size limit for the MII."),
121 cl::Hidden
, cl::init(27));
123 /// A command line argument to limit the number of stages in the pipeline.
125 SwpMaxStages("pipeliner-max-stages",
126 cl::desc("Maximum stages allowed in the generated scheduled."),
127 cl::Hidden
, cl::init(3));
129 /// A command line option to disable the pruning of chain dependences due to
130 /// an unrelated Phi.
132 SwpPruneDeps("pipeliner-prune-deps",
133 cl::desc("Prune dependences between unrelated Phi nodes."),
134 cl::Hidden
, cl::init(true));
136 /// A command line option to disable the pruning of loop carried order
139 SwpPruneLoopCarried("pipeliner-prune-loop-carried",
140 cl::desc("Prune loop carried order dependences."),
141 cl::Hidden
, cl::init(true));
144 static cl::opt
<int> SwpLoopLimit("pipeliner-max", cl::Hidden
, cl::init(-1));
147 static cl::opt
<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
148 cl::ReallyHidden
, cl::init(false),
149 cl::ZeroOrMore
, cl::desc("Ignore RecMII"));
151 static cl::opt
<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden
,
153 static cl::opt
<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden
,
158 // A command line option to enable the CopyToPhi DAG mutation.
160 SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden
,
161 cl::init(true), cl::ZeroOrMore
,
162 cl::desc("Enable CopyToPhi DAG Mutation"));
164 } // end namespace llvm
166 unsigned SwingSchedulerDAG::Circuits::MaxPaths
= 5;
167 char MachinePipeliner::ID
= 0;
169 int MachinePipeliner::NumTries
= 0;
171 char &llvm::MachinePipelinerID
= MachinePipeliner::ID
;
173 INITIALIZE_PASS_BEGIN(MachinePipeliner
, DEBUG_TYPE
,
174 "Modulo Software Pipelining", false, false)
175 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
)
176 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo
)
177 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
178 INITIALIZE_PASS_DEPENDENCY(LiveIntervals
)
179 INITIALIZE_PASS_END(MachinePipeliner
, DEBUG_TYPE
,
180 "Modulo Software Pipelining", false, false)
182 /// The "main" function for implementing Swing Modulo Scheduling.
183 bool MachinePipeliner::runOnMachineFunction(MachineFunction
&mf
) {
184 if (skipFunction(mf
.getFunction()))
190 if (mf
.getFunction().getAttributes().hasAttribute(
191 AttributeList::FunctionIndex
, Attribute::OptimizeForSize
) &&
192 !EnableSWPOptSize
.getPosition())
195 if (!mf
.getSubtarget().enableMachinePipeliner())
198 // Cannot pipeline loops without instruction itineraries if we are using
199 // DFA for the pipeliner.
200 if (mf
.getSubtarget().useDFAforSMS() &&
201 (!mf
.getSubtarget().getInstrItineraryData() ||
202 mf
.getSubtarget().getInstrItineraryData()->isEmpty()))
206 MLI
= &getAnalysis
<MachineLoopInfo
>();
207 MDT
= &getAnalysis
<MachineDominatorTree
>();
208 TII
= MF
->getSubtarget().getInstrInfo();
209 RegClassInfo
.runOnMachineFunction(*MF
);
217 /// Attempt to perform the SMS algorithm on the specified loop. This function is
218 /// the main entry point for the algorithm. The function identifies candidate
219 /// loops, calculates the minimum initiation interval, and attempts to schedule
221 bool MachinePipeliner::scheduleLoop(MachineLoop
&L
) {
222 bool Changed
= false;
223 for (auto &InnerLoop
: L
)
224 Changed
|= scheduleLoop(*InnerLoop
);
227 // Stop trying after reaching the limit (if any).
228 int Limit
= SwpLoopLimit
;
230 if (NumTries
>= SwpLoopLimit
)
236 setPragmaPipelineOptions(L
);
237 if (!canPipelineLoop(L
)) {
238 LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
244 Changed
= swingModuloScheduler(L
);
249 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop
&L
) {
250 MachineBasicBlock
*LBLK
= L
.getTopBlock();
255 const BasicBlock
*BBLK
= LBLK
->getBasicBlock();
259 const Instruction
*TI
= BBLK
->getTerminator();
263 MDNode
*LoopID
= TI
->getMetadata(LLVMContext::MD_loop
);
264 if (LoopID
== nullptr)
267 assert(LoopID
->getNumOperands() > 0 && "requires atleast one operand");
268 assert(LoopID
->getOperand(0) == LoopID
&& "invalid loop");
270 for (unsigned i
= 1, e
= LoopID
->getNumOperands(); i
< e
; ++i
) {
271 MDNode
*MD
= dyn_cast
<MDNode
>(LoopID
->getOperand(i
));
276 MDString
*S
= dyn_cast
<MDString
>(MD
->getOperand(0));
281 if (S
->getString() == "llvm.loop.pipeline.initiationinterval") {
282 assert(MD
->getNumOperands() == 2 &&
283 "Pipeline initiation interval hint metadata should have two operands.");
285 mdconst::extract
<ConstantInt
>(MD
->getOperand(1))->getZExtValue();
286 assert(II_setByPragma
>= 1 && "Pipeline initiation interval must be positive.");
287 } else if (S
->getString() == "llvm.loop.pipeline.disable") {
288 disabledByPragma
= true;
293 /// Return true if the loop can be software pipelined. The algorithm is
294 /// restricted to loops with a single basic block. Make sure that the
295 /// branch in the loop can be analyzed.
296 bool MachinePipeliner::canPipelineLoop(MachineLoop
&L
) {
297 if (L
.getNumBlocks() != 1)
300 if (disabledByPragma
)
303 // Check if the branch can't be understood because we can't do pipelining
304 // if that's the case.
308 if (TII
->analyzeBranch(*L
.getHeader(), LI
.TBB
, LI
.FBB
, LI
.BrCond
)) {
310 dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
315 LI
.LoopInductionVar
= nullptr;
316 LI
.LoopCompare
= nullptr;
317 if (TII
->analyzeLoop(L
, LI
.LoopInductionVar
, LI
.LoopCompare
)) {
319 dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
324 if (!L
.getLoopPreheader()) {
326 dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
331 // Remove any subregisters from inputs to phi nodes.
332 preprocessPhiNodes(*L
.getHeader());
336 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock
&B
) {
337 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
338 SlotIndexes
&Slots
= *getAnalysis
<LiveIntervals
>().getSlotIndexes();
340 for (MachineInstr
&PI
: make_range(B
.begin(), B
.getFirstNonPHI())) {
341 MachineOperand
&DefOp
= PI
.getOperand(0);
342 assert(DefOp
.getSubReg() == 0);
343 auto *RC
= MRI
.getRegClass(DefOp
.getReg());
345 for (unsigned i
= 1, n
= PI
.getNumOperands(); i
!= n
; i
+= 2) {
346 MachineOperand
&RegOp
= PI
.getOperand(i
);
347 if (RegOp
.getSubReg() == 0)
350 // If the operand uses a subregister, replace it with a new register
351 // without subregisters, and generate a copy to the new register.
352 Register NewReg
= MRI
.createVirtualRegister(RC
);
353 MachineBasicBlock
&PredB
= *PI
.getOperand(i
+1).getMBB();
354 MachineBasicBlock::iterator At
= PredB
.getFirstTerminator();
355 const DebugLoc
&DL
= PredB
.findDebugLoc(At
);
356 auto Copy
= BuildMI(PredB
, At
, DL
, TII
->get(TargetOpcode::COPY
), NewReg
)
357 .addReg(RegOp
.getReg(), getRegState(RegOp
),
359 Slots
.insertMachineInstrInMaps(*Copy
);
360 RegOp
.setReg(NewReg
);
366 /// The SMS algorithm consists of the following main steps:
367 /// 1. Computation and analysis of the dependence graph.
368 /// 2. Ordering of the nodes (instructions).
369 /// 3. Attempt to Schedule the loop.
370 bool MachinePipeliner::swingModuloScheduler(MachineLoop
&L
) {
371 assert(L
.getBlocks().size() == 1 && "SMS works on single blocks only.");
373 SwingSchedulerDAG
SMS(*this, L
, getAnalysis
<LiveIntervals
>(), RegClassInfo
,
376 MachineBasicBlock
*MBB
= L
.getHeader();
377 // The kernel should not include any terminator instructions. These
378 // will be added back later.
381 // Compute the number of 'real' instructions in the basic block by
382 // ignoring terminators.
383 unsigned size
= MBB
->size();
384 for (MachineBasicBlock::iterator I
= MBB
->getFirstTerminator(),
385 E
= MBB
->instr_end();
389 SMS
.enterRegion(MBB
, MBB
->begin(), MBB
->getFirstTerminator(), size
);
394 return SMS
.hasNewSchedule();
397 void SwingSchedulerDAG::setMII(unsigned ResMII
, unsigned RecMII
) {
398 if (II_setByPragma
> 0)
399 MII
= II_setByPragma
;
401 MII
= std::max(ResMII
, RecMII
);
404 void SwingSchedulerDAG::setMAX_II() {
405 if (II_setByPragma
> 0)
406 MAX_II
= II_setByPragma
;
411 /// We override the schedule function in ScheduleDAGInstrs to implement the
412 /// scheduling part of the Swing Modulo Scheduling algorithm.
413 void SwingSchedulerDAG::schedule() {
414 AliasAnalysis
*AA
= &Pass
.getAnalysis
<AAResultsWrapperPass
>().getAAResults();
416 addLoopCarriedDependences(AA
);
417 updatePhiDependences();
418 Topo
.InitDAGTopologicalSorting();
423 NodeSetType NodeSets
;
424 findCircuits(NodeSets
);
425 NodeSetType Circuits
= NodeSets
;
427 // Calculate the MII.
428 unsigned ResMII
= calculateResMII();
429 unsigned RecMII
= calculateRecMII(NodeSets
);
433 // This flag is used for testing and can cause correctness problems.
437 setMII(ResMII
, RecMII
);
440 LLVM_DEBUG(dbgs() << "MII = " << MII
<< " MAX_II = " << MAX_II
441 << " (rec=" << RecMII
<< ", res=" << ResMII
<< ")\n");
443 // Can't schedule a loop without a valid MII.
447 << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
452 // Don't pipeline large loops.
453 if (SwpMaxMii
!= -1 && (int)MII
> SwpMaxMii
) {
454 LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
455 << ", we don't pipleline large loops\n");
456 NumFailLargeMaxMII
++;
460 computeNodeFunctions(NodeSets
);
462 registerPressureFilter(NodeSets
);
464 colocateNodeSets(NodeSets
);
466 checkNodeSets(NodeSets
);
469 for (auto &I
: NodeSets
) {
470 dbgs() << " Rec NodeSet ";
475 llvm::stable_sort(NodeSets
, std::greater
<NodeSet
>());
477 groupRemainingNodes(NodeSets
);
479 removeDuplicateNodes(NodeSets
);
482 for (auto &I
: NodeSets
) {
483 dbgs() << " NodeSet ";
488 computeNodeOrder(NodeSets
);
490 // check for node order issues
491 checkValidNodeOrder(Circuits
);
493 SMSchedule
Schedule(Pass
.MF
);
494 Scheduled
= schedulePipeline(Schedule
);
497 LLVM_DEBUG(dbgs() << "No schedule found, return\n");
502 unsigned numStages
= Schedule
.getMaxStageCount();
503 // No need to generate pipeline if there are no overlapped iterations.
504 if (numStages
== 0) {
506 dbgs() << "No overlapped iterations, no need to generate pipeline\n");
510 // Check that the maximum stage count is less than user-defined limit.
511 if (SwpMaxStages
> -1 && (int)numStages
> SwpMaxStages
) {
512 LLVM_DEBUG(dbgs() << "numStages:" << numStages
<< ">" << SwpMaxStages
513 << " : too many stages, abort\n");
514 NumFailLargeMaxStage
++;
518 generatePipelinedLoop(Schedule
);
522 /// Clean up after the software pipeliner runs.
523 void SwingSchedulerDAG::finishBlock() {
524 for (MachineInstr
*I
: NewMIs
)
525 MF
.DeleteMachineInstr(I
);
528 // Call the superclass.
529 ScheduleDAGInstrs::finishBlock();
532 /// Return the register values for the operands of a Phi instruction.
533 /// This function assume the instruction is a Phi.
534 static void getPhiRegs(MachineInstr
&Phi
, MachineBasicBlock
*Loop
,
535 unsigned &InitVal
, unsigned &LoopVal
) {
536 assert(Phi
.isPHI() && "Expecting a Phi.");
540 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
541 if (Phi
.getOperand(i
+ 1).getMBB() != Loop
)
542 InitVal
= Phi
.getOperand(i
).getReg();
544 LoopVal
= Phi
.getOperand(i
).getReg();
546 assert(InitVal
!= 0 && LoopVal
!= 0 && "Unexpected Phi structure.");
549 /// Return the Phi register value that comes from the incoming block.
550 static unsigned getInitPhiReg(MachineInstr
&Phi
, MachineBasicBlock
*LoopBB
) {
551 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
552 if (Phi
.getOperand(i
+ 1).getMBB() != LoopBB
)
553 return Phi
.getOperand(i
).getReg();
557 /// Return the Phi register value that comes the loop block.
558 static unsigned getLoopPhiReg(MachineInstr
&Phi
, MachineBasicBlock
*LoopBB
) {
559 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
560 if (Phi
.getOperand(i
+ 1).getMBB() == LoopBB
)
561 return Phi
.getOperand(i
).getReg();
565 /// Return true if SUb can be reached from SUa following the chain edges.
566 static bool isSuccOrder(SUnit
*SUa
, SUnit
*SUb
) {
567 SmallPtrSet
<SUnit
*, 8> Visited
;
568 SmallVector
<SUnit
*, 8> Worklist
;
569 Worklist
.push_back(SUa
);
570 while (!Worklist
.empty()) {
571 const SUnit
*SU
= Worklist
.pop_back_val();
572 for (auto &SI
: SU
->Succs
) {
573 SUnit
*SuccSU
= SI
.getSUnit();
574 if (SI
.getKind() == SDep::Order
) {
575 if (Visited
.count(SuccSU
))
579 Worklist
.push_back(SuccSU
);
580 Visited
.insert(SuccSU
);
587 /// Return true if the instruction causes a chain between memory
588 /// references before and after it.
589 static bool isDependenceBarrier(MachineInstr
&MI
, AliasAnalysis
*AA
) {
590 return MI
.isCall() || MI
.mayRaiseFPException() ||
591 MI
.hasUnmodeledSideEffects() ||
592 (MI
.hasOrderedMemoryRef() &&
593 (!MI
.mayLoad() || !MI
.isDereferenceableInvariantLoad(AA
)));
596 /// Return the underlying objects for the memory references of an instruction.
597 /// This function calls the code in ValueTracking, but first checks that the
598 /// instruction has a memory operand.
599 static void getUnderlyingObjects(const MachineInstr
*MI
,
600 SmallVectorImpl
<const Value
*> &Objs
,
601 const DataLayout
&DL
) {
602 if (!MI
->hasOneMemOperand())
604 MachineMemOperand
*MM
= *MI
->memoperands_begin();
607 GetUnderlyingObjects(MM
->getValue(), Objs
, DL
);
608 for (const Value
*V
: Objs
) {
609 if (!isIdentifiedObject(V
)) {
617 /// Add a chain edge between a load and store if the store can be an
618 /// alias of the load on a subsequent iteration, i.e., a loop carried
619 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
620 /// but that code doesn't create loop carried dependences.
621 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis
*AA
) {
622 MapVector
<const Value
*, SmallVector
<SUnit
*, 4>> PendingLoads
;
623 Value
*UnknownValue
=
624 UndefValue::get(Type::getVoidTy(MF
.getFunction().getContext()));
625 for (auto &SU
: SUnits
) {
626 MachineInstr
&MI
= *SU
.getInstr();
627 if (isDependenceBarrier(MI
, AA
))
628 PendingLoads
.clear();
629 else if (MI
.mayLoad()) {
630 SmallVector
<const Value
*, 4> Objs
;
631 getUnderlyingObjects(&MI
, Objs
, MF
.getDataLayout());
633 Objs
.push_back(UnknownValue
);
634 for (auto V
: Objs
) {
635 SmallVector
<SUnit
*, 4> &SUs
= PendingLoads
[V
];
638 } else if (MI
.mayStore()) {
639 SmallVector
<const Value
*, 4> Objs
;
640 getUnderlyingObjects(&MI
, Objs
, MF
.getDataLayout());
642 Objs
.push_back(UnknownValue
);
643 for (auto V
: Objs
) {
644 MapVector
<const Value
*, SmallVector
<SUnit
*, 4>>::iterator I
=
645 PendingLoads
.find(V
);
646 if (I
== PendingLoads
.end())
648 for (auto Load
: I
->second
) {
649 if (isSuccOrder(Load
, &SU
))
651 MachineInstr
&LdMI
= *Load
->getInstr();
652 // First, perform the cheaper check that compares the base register.
653 // If they are the same and the load offset is less than the store
654 // offset, then mark the dependence as loop carried potentially.
655 const MachineOperand
*BaseOp1
, *BaseOp2
;
656 int64_t Offset1
, Offset2
;
657 if (TII
->getMemOperandWithOffset(LdMI
, BaseOp1
, Offset1
, TRI
) &&
658 TII
->getMemOperandWithOffset(MI
, BaseOp2
, Offset2
, TRI
)) {
659 if (BaseOp1
->isIdenticalTo(*BaseOp2
) &&
660 (int)Offset1
< (int)Offset2
) {
661 assert(TII
->areMemAccessesTriviallyDisjoint(LdMI
, MI
, AA
) &&
662 "What happened to the chain edge?");
663 SDep
Dep(Load
, SDep::Barrier
);
669 // Second, the more expensive check that uses alias analysis on the
670 // base registers. If they alias, and the load offset is less than
671 // the store offset, the mark the dependence as loop carried.
673 SDep
Dep(Load
, SDep::Barrier
);
678 MachineMemOperand
*MMO1
= *LdMI
.memoperands_begin();
679 MachineMemOperand
*MMO2
= *MI
.memoperands_begin();
680 if (!MMO1
->getValue() || !MMO2
->getValue()) {
681 SDep
Dep(Load
, SDep::Barrier
);
686 if (MMO1
->getValue() == MMO2
->getValue() &&
687 MMO1
->getOffset() <= MMO2
->getOffset()) {
688 SDep
Dep(Load
, SDep::Barrier
);
693 AliasResult AAResult
= AA
->alias(
694 MemoryLocation(MMO1
->getValue(), LocationSize::unknown(),
696 MemoryLocation(MMO2
->getValue(), LocationSize::unknown(),
699 if (AAResult
!= NoAlias
) {
700 SDep
Dep(Load
, SDep::Barrier
);
710 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
711 /// processes dependences for PHIs. This function adds true dependences
712 /// from a PHI to a use, and a loop carried dependence from the use to the
713 /// PHI. The loop carried dependence is represented as an anti dependence
714 /// edge. This function also removes chain dependences between unrelated
716 void SwingSchedulerDAG::updatePhiDependences() {
717 SmallVector
<SDep
, 4> RemoveDeps
;
718 const TargetSubtargetInfo
&ST
= MF
.getSubtarget
<TargetSubtargetInfo
>();
720 // Iterate over each DAG node.
721 for (SUnit
&I
: SUnits
) {
723 // Set to true if the instruction has an operand defined by a Phi.
724 unsigned HasPhiUse
= 0;
725 unsigned HasPhiDef
= 0;
726 MachineInstr
*MI
= I
.getInstr();
727 // Iterate over each operand, and we process the definitions.
728 for (MachineInstr::mop_iterator MOI
= MI
->operands_begin(),
729 MOE
= MI
->operands_end();
733 Register Reg
= MOI
->getReg();
735 // If the register is used by a Phi, then create an anti dependence.
736 for (MachineRegisterInfo::use_instr_iterator
737 UI
= MRI
.use_instr_begin(Reg
),
738 UE
= MRI
.use_instr_end();
740 MachineInstr
*UseMI
= &*UI
;
741 SUnit
*SU
= getSUnit(UseMI
);
742 if (SU
!= nullptr && UseMI
->isPHI()) {
744 SDep
Dep(SU
, SDep::Anti
, Reg
);
749 // Add a chain edge to a dependent Phi that isn't an existing
751 if (SU
->NodeNum
< I
.NodeNum
&& !I
.isPred(SU
))
752 I
.addPred(SDep(SU
, SDep::Barrier
));
756 } else if (MOI
->isUse()) {
757 // If the register is defined by a Phi, then create a true dependence.
758 MachineInstr
*DefMI
= MRI
.getUniqueVRegDef(Reg
);
759 if (DefMI
== nullptr)
761 SUnit
*SU
= getSUnit(DefMI
);
762 if (SU
!= nullptr && DefMI
->isPHI()) {
764 SDep
Dep(SU
, SDep::Data
, Reg
);
766 ST
.adjustSchedDependency(SU
, &I
, Dep
);
770 // Add a chain edge to a dependent Phi that isn't an existing
772 if (SU
->NodeNum
< I
.NodeNum
&& !I
.isPred(SU
))
773 I
.addPred(SDep(SU
, SDep::Barrier
));
778 // Remove order dependences from an unrelated Phi.
781 for (auto &PI
: I
.Preds
) {
782 MachineInstr
*PMI
= PI
.getSUnit()->getInstr();
783 if (PMI
->isPHI() && PI
.getKind() == SDep::Order
) {
784 if (I
.getInstr()->isPHI()) {
785 if (PMI
->getOperand(0).getReg() == HasPhiUse
)
787 if (getLoopPhiReg(*PMI
, PMI
->getParent()) == HasPhiDef
)
790 RemoveDeps
.push_back(PI
);
793 for (int i
= 0, e
= RemoveDeps
.size(); i
!= e
; ++i
)
794 I
.removePred(RemoveDeps
[i
]);
798 /// Iterate over each DAG node and see if we can change any dependences
799 /// in order to reduce the recurrence MII.
800 void SwingSchedulerDAG::changeDependences() {
801 // See if an instruction can use a value from the previous iteration.
802 // If so, we update the base and offset of the instruction and change
804 for (SUnit
&I
: SUnits
) {
805 unsigned BasePos
= 0, OffsetPos
= 0, NewBase
= 0;
806 int64_t NewOffset
= 0;
807 if (!canUseLastOffsetValue(I
.getInstr(), BasePos
, OffsetPos
, NewBase
,
811 // Get the MI and SUnit for the instruction that defines the original base.
812 Register OrigBase
= I
.getInstr()->getOperand(BasePos
).getReg();
813 MachineInstr
*DefMI
= MRI
.getUniqueVRegDef(OrigBase
);
816 SUnit
*DefSU
= getSUnit(DefMI
);
819 // Get the MI and SUnit for the instruction that defins the new base.
820 MachineInstr
*LastMI
= MRI
.getUniqueVRegDef(NewBase
);
823 SUnit
*LastSU
= getSUnit(LastMI
);
827 if (Topo
.IsReachable(&I
, LastSU
))
830 // Remove the dependence. The value now depends on a prior iteration.
831 SmallVector
<SDep
, 4> Deps
;
832 for (SUnit::pred_iterator P
= I
.Preds
.begin(), E
= I
.Preds
.end(); P
!= E
;
834 if (P
->getSUnit() == DefSU
)
836 for (int i
= 0, e
= Deps
.size(); i
!= e
; i
++) {
837 Topo
.RemovePred(&I
, Deps
[i
].getSUnit());
838 I
.removePred(Deps
[i
]);
840 // Remove the chain dependence between the instructions.
842 for (auto &P
: LastSU
->Preds
)
843 if (P
.getSUnit() == &I
&& P
.getKind() == SDep::Order
)
845 for (int i
= 0, e
= Deps
.size(); i
!= e
; i
++) {
846 Topo
.RemovePred(LastSU
, Deps
[i
].getSUnit());
847 LastSU
->removePred(Deps
[i
]);
850 // Add a dependence between the new instruction and the instruction
851 // that defines the new base.
852 SDep
Dep(&I
, SDep::Anti
, NewBase
);
853 Topo
.AddPred(LastSU
, &I
);
854 LastSU
->addPred(Dep
);
856 // Remember the base and offset information so that we can update the
857 // instruction during code generation.
858 InstrChanges
[&I
] = std::make_pair(NewBase
, NewOffset
);
864 // FuncUnitSorter - Comparison operator used to sort instructions by
865 // the number of functional unit choices.
866 struct FuncUnitSorter
{
867 const InstrItineraryData
*InstrItins
;
868 const MCSubtargetInfo
*STI
;
869 DenseMap
<unsigned, unsigned> Resources
;
871 FuncUnitSorter(const TargetSubtargetInfo
&TSI
)
872 : InstrItins(TSI
.getInstrItineraryData()), STI(&TSI
) {}
874 // Compute the number of functional unit alternatives needed
875 // at each stage, and take the minimum value. We prioritize the
876 // instructions by the least number of choices first.
877 unsigned minFuncUnits(const MachineInstr
*Inst
, unsigned &F
) const {
878 unsigned SchedClass
= Inst
->getDesc().getSchedClass();
879 unsigned min
= UINT_MAX
;
880 if (InstrItins
&& !InstrItins
->isEmpty()) {
881 for (const InstrStage
&IS
:
882 make_range(InstrItins
->beginStage(SchedClass
),
883 InstrItins
->endStage(SchedClass
))) {
884 unsigned funcUnits
= IS
.getUnits();
885 unsigned numAlternatives
= countPopulation(funcUnits
);
886 if (numAlternatives
< min
) {
887 min
= numAlternatives
;
893 if (STI
&& STI
->getSchedModel().hasInstrSchedModel()) {
894 const MCSchedClassDesc
*SCDesc
=
895 STI
->getSchedModel().getSchedClassDesc(SchedClass
);
896 if (!SCDesc
->isValid())
897 // No valid Schedule Class Desc for schedClass, should be
898 // Pseudo/PostRAPseudo
901 for (const MCWriteProcResEntry
&PRE
:
902 make_range(STI
->getWriteProcResBegin(SCDesc
),
903 STI
->getWriteProcResEnd(SCDesc
))) {
906 const MCProcResourceDesc
*ProcResource
=
907 STI
->getSchedModel().getProcResource(PRE
.ProcResourceIdx
);
908 unsigned NumUnits
= ProcResource
->NumUnits
;
909 if (NumUnits
< min
) {
911 F
= PRE
.ProcResourceIdx
;
916 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
919 // Compute the critical resources needed by the instruction. This
920 // function records the functional units needed by instructions that
921 // must use only one functional unit. We use this as a tie breaker
922 // for computing the resource MII. The instrutions that require
923 // the same, highly used, functional unit have high priority.
924 void calcCriticalResources(MachineInstr
&MI
) {
925 unsigned SchedClass
= MI
.getDesc().getSchedClass();
926 if (InstrItins
&& !InstrItins
->isEmpty()) {
927 for (const InstrStage
&IS
:
928 make_range(InstrItins
->beginStage(SchedClass
),
929 InstrItins
->endStage(SchedClass
))) {
930 unsigned FuncUnits
= IS
.getUnits();
931 if (countPopulation(FuncUnits
) == 1)
932 Resources
[FuncUnits
]++;
936 if (STI
&& STI
->getSchedModel().hasInstrSchedModel()) {
937 const MCSchedClassDesc
*SCDesc
=
938 STI
->getSchedModel().getSchedClassDesc(SchedClass
);
939 if (!SCDesc
->isValid())
940 // No valid Schedule Class Desc for schedClass, should be
941 // Pseudo/PostRAPseudo
944 for (const MCWriteProcResEntry
&PRE
:
945 make_range(STI
->getWriteProcResBegin(SCDesc
),
946 STI
->getWriteProcResEnd(SCDesc
))) {
949 Resources
[PRE
.ProcResourceIdx
]++;
953 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
956 /// Return true if IS1 has less priority than IS2.
957 bool operator()(const MachineInstr
*IS1
, const MachineInstr
*IS2
) const {
958 unsigned F1
= 0, F2
= 0;
959 unsigned MFUs1
= minFuncUnits(IS1
, F1
);
960 unsigned MFUs2
= minFuncUnits(IS2
, F2
);
962 return Resources
.lookup(F1
) < Resources
.lookup(F2
);
963 return MFUs1
> MFUs2
;
967 } // end anonymous namespace
969 /// Calculate the resource constrained minimum initiation interval for the
970 /// specified loop. We use the DFA to model the resources needed for
971 /// each instruction, and we ignore dependences. A different DFA is created
972 /// for each cycle that is required. When adding a new instruction, we attempt
973 /// to add it to each existing DFA, until a legal space is found. If the
974 /// instruction cannot be reserved in an existing DFA, we create a new one.
975 unsigned SwingSchedulerDAG::calculateResMII() {
977 LLVM_DEBUG(dbgs() << "calculateResMII:\n");
978 SmallVector
<ResourceManager
*, 8> Resources
;
979 MachineBasicBlock
*MBB
= Loop
.getHeader();
980 Resources
.push_back(new ResourceManager(&MF
.getSubtarget()));
982 // Sort the instructions by the number of available choices for scheduling,
983 // least to most. Use the number of critical resources as the tie breaker.
984 FuncUnitSorter FUS
= FuncUnitSorter(MF
.getSubtarget());
985 for (MachineBasicBlock::iterator I
= MBB
->getFirstNonPHI(),
986 E
= MBB
->getFirstTerminator();
988 FUS
.calcCriticalResources(*I
);
989 PriorityQueue
<MachineInstr
*, std::vector
<MachineInstr
*>, FuncUnitSorter
>
992 for (MachineBasicBlock::iterator I
= MBB
->getFirstNonPHI(),
993 E
= MBB
->getFirstTerminator();
995 FuncUnitOrder
.push(&*I
);
997 while (!FuncUnitOrder
.empty()) {
998 MachineInstr
*MI
= FuncUnitOrder
.top();
1000 if (TII
->isZeroCost(MI
->getOpcode()))
1002 // Attempt to reserve the instruction in an existing DFA. At least one
1003 // DFA is needed for each cycle.
1004 unsigned NumCycles
= getSUnit(MI
)->Latency
;
1005 unsigned ReservedCycles
= 0;
1006 SmallVectorImpl
<ResourceManager
*>::iterator RI
= Resources
.begin();
1007 SmallVectorImpl
<ResourceManager
*>::iterator RE
= Resources
.end();
1009 dbgs() << "Trying to reserve resource for " << NumCycles
1010 << " cycles for \n";
1013 for (unsigned C
= 0; C
< NumCycles
; ++C
)
1015 if ((*RI
)->canReserveResources(*MI
)) {
1016 (*RI
)->reserveResources(*MI
);
1022 LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
1023 << ", NumCycles:" << NumCycles
<< "\n");
1024 // Add new DFAs, if needed, to reserve resources.
1025 for (unsigned C
= ReservedCycles
; C
< NumCycles
; ++C
) {
1026 LLVM_DEBUG(if (SwpDebugResource
) dbgs()
1027 << "NewResource created to reserve resources"
1029 ResourceManager
*NewResource
= new ResourceManager(&MF
.getSubtarget());
1030 assert(NewResource
->canReserveResources(*MI
) && "Reserve error.");
1031 NewResource
->reserveResources(*MI
);
1032 Resources
.push_back(NewResource
);
1035 int Resmii
= Resources
.size();
1036 LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii
<< "\n");
1037 // Delete the memory for each of the DFAs that were created earlier.
1038 for (ResourceManager
*RI
: Resources
) {
1039 ResourceManager
*D
= RI
;
1046 /// Calculate the recurrence-constrainted minimum initiation interval.
1047 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1048 /// for each circuit. The II needs to satisfy the inequality
1049 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1050 /// II that satisfies the inequality, and the RecMII is the maximum
1051 /// of those values.
1052 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType
&NodeSets
) {
1053 unsigned RecMII
= 0;
1055 for (NodeSet
&Nodes
: NodeSets
) {
1059 unsigned Delay
= Nodes
.getLatency();
1060 unsigned Distance
= 1;
1062 // ii = ceil(delay / distance)
1063 unsigned CurMII
= (Delay
+ Distance
- 1) / Distance
;
1064 Nodes
.setRecMII(CurMII
);
1065 if (CurMII
> RecMII
)
1072 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1073 /// but we do this to find the circuits, and then change them back.
1074 static void swapAntiDependences(std::vector
<SUnit
> &SUnits
) {
1075 SmallVector
<std::pair
<SUnit
*, SDep
>, 8> DepsAdded
;
1076 for (unsigned i
= 0, e
= SUnits
.size(); i
!= e
; ++i
) {
1077 SUnit
*SU
= &SUnits
[i
];
1078 for (SUnit::pred_iterator IP
= SU
->Preds
.begin(), EP
= SU
->Preds
.end();
1080 if (IP
->getKind() != SDep::Anti
)
1082 DepsAdded
.push_back(std::make_pair(SU
, *IP
));
1085 for (SmallVector
<std::pair
<SUnit
*, SDep
>, 8>::iterator I
= DepsAdded
.begin(),
1086 E
= DepsAdded
.end();
1088 // Remove this anti dependency and add one in the reverse direction.
1089 SUnit
*SU
= I
->first
;
1090 SDep
&D
= I
->second
;
1091 SUnit
*TargetSU
= D
.getSUnit();
1092 unsigned Reg
= D
.getReg();
1093 unsigned Lat
= D
.getLatency();
1095 SDep
Dep(SU
, SDep::Anti
, Reg
);
1096 Dep
.setLatency(Lat
);
1097 TargetSU
->addPred(Dep
);
1101 /// Create the adjacency structure of the nodes in the graph.
1102 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1103 SwingSchedulerDAG
*DAG
) {
1104 BitVector
Added(SUnits
.size());
1105 DenseMap
<int, int> OutputDeps
;
1106 for (int i
= 0, e
= SUnits
.size(); i
!= e
; ++i
) {
1108 // Add any successor to the adjacency matrix and exclude duplicates.
1109 for (auto &SI
: SUnits
[i
].Succs
) {
1110 // Only create a back-edge on the first and last nodes of a dependence
1111 // chain. This records any chains and adds them later.
1112 if (SI
.getKind() == SDep::Output
) {
1113 int N
= SI
.getSUnit()->NodeNum
;
1115 auto Dep
= OutputDeps
.find(BackEdge
);
1116 if (Dep
!= OutputDeps
.end()) {
1117 BackEdge
= Dep
->second
;
1118 OutputDeps
.erase(Dep
);
1120 OutputDeps
[N
] = BackEdge
;
1122 // Do not process a boundary node, an artificial node.
1123 // A back-edge is processed only if it goes to a Phi.
1124 if (SI
.getSUnit()->isBoundaryNode() || SI
.isArtificial() ||
1125 (SI
.getKind() == SDep::Anti
&& !SI
.getSUnit()->getInstr()->isPHI()))
1127 int N
= SI
.getSUnit()->NodeNum
;
1128 if (!Added
.test(N
)) {
1129 AdjK
[i
].push_back(N
);
1133 // A chain edge between a store and a load is treated as a back-edge in the
1134 // adjacency matrix.
1135 for (auto &PI
: SUnits
[i
].Preds
) {
1136 if (!SUnits
[i
].getInstr()->mayStore() ||
1137 !DAG
->isLoopCarriedDep(&SUnits
[i
], PI
, false))
1139 if (PI
.getKind() == SDep::Order
&& PI
.getSUnit()->getInstr()->mayLoad()) {
1140 int N
= PI
.getSUnit()->NodeNum
;
1141 if (!Added
.test(N
)) {
1142 AdjK
[i
].push_back(N
);
1148 // Add back-edges in the adjacency matrix for the output dependences.
1149 for (auto &OD
: OutputDeps
)
1150 if (!Added
.test(OD
.second
)) {
1151 AdjK
[OD
.first
].push_back(OD
.second
);
1152 Added
.set(OD
.second
);
1156 /// Identify an elementary circuit in the dependence graph starting at the
1158 bool SwingSchedulerDAG::Circuits::circuit(int V
, int S
, NodeSetType
&NodeSets
,
1160 SUnit
*SV
= &SUnits
[V
];
1165 for (auto W
: AdjK
[V
]) {
1166 if (NumPaths
> MaxPaths
)
1172 NodeSets
.push_back(NodeSet(Stack
.begin(), Stack
.end()));
1176 } else if (!Blocked
.test(W
)) {
1177 if (circuit(W
, S
, NodeSets
,
1178 Node2Idx
->at(W
) < Node2Idx
->at(V
) ? true : HasBackedge
))
1186 for (auto W
: AdjK
[V
]) {
1189 if (B
[W
].count(SV
) == 0)
1197 /// Unblock a node in the circuit finding algorithm.
1198 void SwingSchedulerDAG::Circuits::unblock(int U
) {
1200 SmallPtrSet
<SUnit
*, 4> &BU
= B
[U
];
1201 while (!BU
.empty()) {
1202 SmallPtrSet
<SUnit
*, 4>::iterator SI
= BU
.begin();
1203 assert(SI
!= BU
.end() && "Invalid B set.");
1206 if (Blocked
.test(W
->NodeNum
))
1207 unblock(W
->NodeNum
);
1211 /// Identify all the elementary circuits in the dependence graph using
1212 /// Johnson's circuit algorithm.
1213 void SwingSchedulerDAG::findCircuits(NodeSetType
&NodeSets
) {
1214 // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1215 // but we do this to find the circuits, and then change them back.
1216 swapAntiDependences(SUnits
);
1218 Circuits
Cir(SUnits
, Topo
);
1219 // Create the adjacency structure.
1220 Cir
.createAdjacencyStructure(this);
1221 for (int i
= 0, e
= SUnits
.size(); i
!= e
; ++i
) {
1223 Cir
.circuit(i
, i
, NodeSets
);
1226 // Change the dependences back so that we've created a DAG again.
1227 swapAntiDependences(SUnits
);
1230 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1231 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1232 // additional copies that are needed across iterations. An artificial dependence
1233 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1235 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1236 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1237 // PHI-------True-Dep------> USEOfPhi
1239 // The mutation creates
1240 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1242 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1243 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1244 // late to avoid additional copies across iterations. The possible scheduling
1246 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1248 void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs
*DAG
) {
1249 for (SUnit
&SU
: DAG
->SUnits
) {
1250 // Find the COPY/REG_SEQUENCE instruction.
1251 if (!SU
.getInstr()->isCopy() && !SU
.getInstr()->isRegSequence())
1254 // Record the loop carried PHIs.
1255 SmallVector
<SUnit
*, 4> PHISUs
;
1256 // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1257 SmallVector
<SUnit
*, 4> SrcSUs
;
1259 for (auto &Dep
: SU
.Preds
) {
1260 SUnit
*TmpSU
= Dep
.getSUnit();
1261 MachineInstr
*TmpMI
= TmpSU
->getInstr();
1262 SDep::Kind DepKind
= Dep
.getKind();
1263 // Save the loop carried PHI.
1264 if (DepKind
== SDep::Anti
&& TmpMI
->isPHI())
1265 PHISUs
.push_back(TmpSU
);
1266 // Save the source of COPY/REG_SEQUENCE.
1267 // If the source has no pre-decessors, we will end up creating cycles.
1268 else if (DepKind
== SDep::Data
&& !TmpMI
->isPHI() && TmpSU
->NumPreds
> 0)
1269 SrcSUs
.push_back(TmpSU
);
1272 if (PHISUs
.size() == 0 || SrcSUs
.size() == 0)
1275 // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1276 // SUnit to the container.
1277 SmallVector
<SUnit
*, 8> UseSUs
;
1278 for (auto I
= PHISUs
.begin(); I
!= PHISUs
.end(); ++I
) {
1279 for (auto &Dep
: (*I
)->Succs
) {
1280 if (Dep
.getKind() != SDep::Data
)
1283 SUnit
*TmpSU
= Dep
.getSUnit();
1284 MachineInstr
*TmpMI
= TmpSU
->getInstr();
1285 if (TmpMI
->isPHI() || TmpMI
->isRegSequence()) {
1286 PHISUs
.push_back(TmpSU
);
1289 UseSUs
.push_back(TmpSU
);
1293 if (UseSUs
.size() == 0)
1296 SwingSchedulerDAG
*SDAG
= cast
<SwingSchedulerDAG
>(DAG
);
1297 // Add the artificial dependencies if it does not form a cycle.
1298 for (auto I
: UseSUs
) {
1299 for (auto Src
: SrcSUs
) {
1300 if (!SDAG
->Topo
.IsReachable(I
, Src
) && Src
!= I
) {
1301 Src
->addPred(SDep(I
, SDep::Artificial
));
1302 SDAG
->Topo
.AddPred(Src
, I
);
1309 /// Return true for DAG nodes that we ignore when computing the cost functions.
1310 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1311 /// in the calculation of the ASAP, ALAP, etc functions.
1312 static bool ignoreDependence(const SDep
&D
, bool isPred
) {
1313 if (D
.isArtificial())
1315 return D
.getKind() == SDep::Anti
&& isPred
;
1318 /// Compute several functions need to order the nodes for scheduling.
1319 /// ASAP - Earliest time to schedule a node.
1320 /// ALAP - Latest time to schedule a node.
1321 /// MOV - Mobility function, difference between ALAP and ASAP.
1322 /// D - Depth of each node.
1323 /// H - Height of each node.
1324 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType
&NodeSets
) {
1325 ScheduleInfo
.resize(SUnits
.size());
1328 for (ScheduleDAGTopologicalSort::const_iterator I
= Topo
.begin(),
1331 const SUnit
&SU
= SUnits
[*I
];
1337 // Compute ASAP and ZeroLatencyDepth.
1338 for (ScheduleDAGTopologicalSort::const_iterator I
= Topo
.begin(),
1342 int zeroLatencyDepth
= 0;
1343 SUnit
*SU
= &SUnits
[*I
];
1344 for (SUnit::const_pred_iterator IP
= SU
->Preds
.begin(),
1345 EP
= SU
->Preds
.end();
1347 SUnit
*pred
= IP
->getSUnit();
1348 if (IP
->getLatency() == 0)
1350 std::max(zeroLatencyDepth
, getZeroLatencyDepth(pred
) + 1);
1351 if (ignoreDependence(*IP
, true))
1353 asap
= std::max(asap
, (int)(getASAP(pred
) + IP
->getLatency() -
1354 getDistance(pred
, SU
, *IP
) * MII
));
1356 maxASAP
= std::max(maxASAP
, asap
);
1357 ScheduleInfo
[*I
].ASAP
= asap
;
1358 ScheduleInfo
[*I
].ZeroLatencyDepth
= zeroLatencyDepth
;
1361 // Compute ALAP, ZeroLatencyHeight, and MOV.
1362 for (ScheduleDAGTopologicalSort::const_reverse_iterator I
= Topo
.rbegin(),
1366 int zeroLatencyHeight
= 0;
1367 SUnit
*SU
= &SUnits
[*I
];
1368 for (SUnit::const_succ_iterator IS
= SU
->Succs
.begin(),
1369 ES
= SU
->Succs
.end();
1371 SUnit
*succ
= IS
->getSUnit();
1372 if (IS
->getLatency() == 0)
1374 std::max(zeroLatencyHeight
, getZeroLatencyHeight(succ
) + 1);
1375 if (ignoreDependence(*IS
, true))
1377 alap
= std::min(alap
, (int)(getALAP(succ
) - IS
->getLatency() +
1378 getDistance(SU
, succ
, *IS
) * MII
));
1381 ScheduleInfo
[*I
].ALAP
= alap
;
1382 ScheduleInfo
[*I
].ZeroLatencyHeight
= zeroLatencyHeight
;
1385 // After computing the node functions, compute the summary for each node set.
1386 for (NodeSet
&I
: NodeSets
)
1387 I
.computeNodeSetInfo(this);
1390 for (unsigned i
= 0; i
< SUnits
.size(); i
++) {
1391 dbgs() << "\tNode " << i
<< ":\n";
1392 dbgs() << "\t ASAP = " << getASAP(&SUnits
[i
]) << "\n";
1393 dbgs() << "\t ALAP = " << getALAP(&SUnits
[i
]) << "\n";
1394 dbgs() << "\t MOV = " << getMOV(&SUnits
[i
]) << "\n";
1395 dbgs() << "\t D = " << getDepth(&SUnits
[i
]) << "\n";
1396 dbgs() << "\t H = " << getHeight(&SUnits
[i
]) << "\n";
1397 dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits
[i
]) << "\n";
1398 dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits
[i
]) << "\n";
1403 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1404 /// as the predecessors of the elements of NodeOrder that are not also in
1406 static bool pred_L(SetVector
<SUnit
*> &NodeOrder
,
1407 SmallSetVector
<SUnit
*, 8> &Preds
,
1408 const NodeSet
*S
= nullptr) {
1410 for (SetVector
<SUnit
*>::iterator I
= NodeOrder
.begin(), E
= NodeOrder
.end();
1412 for (SUnit::pred_iterator PI
= (*I
)->Preds
.begin(), PE
= (*I
)->Preds
.end();
1414 if (S
&& S
->count(PI
->getSUnit()) == 0)
1416 if (ignoreDependence(*PI
, true))
1418 if (NodeOrder
.count(PI
->getSUnit()) == 0)
1419 Preds
.insert(PI
->getSUnit());
1421 // Back-edges are predecessors with an anti-dependence.
1422 for (SUnit::const_succ_iterator IS
= (*I
)->Succs
.begin(),
1423 ES
= (*I
)->Succs
.end();
1425 if (IS
->getKind() != SDep::Anti
)
1427 if (S
&& S
->count(IS
->getSUnit()) == 0)
1429 if (NodeOrder
.count(IS
->getSUnit()) == 0)
1430 Preds
.insert(IS
->getSUnit());
1433 return !Preds
.empty();
1436 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1437 /// as the successors of the elements of NodeOrder that are not also in
1439 static bool succ_L(SetVector
<SUnit
*> &NodeOrder
,
1440 SmallSetVector
<SUnit
*, 8> &Succs
,
1441 const NodeSet
*S
= nullptr) {
1443 for (SetVector
<SUnit
*>::iterator I
= NodeOrder
.begin(), E
= NodeOrder
.end();
1445 for (SUnit::succ_iterator SI
= (*I
)->Succs
.begin(), SE
= (*I
)->Succs
.end();
1447 if (S
&& S
->count(SI
->getSUnit()) == 0)
1449 if (ignoreDependence(*SI
, false))
1451 if (NodeOrder
.count(SI
->getSUnit()) == 0)
1452 Succs
.insert(SI
->getSUnit());
1454 for (SUnit::const_pred_iterator PI
= (*I
)->Preds
.begin(),
1455 PE
= (*I
)->Preds
.end();
1457 if (PI
->getKind() != SDep::Anti
)
1459 if (S
&& S
->count(PI
->getSUnit()) == 0)
1461 if (NodeOrder
.count(PI
->getSUnit()) == 0)
1462 Succs
.insert(PI
->getSUnit());
1465 return !Succs
.empty();
1468 /// Return true if there is a path from the specified node to any of the nodes
1469 /// in DestNodes. Keep track and return the nodes in any path.
1470 static bool computePath(SUnit
*Cur
, SetVector
<SUnit
*> &Path
,
1471 SetVector
<SUnit
*> &DestNodes
,
1472 SetVector
<SUnit
*> &Exclude
,
1473 SmallPtrSet
<SUnit
*, 8> &Visited
) {
1474 if (Cur
->isBoundaryNode())
1476 if (Exclude
.count(Cur
) != 0)
1478 if (DestNodes
.count(Cur
) != 0)
1480 if (!Visited
.insert(Cur
).second
)
1481 return Path
.count(Cur
) != 0;
1482 bool FoundPath
= false;
1483 for (auto &SI
: Cur
->Succs
)
1484 FoundPath
|= computePath(SI
.getSUnit(), Path
, DestNodes
, Exclude
, Visited
);
1485 for (auto &PI
: Cur
->Preds
)
1486 if (PI
.getKind() == SDep::Anti
)
1488 computePath(PI
.getSUnit(), Path
, DestNodes
, Exclude
, Visited
);
/// Return true if Set1 is a subset of Set2. Both containers only need
/// begin()/end() iteration (Set1) and count() membership lookup (Set2).
template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
    if (Set2.count(*I) == 0)
      return false;
  return true;
}
1502 /// Compute the live-out registers for the instructions in a node-set.
1503 /// The live-out registers are those that are defined in the node-set,
1504 /// but not used. Except for use operands of Phis.
1505 static void computeLiveOuts(MachineFunction
&MF
, RegPressureTracker
&RPTracker
,
1507 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
1508 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
1509 SmallVector
<RegisterMaskPair
, 8> LiveOutRegs
;
1510 SmallSet
<unsigned, 4> Uses
;
1511 for (SUnit
*SU
: NS
) {
1512 const MachineInstr
*MI
= SU
->getInstr();
1515 for (const MachineOperand
&MO
: MI
->operands())
1516 if (MO
.isReg() && MO
.isUse()) {
1517 Register Reg
= MO
.getReg();
1518 if (Register::isVirtualRegister(Reg
))
1520 else if (MRI
.isAllocatable(Reg
))
1521 for (MCRegUnitIterator
Units(Reg
, TRI
); Units
.isValid(); ++Units
)
1522 Uses
.insert(*Units
);
1525 for (SUnit
*SU
: NS
)
1526 for (const MachineOperand
&MO
: SU
->getInstr()->operands())
1527 if (MO
.isReg() && MO
.isDef() && !MO
.isDead()) {
1528 Register Reg
= MO
.getReg();
1529 if (Register::isVirtualRegister(Reg
)) {
1530 if (!Uses
.count(Reg
))
1531 LiveOutRegs
.push_back(RegisterMaskPair(Reg
,
1532 LaneBitmask::getNone()));
1533 } else if (MRI
.isAllocatable(Reg
)) {
1534 for (MCRegUnitIterator
Units(Reg
, TRI
); Units
.isValid(); ++Units
)
1535 if (!Uses
.count(*Units
))
1536 LiveOutRegs
.push_back(RegisterMaskPair(*Units
,
1537 LaneBitmask::getNone()));
1540 RPTracker
.addLiveRegs(LiveOutRegs
);
1543 /// A heuristic to filter nodes in recurrent node-sets if the register
1544 /// pressure of a set is too high.
1545 void SwingSchedulerDAG::registerPressureFilter(NodeSetType
&NodeSets
) {
1546 for (auto &NS
: NodeSets
) {
1547 // Skip small node-sets since they won't cause register pressure problems.
1550 IntervalPressure RecRegPressure
;
1551 RegPressureTracker
RecRPTracker(RecRegPressure
);
1552 RecRPTracker
.init(&MF
, &RegClassInfo
, &LIS
, BB
, BB
->end(), false, true);
1553 computeLiveOuts(MF
, RecRPTracker
, NS
);
1554 RecRPTracker
.closeBottom();
1556 std::vector
<SUnit
*> SUnits(NS
.begin(), NS
.end());
1557 llvm::sort(SUnits
, [](const SUnit
*A
, const SUnit
*B
) {
1558 return A
->NodeNum
> B
->NodeNum
;
1561 for (auto &SU
: SUnits
) {
1562 // Since we're computing the register pressure for a subset of the
1563 // instructions in a block, we need to set the tracker for each
1564 // instruction in the node-set. The tracker is set to the instruction
1565 // just after the one we're interested in.
1566 MachineBasicBlock::const_iterator CurInstI
= SU
->getInstr();
1567 RecRPTracker
.setPos(std::next(CurInstI
));
1569 RegPressureDelta RPDelta
;
1570 ArrayRef
<PressureChange
> CriticalPSets
;
1571 RecRPTracker
.getMaxUpwardPressureDelta(SU
->getInstr(), nullptr, RPDelta
,
1573 RecRegPressure
.MaxSetPressure
);
1574 if (RPDelta
.Excess
.isValid()) {
1576 dbgs() << "Excess register pressure: SU(" << SU
->NodeNum
<< ") "
1577 << TRI
->getRegPressureSetName(RPDelta
.Excess
.getPSet())
1578 << ":" << RPDelta
.Excess
.getUnitInc());
1579 NS
.setExceedPressure(SU
);
1582 RecRPTracker
.recede();
1587 /// A heuristic to colocate node sets that have the same set of
1589 void SwingSchedulerDAG::colocateNodeSets(NodeSetType
&NodeSets
) {
1590 unsigned Colocate
= 0;
1591 for (int i
= 0, e
= NodeSets
.size(); i
< e
; ++i
) {
1592 NodeSet
&N1
= NodeSets
[i
];
1593 SmallSetVector
<SUnit
*, 8> S1
;
1594 if (N1
.empty() || !succ_L(N1
, S1
))
1596 for (int j
= i
+ 1; j
< e
; ++j
) {
1597 NodeSet
&N2
= NodeSets
[j
];
1598 if (N1
.compareRecMII(N2
) != 0)
1600 SmallSetVector
<SUnit
*, 8> S2
;
1601 if (N2
.empty() || !succ_L(N2
, S2
))
1603 if (isSubset(S1
, S2
) && S1
.size() == S2
.size()) {
1604 N1
.setColocate(++Colocate
);
1605 N2
.setColocate(Colocate
);
1612 /// Check if the existing node-sets are profitable. If not, then ignore the
1613 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1614 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1615 /// then it's best to try to schedule all instructions together instead of
1616 /// starting with the recurrent node-sets.
1617 void SwingSchedulerDAG::checkNodeSets(NodeSetType
&NodeSets
) {
1618 // Look for loops with a large MII.
1621 // Check if the node-set contains only a simple add recurrence.
1622 for (auto &NS
: NodeSets
) {
1623 if (NS
.getRecMII() > 2)
1625 if (NS
.getMaxDepth() > MII
)
1629 LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1633 /// Add the nodes that do not belong to a recurrence set into groups
1634 /// based upon connected componenets.
1635 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType
&NodeSets
) {
1636 SetVector
<SUnit
*> NodesAdded
;
1637 SmallPtrSet
<SUnit
*, 8> Visited
;
1638 // Add the nodes that are on a path between the previous node sets and
1639 // the current node set.
1640 for (NodeSet
&I
: NodeSets
) {
1641 SmallSetVector
<SUnit
*, 8> N
;
1642 // Add the nodes from the current node set to the previous node set.
1644 SetVector
<SUnit
*> Path
;
1645 for (SUnit
*NI
: N
) {
1647 computePath(NI
, Path
, NodesAdded
, I
, Visited
);
1650 I
.insert(Path
.begin(), Path
.end());
1652 // Add the nodes from the previous node set to the current node set.
1654 if (succ_L(NodesAdded
, N
)) {
1655 SetVector
<SUnit
*> Path
;
1656 for (SUnit
*NI
: N
) {
1658 computePath(NI
, Path
, I
, NodesAdded
, Visited
);
1661 I
.insert(Path
.begin(), Path
.end());
1663 NodesAdded
.insert(I
.begin(), I
.end());
1666 // Create a new node set with the connected nodes of any successor of a node
1667 // in a recurrent set.
1669 SmallSetVector
<SUnit
*, 8> N
;
1670 if (succ_L(NodesAdded
, N
))
1672 addConnectedNodes(I
, NewSet
, NodesAdded
);
1673 if (!NewSet
.empty())
1674 NodeSets
.push_back(NewSet
);
1676 // Create a new node set with the connected nodes of any predecessor of a node
1677 // in a recurrent set.
1679 if (pred_L(NodesAdded
, N
))
1681 addConnectedNodes(I
, NewSet
, NodesAdded
);
1682 if (!NewSet
.empty())
1683 NodeSets
.push_back(NewSet
);
1685 // Create new nodes sets with the connected nodes any remaining node that
1686 // has no predecessor.
1687 for (unsigned i
= 0; i
< SUnits
.size(); ++i
) {
1688 SUnit
*SU
= &SUnits
[i
];
1689 if (NodesAdded
.count(SU
) == 0) {
1691 addConnectedNodes(SU
, NewSet
, NodesAdded
);
1692 if (!NewSet
.empty())
1693 NodeSets
.push_back(NewSet
);
1698 /// Add the node to the set, and add all of its connected nodes to the set.
1699 void SwingSchedulerDAG::addConnectedNodes(SUnit
*SU
, NodeSet
&NewSet
,
1700 SetVector
<SUnit
*> &NodesAdded
) {
1702 NodesAdded
.insert(SU
);
1703 for (auto &SI
: SU
->Succs
) {
1704 SUnit
*Successor
= SI
.getSUnit();
1705 if (!SI
.isArtificial() && NodesAdded
.count(Successor
) == 0)
1706 addConnectedNodes(Successor
, NewSet
, NodesAdded
);
1708 for (auto &PI
: SU
->Preds
) {
1709 SUnit
*Predecessor
= PI
.getSUnit();
1710 if (!PI
.isArtificial() && NodesAdded
.count(Predecessor
) == 0)
1711 addConnectedNodes(Predecessor
, NewSet
, NodesAdded
);
1715 /// Return true if Set1 contains elements in Set2. The elements in common
1716 /// are returned in a different container.
1717 static bool isIntersect(SmallSetVector
<SUnit
*, 8> &Set1
, const NodeSet
&Set2
,
1718 SmallSetVector
<SUnit
*, 8> &Result
) {
1720 for (unsigned i
= 0, e
= Set1
.size(); i
!= e
; ++i
) {
1721 SUnit
*SU
= Set1
[i
];
1722 if (Set2
.count(SU
) != 0)
1725 return !Result
.empty();
1728 /// Merge the recurrence node sets that have the same initial node.
1729 void SwingSchedulerDAG::fuseRecs(NodeSetType
&NodeSets
) {
1730 for (NodeSetType::iterator I
= NodeSets
.begin(), E
= NodeSets
.end(); I
!= E
;
1733 for (NodeSetType::iterator J
= I
+ 1; J
!= E
;) {
1735 if (NI
.getNode(0)->NodeNum
== NJ
.getNode(0)->NodeNum
) {
1736 if (NJ
.compareRecMII(NI
) > 0)
1737 NI
.setRecMII(NJ
.getRecMII());
1738 for (NodeSet::iterator NII
= J
->begin(), ENI
= J
->end(); NII
!= ENI
;
1750 /// Remove nodes that have been scheduled in previous NodeSets.
1751 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType
&NodeSets
) {
1752 for (NodeSetType::iterator I
= NodeSets
.begin(), E
= NodeSets
.end(); I
!= E
;
1754 for (NodeSetType::iterator J
= I
+ 1; J
!= E
;) {
1755 J
->remove_if([&](SUnit
*SUJ
) { return I
->count(SUJ
); });
1766 /// Compute an ordered list of the dependence graph nodes, which
1767 /// indicates the order that the nodes will be scheduled. This is a
1768 /// two-level algorithm. First, a partial order is created, which
1769 /// consists of a list of sets ordered from highest to lowest priority.
1770 void SwingSchedulerDAG::computeNodeOrder(NodeSetType
&NodeSets
) {
1771 SmallSetVector
<SUnit
*, 8> R
;
1774 for (auto &Nodes
: NodeSets
) {
1775 LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes
.size() << "\n");
1777 SmallSetVector
<SUnit
*, 8> N
;
1778 if (pred_L(NodeOrder
, N
) && isSubset(N
, Nodes
)) {
1779 R
.insert(N
.begin(), N
.end());
1781 LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1782 } else if (succ_L(NodeOrder
, N
) && isSubset(N
, Nodes
)) {
1783 R
.insert(N
.begin(), N
.end());
1785 LLVM_DEBUG(dbgs() << " Top down (succs) ");
1786 } else if (isIntersect(N
, Nodes
, R
)) {
1787 // If some of the successors are in the existing node-set, then use the
1788 // top-down ordering.
1790 LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1791 } else if (NodeSets
.size() == 1) {
1792 for (auto &N
: Nodes
)
1793 if (N
->Succs
.size() == 0)
1796 LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1798 // Find the node with the highest ASAP.
1799 SUnit
*maxASAP
= nullptr;
1800 for (SUnit
*SU
: Nodes
) {
1801 if (maxASAP
== nullptr || getASAP(SU
) > getASAP(maxASAP
) ||
1802 (getASAP(SU
) == getASAP(maxASAP
) && SU
->NodeNum
> maxASAP
->NodeNum
))
1807 LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1810 while (!R
.empty()) {
1811 if (Order
== TopDown
) {
1812 // Choose the node with the maximum height. If more than one, choose
1813 // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1814 // choose the node with the lowest MOV.
1815 while (!R
.empty()) {
1816 SUnit
*maxHeight
= nullptr;
1817 for (SUnit
*I
: R
) {
1818 if (maxHeight
== nullptr || getHeight(I
) > getHeight(maxHeight
))
1820 else if (getHeight(I
) == getHeight(maxHeight
) &&
1821 getZeroLatencyHeight(I
) > getZeroLatencyHeight(maxHeight
))
1823 else if (getHeight(I
) == getHeight(maxHeight
) &&
1824 getZeroLatencyHeight(I
) ==
1825 getZeroLatencyHeight(maxHeight
) &&
1826 getMOV(I
) < getMOV(maxHeight
))
1829 NodeOrder
.insert(maxHeight
);
1830 LLVM_DEBUG(dbgs() << maxHeight
->NodeNum
<< " ");
1831 R
.remove(maxHeight
);
1832 for (const auto &I
: maxHeight
->Succs
) {
1833 if (Nodes
.count(I
.getSUnit()) == 0)
1835 if (NodeOrder
.count(I
.getSUnit()) != 0)
1837 if (ignoreDependence(I
, false))
1839 R
.insert(I
.getSUnit());
1841 // Back-edges are predecessors with an anti-dependence.
1842 for (const auto &I
: maxHeight
->Preds
) {
1843 if (I
.getKind() != SDep::Anti
)
1845 if (Nodes
.count(I
.getSUnit()) == 0)
1847 if (NodeOrder
.count(I
.getSUnit()) != 0)
1849 R
.insert(I
.getSUnit());
1853 LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1854 SmallSetVector
<SUnit
*, 8> N
;
1855 if (pred_L(NodeOrder
, N
, &Nodes
))
1856 R
.insert(N
.begin(), N
.end());
1858 // Choose the node with the maximum depth. If more than one, choose
1859 // the node with the maximum ZeroLatencyDepth. If still more than one,
1860 // choose the node with the lowest MOV.
1861 while (!R
.empty()) {
1862 SUnit
*maxDepth
= nullptr;
1863 for (SUnit
*I
: R
) {
1864 if (maxDepth
== nullptr || getDepth(I
) > getDepth(maxDepth
))
1866 else if (getDepth(I
) == getDepth(maxDepth
) &&
1867 getZeroLatencyDepth(I
) > getZeroLatencyDepth(maxDepth
))
1869 else if (getDepth(I
) == getDepth(maxDepth
) &&
1870 getZeroLatencyDepth(I
) == getZeroLatencyDepth(maxDepth
) &&
1871 getMOV(I
) < getMOV(maxDepth
))
1874 NodeOrder
.insert(maxDepth
);
1875 LLVM_DEBUG(dbgs() << maxDepth
->NodeNum
<< " ");
1877 if (Nodes
.isExceedSU(maxDepth
)) {
1880 R
.insert(Nodes
.getNode(0));
1883 for (const auto &I
: maxDepth
->Preds
) {
1884 if (Nodes
.count(I
.getSUnit()) == 0)
1886 if (NodeOrder
.count(I
.getSUnit()) != 0)
1888 R
.insert(I
.getSUnit());
1890 // Back-edges are predecessors with an anti-dependence.
1891 for (const auto &I
: maxDepth
->Succs
) {
1892 if (I
.getKind() != SDep::Anti
)
1894 if (Nodes
.count(I
.getSUnit()) == 0)
1896 if (NodeOrder
.count(I
.getSUnit()) != 0)
1898 R
.insert(I
.getSUnit());
1902 LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
1903 SmallSetVector
<SUnit
*, 8> N
;
1904 if (succ_L(NodeOrder
, N
, &Nodes
))
1905 R
.insert(N
.begin(), N
.end());
1908 LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1912 dbgs() << "Node order: ";
1913 for (SUnit
*I
: NodeOrder
)
1914 dbgs() << " " << I
->NodeNum
<< " ";
1919 /// Process the nodes in the computed order and create the pipelined schedule
1920 /// of the instructions, if possible. Return true if a schedule is found.
1921 bool SwingSchedulerDAG::schedulePipeline(SMSchedule
&Schedule
) {
1923 if (NodeOrder
.empty()){
1924 LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
1928 bool scheduleFound
= false;
1930 // Keep increasing II until a valid schedule is found.
1931 for (II
= MII
; II
<= MAX_II
&& !scheduleFound
; ++II
) {
1933 Schedule
.setInitiationInterval(II
);
1934 LLVM_DEBUG(dbgs() << "Try to schedule with " << II
<< "\n");
1936 SetVector
<SUnit
*>::iterator NI
= NodeOrder
.begin();
1937 SetVector
<SUnit
*>::iterator NE
= NodeOrder
.end();
1941 // Compute the schedule time for the instruction, which is based
1942 // upon the scheduled time for any predecessors/successors.
1943 int EarlyStart
= INT_MIN
;
1944 int LateStart
= INT_MAX
;
1945 // These values are set when the size of the schedule window is limited
1946 // due to chain dependences.
1947 int SchedEnd
= INT_MAX
;
1948 int SchedStart
= INT_MIN
;
1949 Schedule
.computeStart(SU
, &EarlyStart
, &LateStart
, &SchedEnd
, &SchedStart
,
1953 dbgs() << "Inst (" << SU
->NodeNum
<< ") ";
1954 SU
->getInstr()->dump();
1958 dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart
,
1959 LateStart
, SchedEnd
, SchedStart
);
1962 if (EarlyStart
> LateStart
|| SchedEnd
< EarlyStart
||
1963 SchedStart
> LateStart
)
1964 scheduleFound
= false;
1965 else if (EarlyStart
!= INT_MIN
&& LateStart
== INT_MAX
) {
1966 SchedEnd
= std::min(SchedEnd
, EarlyStart
+ (int)II
- 1);
1967 scheduleFound
= Schedule
.insert(SU
, EarlyStart
, SchedEnd
, II
);
1968 } else if (EarlyStart
== INT_MIN
&& LateStart
!= INT_MAX
) {
1969 SchedStart
= std::max(SchedStart
, LateStart
- (int)II
+ 1);
1970 scheduleFound
= Schedule
.insert(SU
, LateStart
, SchedStart
, II
);
1971 } else if (EarlyStart
!= INT_MIN
&& LateStart
!= INT_MAX
) {
1973 std::min(SchedEnd
, std::min(LateStart
, EarlyStart
+ (int)II
- 1));
1974 // When scheduling a Phi it is better to start at the late cycle and go
1975 // backwards. The default order may insert the Phi too far away from
1976 // its first dependence.
1977 if (SU
->getInstr()->isPHI())
1978 scheduleFound
= Schedule
.insert(SU
, SchedEnd
, EarlyStart
, II
);
1980 scheduleFound
= Schedule
.insert(SU
, EarlyStart
, SchedEnd
, II
);
1982 int FirstCycle
= Schedule
.getFirstCycle();
1983 scheduleFound
= Schedule
.insert(SU
, FirstCycle
+ getASAP(SU
),
1984 FirstCycle
+ getASAP(SU
) + II
- 1, II
);
1986 // Even if we find a schedule, make sure the schedule doesn't exceed the
1987 // allowable number of stages. We keep trying if this happens.
1989 if (SwpMaxStages
> -1 &&
1990 Schedule
.getMaxStageCount() > (unsigned)SwpMaxStages
)
1991 scheduleFound
= false;
1995 dbgs() << "\tCan't schedule\n";
1997 } while (++NI
!= NE
&& scheduleFound
);
1999 // If a schedule is found, check if it is a valid schedule too.
2001 scheduleFound
= Schedule
.isValidSchedule(this);
2004 LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
<< " (II=" << II
2008 Schedule
.finalizeSchedule(this);
2012 return scheduleFound
&& Schedule
.getMaxStageCount() > 0;
2015 /// Given a schedule for the loop, generate a new version of the loop,
2016 /// and replace the old version. This function generates a prolog
2017 /// that contains the initial iterations in the pipeline, and kernel
2018 /// loop, and the epilogue that contains the code for the final
/// Top-level driver for emitting the pipelined loop: builds the kernel block,
/// the prolog and epilog chains, the connecting Phis/branches, then deletes
/// the original loop body.
/// NOTE(review): extraction lost interior source lines; each "[gap]" marks a
/// spot where one or more original lines are missing. Visible statements may
/// not be contiguous in the real file.
void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
  // Create a new basic block for the kernel and add it to the CFG.
  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
  // [gap]
  unsigned MaxStageCount = Schedule.getMaxStageCount();
  // [gap]
  // Remember the registers that are used in different stages. The index is
  // the iteration, or stage, that the instruction is scheduled in. This is
  // a map between register names in the original block and the names created
  // in each stage of the pipelined loop.
  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
  InstrMapTy InstrMap;
  SmallVector<MachineBasicBlock *, 4> PrologBBs;
  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
  assert(PreheaderBB != nullptr &&
         "Need to add code to handle loops w/o preheader");
  // Generate the prolog instructions that set up the pipeline.
  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
  MF.insert(BB->getIterator(), KernelBB);

  // Rearrange the instructions to generate the new, pipelined loop,
  // and update register names as needed.
  for (int Cycle = Schedule.getFirstCycle(),
           LastCycle = Schedule.getFinalCycle();
       Cycle <= LastCycle; ++Cycle) {
    std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
    // This inner loop schedules each instruction in the cycle.
    for (SUnit *CI : CycleInstrs) {
      // Phis are regenerated separately below, not cloned here.
      if (CI->getInstr()->isPHI())
        // [gap — presumably `continue;`]
      unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
      MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
      updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
      KernelBB->push_back(NewMI);
      // InstrMap lets later rewriting find the original instruction for a clone.
      InstrMap[NewMI] = CI->getInstr();
      // [gap — closing braces of the two loops]

  // Copy any terminator instructions to the new kernel, and update
  // [gap — rest of comment]
  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
                                   E = BB->instr_end();
       // [gap — loop condition/increment]
    MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
    updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
    KernelBB->push_back(NewMI);
    InstrMap[NewMI] = &*I;
    // [gap]

  // The kernel loops back to itself; take over BB's successors first.
  KernelBB->transferSuccessors(BB);
  KernelBB->replaceSuccessor(BB, KernelBB);

  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
                       VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
               InstrMap, MaxStageCount, MaxStageCount, false);

  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););

  SmallVector<MachineBasicBlock *, 4> EpilogBBs;
  // Generate the epilog instructions to complete the pipeline.
  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
                 // [gap — remaining argument(s), presumably PrologBBs]

  // We need this step because the register allocation doesn't handle some
  // situations well, so we insert copies to help out.
  splitLifetimes(KernelBB, EpilogBBs, Schedule);

  // Remove dead instructions due to loop induction variables.
  removeDeadInstructions(KernelBB, EpilogBBs);

  // Add branches between prolog and epilog blocks.
  addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);

  // Remove the original loop since it's no longer referenced.
  // [gap — presumably a loop over BB's instructions binding `I`]
  LIS.RemoveMachineInstrFromMaps(I);
  // [gap]
  BB->eraseFromParent();
  // [gap — remainder of function; VRMap was heap-allocated above, so a
  // delete[] presumably follows — TODO confirm against upstream]
/// Generate the pipeline prolog code.
/// Creates one block per stage before the kernel; each prolog block holds the
/// already-started iterations for the stages it covers.
/// NOTE(review): "[gap]" marks source lines lost in extraction.
void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
                                       MachineBasicBlock *KernelBB,
                                       // [gap — a parameter, presumably
                                       // `ValueMapTy *VRMap` (used below)]
                                       MBBVectorTy &PrologBBs) {
  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
  assert(PreheaderBB != nullptr &&
         "Need to add code to handle loops w/o preheader");
  MachineBasicBlock *PredBB = PreheaderBB;
  InstrMapTy InstrMap;

  // Generate a basic block for each stage, not including the last stage,
  // which will be generated in the kernel. Each basic block may contain
  // instructions from multiple stages/iterations.
  for (unsigned i = 0; i < LastStage; ++i) {
    // Create and insert the prolog basic block prior to the original loop
    // basic block. The original loop is removed later.
    MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
    PrologBBs.push_back(NewBB);
    MF.insert(BB->getIterator(), NewBB);
    NewBB->transferSuccessors(PredBB);
    PredBB->addSuccessor(NewBB);
    // [gap — presumably `PredBB = NewBB;` so blocks chain — TODO confirm]

    // Generate instructions for each appropriate stage. Process instructions
    // in original program order.
    for (int StageNum = i; StageNum >= 0; --StageNum) {
      for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
                                       BBE = BB->getFirstTerminator();
           BBI != BBE; ++BBI) {
        if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
          // [gap]
          MachineInstr *NewMI =
              cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
          updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
                            // [gap — final argument, presumably VRMap]
          NewBB->push_back(NewMI);
          InstrMap[NewMI] = &*BBI;
          // [gap — closing braces]

    rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
    // [gap — debug-output wrapper]
    dbgs() << "prolog:\n";
    // [gap — closers]

  PredBB->replaceSuccessor(BB, KernelBB);

  // Check if we need to remove the branch from the preheader to the original
  // loop, and replace it with a branch to the new loop.
  unsigned numBranches = TII->removeBranch(*PreheaderBB);
  // [gap — presumably a guard on numBranches — TODO confirm]
  SmallVector<MachineOperand, 0> Cond;
  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
/// Generate the pipeline epilog code. The epilog code finishes the iterations
/// that were started in either the prolog or the kernel. We create a basic
/// block for each stage that needs to complete.
/// NOTE(review): "[gap]" marks source lines lost in extraction.
void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
                                       MachineBasicBlock *KernelBB,
                                       // [gap — a parameter, presumably
                                       // `ValueMapTy *VRMap` (used below)]
                                       MBBVectorTy &EpilogBBs,
                                       MBBVectorTy &PrologBBs) {
  // We need to change the branch from the kernel to the first epilog block, so
  // this call to analyze branch uses the kernel rather than the original BB.
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
  // [gap]

  // The kernel is a self-loop, so skip the back-edge successor to find the exit.
  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
  if (*LoopExitI == KernelBB)
    // [gap — presumably `++LoopExitI;`]
  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
  MachineBasicBlock *LoopExitBB = *LoopExitI;

  MachineBasicBlock *PredBB = KernelBB;
  MachineBasicBlock *EpilogStart = LoopExitBB;
  InstrMapTy InstrMap;

  // Generate a basic block for each stage, not including the last stage,
  // which was generated for the kernel. Each basic block may contain
  // instructions from multiple stages/iterations.
  int EpilogStage = LastStage + 1;
  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
    MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
    EpilogBBs.push_back(NewBB);
    MF.insert(BB->getIterator(), NewBB);

    PredBB->replaceSuccessor(LoopExitBB, NewBB);
    NewBB->addSuccessor(LoopExitBB);
    // [gap]
    if (EpilogStart == LoopExitBB)
      EpilogStart = NewBB;

    // Add instructions to the epilog depending on the current block.
    // Process instructions in original program order.
    for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
      for (auto &BBI : *BB) {
        // [gap — presumably a Phi guard]
        MachineInstr *In = &BBI;
        if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
          // Instructions with memoperands in the epilog are updated with
          // conservative values.
          MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
          updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
          NewBB->push_back(NewMI);
          InstrMap[NewMI] = In;
          // [gap — closers]

    generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
                         VRMap, InstrMap, LastStage, EpilogStage, i == 1);
    generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
                 InstrMap, LastStage, EpilogStage, i == 1);
    // [gap — presumably `PredBB = NewBB;` and debug wrapper]
    dbgs() << "epilog:\n";
    // [gap — closers]

  // Fix any Phi nodes in the loop exit block.
  for (MachineInstr &MI : *LoopExitBB) {
    // [gap — presumably a `!MI.isPHI()` break]
    for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
      MachineOperand &MO = MI.getOperand(i);
      if (MO.getMBB() == BB)
        // [gap — presumably retargets the incoming block]

  // Create a branch to the new epilog from the kernel.
  // Remove the original branch and add a new branch to the epilog.
  TII->removeBranch(*KernelBB);
  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
  // Add a branch to the loop exit.
  if (EpilogBBs.size() > 0) {
    MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
    SmallVector<MachineOperand, 4> Cond1;
    TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
/// Replace all uses of FromReg that appear outside the specified
/// basic block with ToReg.
/// NOTE(review): "[gap]" marks source lines lost in extraction.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
                                    MachineBasicBlock *MBB,
                                    MachineRegisterInfo &MRI,
                                    LiveIntervals &LIS) {
  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
       // [gap — end iterator / loop header continuation]
    MachineOperand &O = *I;
    // [gap]
    if (O.getParent()->getParent() != MBB)
      // [gap — presumably `O.setReg(ToReg);`]
  // Make sure ToReg has a live interval so later LIS queries don't assert.
  if (!LIS.hasInterval(ToReg))
    LIS.createEmptyInterval(ToReg);
/// Return true if the register has a use that occurs outside the
/// specified loop block. ("[gap]" marks source lines lost in extraction.)
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
                            MachineRegisterInfo &MRI) {
  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
       // [gap — end iterator / loop header continuation]
    if (I->getParent()->getParent() != BB)
      // [gap — presumably `return true;`]
  // [gap — presumably `return false;`]
/// Generate Phis for the specific block in the generated pipelined code.
/// This function looks at the Phis from the original code to guide the
/// creation of new Phis.
/// NOTE(review): this block lost many interior lines in extraction (marked
/// "[gap]"); the control flow shown is a partial reconstruction — consult the
/// original file before editing.
void SwingSchedulerDAG::generateExistingPhis(
    MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
    MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
    InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
    // [gap — final parameter, presumably `bool IsLast) {` (IsLast used below)]
  // Compute the stage number for the initial value of the Phi, which
  // comes from the prolog. The prolog to use depends on to which kernel/
  // epilog that we're adding the Phi.
  unsigned PrologStage = 0;
  unsigned PrevStage = 0;
  bool InKernel = (LastStageNum == CurStageNum);
  // [gap — presumably `if (InKernel) {`]
    PrologStage = LastStageNum - 1;
    PrevStage = CurStageNum;
  // [gap — presumably `} else {`]
    PrologStage = LastStageNum - (CurStageNum - LastStageNum);
    PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
  // [gap]

  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
                                   BBE = BB->getFirstNonPHI();
       BBI != BBE; ++BBI) {
    Register Def = BBI->getOperand(0).getReg();
    // [gap]
    unsigned InitVal = 0;
    unsigned LoopVal = 0;
    getPhiRegs(*BBI, BB, InitVal, LoopVal);
    // [gap]
    unsigned PhiOp1 = 0;
    // The Phi value from the loop body typically is defined in the loop, but
    // not always. So, we need to check if the value is defined in the loop.
    unsigned PhiOp2 = LoopVal;
    if (VRMap[LastStageNum].count(LoopVal))
      PhiOp2 = VRMap[LastStageNum][LoopVal];

    int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
    // [gap — presumably `int LoopValStage =` (LoopValStage used below)]
        Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
    unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
    if (NumStages == 0) {
      // We don't need to generate a Phi anymore, but we need to rename any uses
      // of the Phi value.
      unsigned NewReg = VRMap[PrevStage][LoopVal];
      rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
                            Def, InitVal, NewReg);
      if (VRMap[CurStageNum].count(LoopVal))
        VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
      // [gap — closer]
    // Adjust the number of Phis needed depending on the number of prologs left,
    // and the distance from where the Phi is first scheduled. The number of
    // Phis cannot exceed the number of prolog stages. Each stage can
    // potentially define two values.
    unsigned MaxPhis = PrologStage + 2;
    if (!InKernel && (int)PrologStage <= LoopValStage)
      MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
    unsigned NumPhis = std::min(NumStages, MaxPhis);

    unsigned NewReg = 0;
    unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
    // In the epilog, we may need to look back one stage to get the correct
    // Phi name because the epilog and prolog blocks execute the same stage.
    // The correct name is from the previous block only when the Phi has
    // been completely scheduled prior to the epilog, and Phi value is not
    // needed in multiple stages.
    // [gap — presumably a `StageDiff` initialization (StageDiff used below)]
    if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
        // [gap — rest of condition and body]
    // Adjust the computations below when the phi and the loop definition
    // are scheduled in different stages.
    if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
      StageDiff = StageScheduled - LoopValStage;
    for (unsigned np = 0; np < NumPhis; ++np) {
      // If the Phi hasn't been scheduled, then use the initial Phi operand
      // value. Otherwise, use the scheduled version of the instruction. This
      // is a little complicated when a Phi references another Phi.
      if (np > PrologStage || StageScheduled >= (int)LastStageNum)
        // [gap — presumably `PhiOp1 = InitVal;`]
      // Check if the Phi has already been scheduled in a prolog stage.
      else if (PrologStage >= AccessStage + StageDiff + np &&
               VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
        PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
      // Check if the Phi has already been scheduled, but the loop instruction
      // is either another Phi, or doesn't occur in the loop.
      else if (PrologStage >= AccessStage + StageDiff + np) {
        // If the Phi references another Phi, we need to examine the other
        // Phi to get the correct value.
        // [gap]
        MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
        // [gap — presumably an `Indirects` counter initialization]
        while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
          int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
          if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
            PhiOp1 = getInitPhiReg(*InstOp1, BB);
          // [gap — presumably `else`]
            PhiOp1 = getLoopPhiReg(*InstOp1, BB);
          InstOp1 = MRI.getVRegDef(PhiOp1);
          // [gap]
        int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
        int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
        if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
            VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
          PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
          // [gap — closers]

      // If this references a generated Phi in the kernel, get the Phi operand
      // from the incoming block.
      if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
        if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
          PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);

      MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
      bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
      // In the epilog, a map lookup is needed to get the value from the kernel,
      // or previous epilog block. How is does this depends on if the
      // instruction is scheduled in the previous block.
      // [gap — presumably `if (!InKernel) {`]
      int StageDiffAdj = 0;
      if (LoopValStage != -1 && StageScheduled > LoopValStage)
        StageDiffAdj = StageScheduled - LoopValStage;
      // Use the loop value defined in the kernel, unless the kernel
      // contains the last definition of the Phi.
      if (np == 0 && PrevStage == LastStageNum &&
          (StageScheduled != 0 || LoopValStage != 0) &&
          VRMap[PrevStage - StageDiffAdj].count(LoopVal))
        PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
      // Use the value defined by the Phi. We add one because we switch
      // from looking at the loop value to the Phi definition.
      else if (np > 0 && PrevStage == LastStageNum &&
               VRMap[PrevStage - np + 1].count(Def))
        PhiOp2 = VRMap[PrevStage - np + 1][Def];
      // Use the loop value defined in the kernel.
      else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
               VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
        PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
      // Use the value defined by the Phi, unless we're generating the first
      // epilog and the Phi refers to a Phi in a different stage.
      else if (VRMap[PrevStage - np].count(Def) &&
               (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
                (LoopValStage == StageScheduled)))
        PhiOp2 = VRMap[PrevStage - np][Def];
      // [gap — closer]

      // Check if we can reuse an existing Phi. This occurs when a Phi
      // references another Phi, and the other Phi is scheduled in an
      // earlier stage. We can try to reuse an existing Phi up until the last
      // stage of the current Phi.
      // [gap — presumably `if (LoopDefIsPhi) {`]
        if (static_cast<int>(PrologStage - np) >= StageScheduled) {
          int LVNumStages = Schedule.getStagesForPhi(LoopVal);
          int StageDiff = (StageScheduled - LoopValStage);
          LVNumStages -= StageDiff;
          // Make sure the loop value Phi has been processed already.
          if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
            // [gap]
            unsigned ReuseStage = CurStageNum;
            if (Schedule.isLoopCarried(this, *PhiInst))
              ReuseStage -= LVNumStages;
            // Check if the Phi to reuse has been generated yet. If not, then
            // there is nothing to reuse.
            if (VRMap[ReuseStage - np].count(LoopVal)) {
              NewReg = VRMap[ReuseStage - np][LoopVal];
              // [gap]
              rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
                                    &*BBI, Def, NewReg);
              // Update the map with the new Phi name.
              VRMap[CurStageNum - np][Def] = NewReg;
              // [gap]
              if (VRMap[LastStageNum - np - 1].count(LoopVal))
                PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
              // [gap]
              if (IsLast && np == NumPhis - 1)
                replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
              // [gap — presumably continue/closers]
        if (InKernel && StageDiff > 0 &&
            VRMap[CurStageNum - StageDiff - np].count(LoopVal))
          PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
        // [gap — closers]

      const TargetRegisterClass *RC = MRI.getRegClass(Def);
      NewReg = MRI.createVirtualRegister(RC);
      // [gap]
      MachineInstrBuilder NewPhi =
          BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
                  TII->get(TargetOpcode::PHI), NewReg);
      NewPhi.addReg(PhiOp1).addMBB(BB1);
      NewPhi.addReg(PhiOp2).addMBB(BB2);
      // [gap]
      InstrMap[NewPhi] = &*BBI;

      // We define the Phis after creating the new pipelined code, so
      // we need to rename the Phi values in scheduled instructions.
      // [gap]
      unsigned PrevReg = 0;
      if (InKernel && VRMap[PrevStage - np].count(LoopVal))
        PrevReg = VRMap[PrevStage - np][LoopVal];
      rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
                            Def, NewReg, PrevReg);
      // If the Phi has been scheduled, use the new name for rewriting.
      if (VRMap[CurStageNum - np].count(Def)) {
        unsigned R = VRMap[CurStageNum - np][Def];
        rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
                              // [gap — remaining args and closer]

      // Check if we need to rename any uses that occurs after the loop. The
      // register to replace depends on whether the Phi is scheduled in the
      // [gap — rest of comment]
      if (IsLast && np == NumPhis - 1)
        replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);

      // In the kernel, a dependent Phi uses the value from this Phi.
      // [gap]
      // Update the map with the new Phi name.
      VRMap[CurStageNum - np][Def] = NewReg;
      // [gap — closers]

    while (NumPhis++ < NumStages) {
      rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
                            &*BBI, Def, NewReg, 0);
      // [gap — closer]

    // Check if we need to rename a Phi that has been eliminated due to
    // [gap — rest of comment]
    if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
      replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
    // [gap — closers]
/// Generate Phis for the specified block in the generated pipelined code.
/// These are new Phis needed because the definition is scheduled after the
/// use in the pipelined sequence.
/// NOTE(review): "[gap]" marks source lines lost in extraction.
void SwingSchedulerDAG::generatePhis(
    MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
    MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
    InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
    // [gap — final parameter, presumably `bool IsLast) {` (IsLast used below)]
  // Compute the stage number that contains the initial Phi value, and
  // the Phi from the previous stage.
  unsigned PrologStage = 0;
  unsigned PrevStage = 0;
  unsigned StageDiff = CurStageNum - LastStageNum;
  bool InKernel = (StageDiff == 0);
  // [gap — presumably `if (InKernel) {`]
    PrologStage = LastStageNum - 1;
    PrevStage = CurStageNum;
  // [gap — presumably `} else {`]
    PrologStage = LastStageNum - StageDiff;
    PrevStage = LastStageNum + StageDiff - 1;
  // [gap]

  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
                                   BBE = BB->instr_end();
       BBI != BBE; ++BBI) {
    for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = BBI->getOperand(i);
      if (!MO.isReg() || !MO.isDef() ||
          !Register::isVirtualRegister(MO.getReg()))
        // [gap — presumably `continue;`]
      int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
      assert(StageScheduled != -1 && "Expecting scheduled instruction.");
      Register Def = MO.getReg();
      unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
      // An instruction scheduled in stage 0 and is used after the loop
      // requires a phi in the epilog for the last definition from either
      // the kernel or prolog.
      if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
          hasUseAfterLoop(Def, BB, MRI))
        // [gap — presumably `NumPhis = 1;`]
      if (!InKernel && (unsigned)StageScheduled > PrologStage)
        // [gap — presumably `continue;`]
      unsigned PhiOp2 = VRMap[PrevStage][Def];
      if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
        if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
          PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
      // The number of Phis can't exceed the number of prolog stages. The
      // prolog stage number is zero based.
      if (NumPhis > PrologStage + 1 - StageScheduled)
        NumPhis = PrologStage + 1 - StageScheduled;
      for (unsigned np = 0; np < NumPhis; ++np) {
        unsigned PhiOp1 = VRMap[PrologStage][Def];
        if (np <= PrologStage)
          PhiOp1 = VRMap[PrologStage - np][Def];
        if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
          if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
            PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
          if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
            PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
          // [gap — closer, and presumably an `if (!InKernel)` guard]
          PhiOp2 = VRMap[PrevStage - np][Def];
        // [gap]
        const TargetRegisterClass *RC = MRI.getRegClass(Def);
        Register NewReg = MRI.createVirtualRegister(RC);
        // [gap]
        MachineInstrBuilder NewPhi =
            BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
                    TII->get(TargetOpcode::PHI), NewReg);
        NewPhi.addReg(PhiOp1).addMBB(BB1);
        NewPhi.addReg(PhiOp2).addMBB(BB2);
        // [gap]
        InstrMap[NewPhi] = &*BBI;

        // Rewrite uses and update the map. The actions depend upon whether
        // we generating code for the kernel or epilog blocks.
        // [gap — presumably `if (InKernel) {`]
        rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
                              &*BBI, PhiOp1, NewReg);
        rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
                              &*BBI, PhiOp2, NewReg);
        // [gap]
        VRMap[PrevStage - np - 1][Def] = NewReg;
        // [gap — presumably `} else {`]
        VRMap[CurStageNum - np][Def] = NewReg;
        if (np == NumPhis - 1)
          rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
                                &*BBI, Def, NewReg);
        // [gap]
        if (IsLast && np == NumPhis - 1)
          replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
        // [gap — closers]
/// Remove instructions that generate values with no uses.
/// Typically, these are induction variable operations that generate values
/// used in the loop itself. A dead instruction has a definition with
/// no uses, or uses that occur in the original loop only.
/// NOTE(review): "[gap]" marks source lines lost in extraction.
void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
                                               MBBVectorTy &EpilogBBs) {
  // For each epilog block, check that the value defined by each instruction
  // is used. If not, delete it.
  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
                                     MBE = EpilogBBs.rend();
       // [gap — loop condition/increment]
    for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
                                                   ME = (*MBB)->instr_rend();
         // [gap — loop condition]
      // From DeadMachineInstructionElem. Don't delete inline assembly.
      if (MI->isInlineAsm()) {
        // [gap — presumably advance and continue]
      bool SawStore = false;
      // Check if it's safe to remove the instruction due to side effects.
      // We can, and want to, remove Phis here.
      if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
        // [gap — presumably advance and continue]
      // [gap — presumably a `used` flag initialization (assigned below)]
      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
                                      MOE = MI->operands_end();
           MOI != MOE; ++MOI) {
        if (!MOI->isReg() || !MOI->isDef())
          // [gap — presumably `continue;`]
        Register reg = MOI->getReg();
        // Assume physical registers are used, unless they are marked dead.
        if (Register::isPhysicalRegister(reg)) {
          used = !MOI->isDead();
          // [gap]
        unsigned realUses = 0;
        for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
             // [gap — loop header continuation]
          // Check if there are any uses that occur only in the original
          // loop. If so, that's not a real use.
          if (UI->getParent()->getParent() != BB) {
            // [gap — real-use counting / used-flag logic]
      LIS.RemoveMachineInstrFromMaps(*MI);
      // Post-increment keeps the iterator valid while erasing the current
      // instruction.
      MI++->eraseFromParent();
      // [gap — closers]

  // In the kernel block, check if we can remove a Phi that generates a value
  // used in an instruction removed in the epilog block.
  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
                                   BBE = KernelBB->getFirstNonPHI();
       // [gap — loop condition]
    MachineInstr *MI = &*BBI;
    // [gap — presumably advance BBI before possible erase]
    Register reg = MI->getOperand(0).getReg();
    if (MRI.use_begin(reg) == MRI.use_end()) {
      LIS.RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      // [gap — closers]
/// For loop carried definitions, we split the lifetime of a virtual register
/// that has uses past the definition in the next iteration. A copy with a new
/// virtual register is inserted before the definition, which helps with
/// generating a better register assignment.
///
///   v1 = phi(a, v2)     v1 = phi(a, v2)
///   v2 = phi(b, v3)     v2 = phi(b, v3)
///   v3 = ..             v4 = copy v1
///
/// NOTE(review): "[gap]" marks source lines lost in extraction.
void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
                                       MBBVectorTy &EpilogBBs,
                                       SMSchedule &Schedule) {
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  for (auto &PHI : KernelBB->phis()) {
    Register Def = PHI.getOperand(0).getReg();
    // Check for any Phi definition that used as an operand of another Phi
    // in the same block.
    for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
                                                 E = MRI.use_instr_end();
         // [gap — loop condition/increment]
      if (I->isPHI() && I->getParent() == KernelBB) {
        // Get the loop carried definition.
        unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
        // [gap — presumably a guard on LCDef]
        MachineInstr *MI = MRI.getVRegDef(LCDef);
        if (!MI || MI->getParent() != KernelBB || MI->isPHI())
          // [gap — presumably `continue;`]
        // Search through the rest of the block looking for uses of the Phi
        // definition. If one occurs, then split the lifetime.
        unsigned SplitReg = 0;
        for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
                                    KernelBB->instr_end()))
          if (BBJ.readsRegister(Def)) {
            // We split the lifetime when we find the first use.
            if (SplitReg == 0) {
              SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
              BuildMI(*KernelBB, MI, MI->getDebugLoc(),
                      TII->get(TargetOpcode::COPY), SplitReg)
                  // [gap — presumably `.addReg(Def);` and closer]
            BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
            // [gap — closers / guard on SplitReg]
        // Search through each of the epilog blocks for any uses to be renamed.
        for (auto &Epilog : EpilogBBs)
          for (auto &I : *Epilog)
            if (I.readsRegister(Def))
              I.substituteRegister(Def, SplitReg, 0, *TRI);
        // [gap — closers]
/// Remove the incoming block from the Phis in a basic block.
/// Each Phi operand pair is (reg, MBB); removing a pair drops the edge's
/// contribution. ("[gap]" marks source lines lost in extraction.)
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
  for (MachineInstr &MI : *BB) {
    // [gap — presumably a `!MI.isPHI()` break, since Phis lead the block]
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
      if (MI.getOperand(i + 1).getMBB() == Incoming) {
        // Remove the MBB operand first so index i still names the reg operand.
        MI.RemoveOperand(i + 1);
        MI.RemoveOperand(i);
        // [gap — presumably break/closers]
/// Create branches from each prolog basic block to the appropriate epilog
/// block. These edges are needed if the loop ends before reaching the
/// kernel. ("[gap]" marks source lines lost in extraction.)
void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB,
                                    MBBVectorTy &PrologBBs,
                                    MachineBasicBlock *KernelBB,
                                    MBBVectorTy &EpilogBBs,
                                    SMSchedule &Schedule, ValueMapTy *VRMap) {
  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
  MachineInstr *Cmp = Pass.LI.LoopCompare;
  MachineBasicBlock *LastPro = KernelBB;
  MachineBasicBlock *LastEpi = KernelBB;
  // Start from the blocks connected to the kernel and work "out"
  // to the first prolog and the last epilog blocks.
  SmallVector<MachineInstr *, 4> PrevInsts;
  unsigned MaxIter = PrologBBs.size() - 1;
  unsigned LC = UINT_MAX;
  unsigned LCMin = UINT_MAX;
  // i walks epilogs outward while j walks prologs inward-to-outward.
  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
    // Add branches to the prolog that go to the corresponding
    // epilog, and the fall-thru prolog/kernel block.
    MachineBasicBlock *Prolog = PrologBBs[j];
    MachineBasicBlock *Epilog = EpilogBBs[i];
    // We've executed one iteration, so decrement the loop count and check for
    // [gap — rest of comment]
    SmallVector<MachineOperand, 4> Cond;
    // Check if the LOOP0 has already been removed. If so, then there is no need
    // to reduce the trip count.
    // [gap — presumably a guard before the call]
    LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
                              PrevInsts, j, MaxIter);
    // Record the value of the first trip count, which is used to determine if
    // branches and blocks can be removed for constant trip counts.
    if (LCMin == UINT_MAX)
      // [gap — presumably `LCMin = LC;`]
    unsigned numAdded = 0;
    if (Register::isVirtualRegister(LC)) {
      // Trip count is dynamic: keep a conditional early exit to the epilog.
      Prolog->addSuccessor(Epilog);
      numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
    } else if (j >= LCMin) {
      Prolog->addSuccessor(Epilog);
      Prolog->removeSuccessor(LastPro);
      LastEpi->removeSuccessor(Epilog);
      numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
      removePhis(Epilog, LastEpi);
      // Remove the blocks that are no longer referenced.
      if (LastPro != LastEpi) {
        // [gap]
        LastEpi->eraseFromParent();
        // [gap]
      LastPro->eraseFromParent();
      // [gap — presumably `} else {`]
      numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
      removePhis(Epilog, Prolog);
      // [gap — presumably closers and LastPro/LastEpi updates]
    // Rename registers in the newly-inserted branch instructions only
    // (numAdded bounds the walk over the block tail).
    for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
                                                   E = Prolog->instr_rend();
         I != E && numAdded > 0; ++I, --numAdded)
      updateInstruction(&*I, false, j, 0, Schedule, VRMap);
    // [gap — closers]
/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set Delta to the amount of the change.
/// NOTE(review): "[gap]" marks source lines lost in extraction.
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const MachineOperand *BaseOp;
  // [gap — presumably an `Offset` declaration]
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
    // [gap — presumably `return false;`]
  if (!BaseOp->isReg())
    // [gap — presumably `return false;`]
  Register BaseReg = BaseOp->getReg();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  // Check if there is a Phi. If so, get the definition in the loop.
  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
  if (BaseDef && BaseDef->isPHI()) {
    BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
    BaseDef = MRI.getVRegDef(BaseReg);
    // [gap — closer, null check, and `D` declaration presumably]
  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
    // [gap — presumably failure return; then Delta assignment and
    // `return true;`]
/// Update the memory operand with a new offset when the pipeliner
/// generates a new copy of the instruction that refers to a
/// different memory location.
/// NOTE(review): "[gap]" marks source lines lost in extraction.
void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
                                          MachineInstr &OldMI, unsigned Num) {
  // [gap — presumably an early return for Num == 0]
  // If the instruction has memory operands, then adjust the offset
  // when the instruction appears in different stages.
  if (NewMI.memoperands_empty())
    // [gap — presumably `return;`]
  SmallVector<MachineMemOperand *, 2> NewMMOs;
  for (MachineMemOperand *MMO : NewMI.memoperands()) {
    // TODO: Figure out whether isAtomic is really necessary (see D57601).
    // Leave volatile/atomic/invariant or value-less memoperands untouched.
    if (MMO->isVolatile() || MMO->isAtomic() ||
        (MMO->isInvariant() && MMO->isDereferenceable()) ||
        (!MMO->getValue())) {
      NewMMOs.push_back(MMO);
      // [gap — presumably `continue;` and a `Delta` declaration]
    if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
      int64_t AdjOffset = Delta * Num;
      // [gap — presumably `NewMMOs.push_back(`]
          MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
      // [gap — presumably `} else {` and another push_back; UINT_MAX Num
      // signals "conservative", hence the unknown-size operand below]
          MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
      // [gap — closers]
  NewMI.setMemRefs(MF, NewMMOs);
/// Clone the instruction for the new pipelined loop and update the
/// memory operands, if needed.
/// NOTE(review): "[gap]" marks source lines lost in extraction.
MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
                                            unsigned CurStageNum,
                                            unsigned InstStageNum) {
  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
  // Check for tied operands in inline asm instructions. This should be handled
  // elsewhere, but I'm not sure of the best solution.
  if (OldMI->isInlineAsm())
    for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
      const auto &MO = OldMI->getOperand(i);
      if (MO.isReg() && MO.isUse())
        // [gap — presumably `break;` and a `UseIdx` declaration]
      if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
        NewMI->tieOperands(i, UseIdx);
      // [gap — closer]
  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
  // [gap — presumably `return NewMI;` and closer]
2931 /// Clone the instruction for the new pipelined loop. If needed, this
2932 /// function updates the instruction using the values saved in the
2933 /// InstrChanges structure.
2934 MachineInstr
*SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr
*OldMI
,
2935 unsigned CurStageNum
,
2936 unsigned InstStageNum
,
2937 SMSchedule
&Schedule
) {
2938 MachineInstr
*NewMI
= MF
.CloneMachineInstr(OldMI
);
2939 DenseMap
<SUnit
*, std::pair
<unsigned, int64_t>>::iterator It
=
2940 InstrChanges
.find(getSUnit(OldMI
));
2941 if (It
!= InstrChanges
.end()) {
2942 std::pair
<unsigned, int64_t> RegAndOffset
= It
->second
;
2943 unsigned BasePos
, OffsetPos
;
2944 if (!TII
->getBaseAndOffsetPosition(*OldMI
, BasePos
, OffsetPos
))
2946 int64_t NewOffset
= OldMI
->getOperand(OffsetPos
).getImm();
2947 MachineInstr
*LoopDef
= findDefInLoop(RegAndOffset
.first
);
2948 if (Schedule
.stageScheduled(getSUnit(LoopDef
)) > (signed)InstStageNum
)
2949 NewOffset
+= RegAndOffset
.second
* (CurStageNum
- InstStageNum
);
2950 NewMI
->getOperand(OffsetPos
).setImm(NewOffset
);
2952 updateMemOperands(*NewMI
, *OldMI
, CurStageNum
- InstStageNum
);
2956 /// Update the machine instruction with new virtual registers. This
2957 /// function may change the defintions and/or uses.
2958 void SwingSchedulerDAG::updateInstruction(MachineInstr
*NewMI
, bool LastDef
,
2959 unsigned CurStageNum
,
2960 unsigned InstrStageNum
,
2961 SMSchedule
&Schedule
,
2962 ValueMapTy
*VRMap
) {
2963 for (unsigned i
= 0, e
= NewMI
->getNumOperands(); i
!= e
; ++i
) {
2964 MachineOperand
&MO
= NewMI
->getOperand(i
);
2965 if (!MO
.isReg() || !Register::isVirtualRegister(MO
.getReg()))
2967 Register reg
= MO
.getReg();
2969 // Create a new virtual register for the definition.
2970 const TargetRegisterClass
*RC
= MRI
.getRegClass(reg
);
2971 Register NewReg
= MRI
.createVirtualRegister(RC
);
2973 VRMap
[CurStageNum
][reg
] = NewReg
;
2975 replaceRegUsesAfterLoop(reg
, NewReg
, BB
, MRI
, LIS
);
2976 } else if (MO
.isUse()) {
2977 MachineInstr
*Def
= MRI
.getVRegDef(reg
);
2978 // Compute the stage that contains the last definition for instruction.
2979 int DefStageNum
= Schedule
.stageScheduled(getSUnit(Def
));
2980 unsigned StageNum
= CurStageNum
;
2981 if (DefStageNum
!= -1 && (int)InstrStageNum
> DefStageNum
) {
2982 // Compute the difference in stages between the defintion and the use.
2983 unsigned StageDiff
= (InstrStageNum
- DefStageNum
);
2984 // Make an adjustment to get the last definition.
2985 StageNum
-= StageDiff
;
2987 if (VRMap
[StageNum
].count(reg
))
2988 MO
.setReg(VRMap
[StageNum
][reg
]);
2993 /// Return the instruction in the loop that defines the register.
2994 /// If the definition is a Phi, then follow the Phi operand to
2995 /// the instruction in the loop.
2996 MachineInstr
*SwingSchedulerDAG::findDefInLoop(unsigned Reg
) {
2997 SmallPtrSet
<MachineInstr
*, 8> Visited
;
2998 MachineInstr
*Def
= MRI
.getVRegDef(Reg
);
2999 while (Def
->isPHI()) {
3000 if (!Visited
.insert(Def
).second
)
3002 for (unsigned i
= 1, e
= Def
->getNumOperands(); i
< e
; i
+= 2)
3003 if (Def
->getOperand(i
+ 1).getMBB() == BB
) {
3004 Def
= MRI
.getVRegDef(Def
->getOperand(i
).getReg());
3011 /// Return the new name for the value from the previous stage.
3012 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum
, unsigned PhiStage
,
3013 unsigned LoopVal
, unsigned LoopStage
,
3015 MachineBasicBlock
*BB
) {
3016 unsigned PrevVal
= 0;
3017 if (StageNum
> PhiStage
) {
3018 MachineInstr
*LoopInst
= MRI
.getVRegDef(LoopVal
);
3019 if (PhiStage
== LoopStage
&& VRMap
[StageNum
- 1].count(LoopVal
))
3020 // The name is defined in the previous stage.
3021 PrevVal
= VRMap
[StageNum
- 1][LoopVal
];
3022 else if (VRMap
[StageNum
].count(LoopVal
))
3023 // The previous name is defined in the current stage when the instruction
3024 // order is swapped.
3025 PrevVal
= VRMap
[StageNum
][LoopVal
];
3026 else if (!LoopInst
->isPHI() || LoopInst
->getParent() != BB
)
3027 // The loop value hasn't yet been scheduled.
3029 else if (StageNum
== PhiStage
+ 1)
3030 // The loop value is another phi, which has not been scheduled.
3031 PrevVal
= getInitPhiReg(*LoopInst
, BB
);
3032 else if (StageNum
> PhiStage
+ 1 && LoopInst
->getParent() == BB
)
3033 // The loop value is another phi, which has been scheduled.
3035 getPrevMapVal(StageNum
- 1, PhiStage
, getLoopPhiReg(*LoopInst
, BB
),
3036 LoopStage
, VRMap
, BB
);
3041 /// Rewrite the Phi values in the specified block to use the mappings
3042 /// from the initial operand. Once the Phi is scheduled, we switch
3043 /// to using the loop value instead of the Phi value, so those names
3044 /// do not need to be rewritten.
3045 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock
*NewBB
,
3047 SMSchedule
&Schedule
,
3049 InstrMapTy
&InstrMap
) {
3050 for (auto &PHI
: BB
->phis()) {
3051 unsigned InitVal
= 0;
3052 unsigned LoopVal
= 0;
3053 getPhiRegs(PHI
, BB
, InitVal
, LoopVal
);
3054 Register PhiDef
= PHI
.getOperand(0).getReg();
3057 (unsigned)Schedule
.stageScheduled(getSUnit(MRI
.getVRegDef(PhiDef
)));
3058 unsigned LoopStage
=
3059 (unsigned)Schedule
.stageScheduled(getSUnit(MRI
.getVRegDef(LoopVal
)));
3060 unsigned NumPhis
= Schedule
.getStagesForPhi(PhiDef
);
3061 if (NumPhis
> StageNum
)
3063 for (unsigned np
= 0; np
<= NumPhis
; ++np
) {
3065 getPrevMapVal(StageNum
- np
, PhiStage
, LoopVal
, LoopStage
, VRMap
, BB
);
3068 rewriteScheduledInstr(NewBB
, Schedule
, InstrMap
, StageNum
- np
, np
, &PHI
,
3074 /// Rewrite a previously scheduled instruction to use the register value
3075 /// from the new instruction. Make sure the instruction occurs in the
3076 /// basic block, and we don't change the uses in the new instruction.
3077 void SwingSchedulerDAG::rewriteScheduledInstr(
3078 MachineBasicBlock
*BB
, SMSchedule
&Schedule
, InstrMapTy
&InstrMap
,
3079 unsigned CurStageNum
, unsigned PhiNum
, MachineInstr
*Phi
, unsigned OldReg
,
3080 unsigned NewReg
, unsigned PrevReg
) {
3081 bool InProlog
= (CurStageNum
< Schedule
.getMaxStageCount());
3082 int StagePhi
= Schedule
.stageScheduled(getSUnit(Phi
)) + PhiNum
;
3083 // Rewrite uses that have been scheduled already to use the new
3085 for (MachineRegisterInfo::use_iterator UI
= MRI
.use_begin(OldReg
),
3088 MachineOperand
&UseOp
= *UI
;
3089 MachineInstr
*UseMI
= UseOp
.getParent();
3091 if (UseMI
->getParent() != BB
)
3093 if (UseMI
->isPHI()) {
3094 if (!Phi
->isPHI() && UseMI
->getOperand(0).getReg() == NewReg
)
3096 if (getLoopPhiReg(*UseMI
, BB
) != OldReg
)
3099 InstrMapTy::iterator OrigInstr
= InstrMap
.find(UseMI
);
3100 assert(OrigInstr
!= InstrMap
.end() && "Instruction not scheduled.");
3101 SUnit
*OrigMISU
= getSUnit(OrigInstr
->second
);
3102 int StageSched
= Schedule
.stageScheduled(OrigMISU
);
3103 int CycleSched
= Schedule
.cycleScheduled(OrigMISU
);
3104 unsigned ReplaceReg
= 0;
3105 // This is the stage for the scheduled instruction.
3106 if (StagePhi
== StageSched
&& Phi
->isPHI()) {
3107 int CyclePhi
= Schedule
.cycleScheduled(getSUnit(Phi
));
3108 if (PrevReg
&& InProlog
)
3109 ReplaceReg
= PrevReg
;
3110 else if (PrevReg
&& !Schedule
.isLoopCarried(this, *Phi
) &&
3111 (CyclePhi
<= CycleSched
|| OrigMISU
->getInstr()->isPHI()))
3112 ReplaceReg
= PrevReg
;
3114 ReplaceReg
= NewReg
;
3116 // The scheduled instruction occurs before the scheduled Phi, and the
3117 // Phi is not loop carried.
3118 if (!InProlog
&& StagePhi
+ 1 == StageSched
&&
3119 !Schedule
.isLoopCarried(this, *Phi
))
3120 ReplaceReg
= NewReg
;
3121 if (StagePhi
> StageSched
&& Phi
->isPHI())
3122 ReplaceReg
= NewReg
;
3123 if (!InProlog
&& !Phi
->isPHI() && StagePhi
< StageSched
)
3124 ReplaceReg
= NewReg
;
3126 MRI
.constrainRegClass(ReplaceReg
, MRI
.getRegClass(OldReg
));
3127 UseOp
.setReg(ReplaceReg
);
3132 /// Check if we can change the instruction to use an offset value from the
3133 /// previous iteration. If so, return true and set the base and offset values
3134 /// so that we can rewrite the load, if necessary.
3135 /// v1 = Phi(v0, v3)
3137 /// v3 = post_store v1, 4, x
3138 /// This function enables the load to be rewritten as v2 = load v3, 4.
3139 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr
*MI
,
3141 unsigned &OffsetPos
,
3144 // Get the load instruction.
3145 if (TII
->isPostIncrement(*MI
))
3147 unsigned BasePosLd
, OffsetPosLd
;
3148 if (!TII
->getBaseAndOffsetPosition(*MI
, BasePosLd
, OffsetPosLd
))
3150 Register BaseReg
= MI
->getOperand(BasePosLd
).getReg();
3152 // Look for the Phi instruction.
3153 MachineRegisterInfo
&MRI
= MI
->getMF()->getRegInfo();
3154 MachineInstr
*Phi
= MRI
.getVRegDef(BaseReg
);
3155 if (!Phi
|| !Phi
->isPHI())
3157 // Get the register defined in the loop block.
3158 unsigned PrevReg
= getLoopPhiReg(*Phi
, MI
->getParent());
3162 // Check for the post-increment load/store instruction.
3163 MachineInstr
*PrevDef
= MRI
.getVRegDef(PrevReg
);
3164 if (!PrevDef
|| PrevDef
== MI
)
3167 if (!TII
->isPostIncrement(*PrevDef
))
3170 unsigned BasePos1
= 0, OffsetPos1
= 0;
3171 if (!TII
->getBaseAndOffsetPosition(*PrevDef
, BasePos1
, OffsetPos1
))
3174 // Make sure that the instructions do not access the same memory location in
3175 // the next iteration.
3176 int64_t LoadOffset
= MI
->getOperand(OffsetPosLd
).getImm();
3177 int64_t StoreOffset
= PrevDef
->getOperand(OffsetPos1
).getImm();
3178 MachineInstr
*NewMI
= MF
.CloneMachineInstr(MI
);
3179 NewMI
->getOperand(OffsetPosLd
).setImm(LoadOffset
+ StoreOffset
);
3180 bool Disjoint
= TII
->areMemAccessesTriviallyDisjoint(*NewMI
, *PrevDef
);
3181 MF
.DeleteMachineInstr(NewMI
);
3185 // Set the return value once we determine that we return true.
3186 BasePos
= BasePosLd
;
3187 OffsetPos
= OffsetPosLd
;
3189 Offset
= StoreOffset
;
3193 /// Apply changes to the instruction if needed. The changes are need
3194 /// to improve the scheduling and depend up on the final schedule.
3195 void SwingSchedulerDAG::applyInstrChange(MachineInstr
*MI
,
3196 SMSchedule
&Schedule
) {
3197 SUnit
*SU
= getSUnit(MI
);
3198 DenseMap
<SUnit
*, std::pair
<unsigned, int64_t>>::iterator It
=
3199 InstrChanges
.find(SU
);
3200 if (It
!= InstrChanges
.end()) {
3201 std::pair
<unsigned, int64_t> RegAndOffset
= It
->second
;
3202 unsigned BasePos
, OffsetPos
;
3203 if (!TII
->getBaseAndOffsetPosition(*MI
, BasePos
, OffsetPos
))
3205 Register BaseReg
= MI
->getOperand(BasePos
).getReg();
3206 MachineInstr
*LoopDef
= findDefInLoop(BaseReg
);
3207 int DefStageNum
= Schedule
.stageScheduled(getSUnit(LoopDef
));
3208 int DefCycleNum
= Schedule
.cycleScheduled(getSUnit(LoopDef
));
3209 int BaseStageNum
= Schedule
.stageScheduled(SU
);
3210 int BaseCycleNum
= Schedule
.cycleScheduled(SU
);
3211 if (BaseStageNum
< DefStageNum
) {
3212 MachineInstr
*NewMI
= MF
.CloneMachineInstr(MI
);
3213 int OffsetDiff
= DefStageNum
- BaseStageNum
;
3214 if (DefCycleNum
< BaseCycleNum
) {
3215 NewMI
->getOperand(BasePos
).setReg(RegAndOffset
.first
);
3220 MI
->getOperand(OffsetPos
).getImm() + RegAndOffset
.second
* OffsetDiff
;
3221 NewMI
->getOperand(OffsetPos
).setImm(NewOffset
);
3222 SU
->setInstr(NewMI
);
3223 MISUnitMap
[NewMI
] = SU
;
3224 NewMIs
.insert(NewMI
);
3229 /// Return true for an order or output dependence that is loop carried
3230 /// potentially. A dependence is loop carried if the destination defines a valu
3231 /// that may be used or defined by the source in a subsequent iteration.
3232 bool SwingSchedulerDAG::isLoopCarriedDep(SUnit
*Source
, const SDep
&Dep
,
3234 if ((Dep
.getKind() != SDep::Order
&& Dep
.getKind() != SDep::Output
) ||
3238 if (!SwpPruneLoopCarried
)
3241 if (Dep
.getKind() == SDep::Output
)
3244 MachineInstr
*SI
= Source
->getInstr();
3245 MachineInstr
*DI
= Dep
.getSUnit()->getInstr();
3248 assert(SI
!= nullptr && DI
!= nullptr && "Expecting SUnit with an MI.");
3250 // Assume ordered loads and stores may have a loop carried dependence.
3251 if (SI
->hasUnmodeledSideEffects() || DI
->hasUnmodeledSideEffects() ||
3252 SI
->mayRaiseFPException() || DI
->mayRaiseFPException() ||
3253 SI
->hasOrderedMemoryRef() || DI
->hasOrderedMemoryRef())
3256 // Only chain dependences between a load and store can be loop carried.
3257 if (!DI
->mayStore() || !SI
->mayLoad())
3260 unsigned DeltaS
, DeltaD
;
3261 if (!computeDelta(*SI
, DeltaS
) || !computeDelta(*DI
, DeltaD
))
3264 const MachineOperand
*BaseOpS
, *BaseOpD
;
3265 int64_t OffsetS
, OffsetD
;
3266 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
3267 if (!TII
->getMemOperandWithOffset(*SI
, BaseOpS
, OffsetS
, TRI
) ||
3268 !TII
->getMemOperandWithOffset(*DI
, BaseOpD
, OffsetD
, TRI
))
3271 if (!BaseOpS
->isIdenticalTo(*BaseOpD
))
3274 // Check that the base register is incremented by a constant value for each
3276 MachineInstr
*Def
= MRI
.getVRegDef(BaseOpS
->getReg());
3277 if (!Def
|| !Def
->isPHI())
3279 unsigned InitVal
= 0;
3280 unsigned LoopVal
= 0;
3281 getPhiRegs(*Def
, BB
, InitVal
, LoopVal
);
3282 MachineInstr
*LoopDef
= MRI
.getVRegDef(LoopVal
);
3284 if (!LoopDef
|| !TII
->getIncrementValue(*LoopDef
, D
))
3287 uint64_t AccessSizeS
= (*SI
->memoperands_begin())->getSize();
3288 uint64_t AccessSizeD
= (*DI
->memoperands_begin())->getSize();
3290 // This is the main test, which checks the offset values and the loop
3291 // increment value to determine if the accesses may be loop carried.
3292 if (AccessSizeS
== MemoryLocation::UnknownSize
||
3293 AccessSizeD
== MemoryLocation::UnknownSize
)
3296 if (DeltaS
!= DeltaD
|| DeltaS
< AccessSizeS
|| DeltaD
< AccessSizeD
)
3299 return (OffsetS
+ (int64_t)AccessSizeS
< OffsetD
+ (int64_t)AccessSizeD
);
3302 void SwingSchedulerDAG::postprocessDAG() {
3303 for (auto &M
: Mutations
)
3307 /// Try to schedule the node at the specified StartCycle and continue
3308 /// until the node is schedule or the EndCycle is reached. This function
3309 /// returns true if the node is scheduled. This routine may search either
3310 /// forward or backward for a place to insert the instruction based upon
3311 /// the relative values of StartCycle and EndCycle.
3312 bool SMSchedule::insert(SUnit
*SU
, int StartCycle
, int EndCycle
, int II
) {
3313 bool forward
= true;
3315 dbgs() << "Trying to insert node between " << StartCycle
<< " and "
3316 << EndCycle
<< " II: " << II
<< "\n";
3318 if (StartCycle
> EndCycle
)
3321 // The terminating condition depends on the direction.
3322 int termCycle
= forward
? EndCycle
+ 1 : EndCycle
- 1;
3323 for (int curCycle
= StartCycle
; curCycle
!= termCycle
;
3324 forward
? ++curCycle
: --curCycle
) {
3326 // Add the already scheduled instructions at the specified cycle to the
3328 ProcItinResources
.clearResources();
3329 for (int checkCycle
= FirstCycle
+ ((curCycle
- FirstCycle
) % II
);
3330 checkCycle
<= LastCycle
; checkCycle
+= II
) {
3331 std::deque
<SUnit
*> &cycleInstrs
= ScheduledInstrs
[checkCycle
];
3333 for (std::deque
<SUnit
*>::iterator I
= cycleInstrs
.begin(),
3334 E
= cycleInstrs
.end();
3336 if (ST
.getInstrInfo()->isZeroCost((*I
)->getInstr()->getOpcode()))
3338 assert(ProcItinResources
.canReserveResources(*(*I
)->getInstr()) &&
3339 "These instructions have already been scheduled.");
3340 ProcItinResources
.reserveResources(*(*I
)->getInstr());
3343 if (ST
.getInstrInfo()->isZeroCost(SU
->getInstr()->getOpcode()) ||
3344 ProcItinResources
.canReserveResources(*SU
->getInstr())) {
3346 dbgs() << "\tinsert at cycle " << curCycle
<< " ";
3347 SU
->getInstr()->dump();
3350 ScheduledInstrs
[curCycle
].push_back(SU
);
3351 InstrToCycle
.insert(std::make_pair(SU
, curCycle
));
3352 if (curCycle
> LastCycle
)
3353 LastCycle
= curCycle
;
3354 if (curCycle
< FirstCycle
)
3355 FirstCycle
= curCycle
;
3359 dbgs() << "\tfailed to insert at cycle " << curCycle
<< " ";
3360 SU
->getInstr()->dump();
3366 // Return the cycle of the earliest scheduled instruction in the chain.
3367 int SMSchedule::earliestCycleInChain(const SDep
&Dep
) {
3368 SmallPtrSet
<SUnit
*, 8> Visited
;
3369 SmallVector
<SDep
, 8> Worklist
;
3370 Worklist
.push_back(Dep
);
3371 int EarlyCycle
= INT_MAX
;
3372 while (!Worklist
.empty()) {
3373 const SDep
&Cur
= Worklist
.pop_back_val();
3374 SUnit
*PrevSU
= Cur
.getSUnit();
3375 if (Visited
.count(PrevSU
))
3377 std::map
<SUnit
*, int>::const_iterator it
= InstrToCycle
.find(PrevSU
);
3378 if (it
== InstrToCycle
.end())
3380 EarlyCycle
= std::min(EarlyCycle
, it
->second
);
3381 for (const auto &PI
: PrevSU
->Preds
)
3382 if (PI
.getKind() == SDep::Order
|| Dep
.getKind() == SDep::Output
)
3383 Worklist
.push_back(PI
);
3384 Visited
.insert(PrevSU
);
3389 // Return the cycle of the latest scheduled instruction in the chain.
3390 int SMSchedule::latestCycleInChain(const SDep
&Dep
) {
3391 SmallPtrSet
<SUnit
*, 8> Visited
;
3392 SmallVector
<SDep
, 8> Worklist
;
3393 Worklist
.push_back(Dep
);
3394 int LateCycle
= INT_MIN
;
3395 while (!Worklist
.empty()) {
3396 const SDep
&Cur
= Worklist
.pop_back_val();
3397 SUnit
*SuccSU
= Cur
.getSUnit();
3398 if (Visited
.count(SuccSU
))
3400 std::map
<SUnit
*, int>::const_iterator it
= InstrToCycle
.find(SuccSU
);
3401 if (it
== InstrToCycle
.end())
3403 LateCycle
= std::max(LateCycle
, it
->second
);
3404 for (const auto &SI
: SuccSU
->Succs
)
3405 if (SI
.getKind() == SDep::Order
|| Dep
.getKind() == SDep::Output
)
3406 Worklist
.push_back(SI
);
3407 Visited
.insert(SuccSU
);
3412 /// If an instruction has a use that spans multiple iterations, then
3413 /// return true. These instructions are characterized by having a back-ege
3414 /// to a Phi, which contains a reference to another Phi.
3415 static SUnit
*multipleIterations(SUnit
*SU
, SwingSchedulerDAG
*DAG
) {
3416 for (auto &P
: SU
->Preds
)
3417 if (DAG
->isBackedge(SU
, P
) && P
.getSUnit()->getInstr()->isPHI())
3418 for (auto &S
: P
.getSUnit()->Succs
)
3419 if (S
.getKind() == SDep::Data
&& S
.getSUnit()->getInstr()->isPHI())
3420 return P
.getSUnit();
3424 /// Compute the scheduling start slot for the instruction. The start slot
3425 /// depends on any predecessor or successor nodes scheduled already.
3426 void SMSchedule::computeStart(SUnit
*SU
, int *MaxEarlyStart
, int *MinLateStart
,
3427 int *MinEnd
, int *MaxStart
, int II
,
3428 SwingSchedulerDAG
*DAG
) {
3429 // Iterate over each instruction that has been scheduled already. The start
3430 // slot computation depends on whether the previously scheduled instruction
3431 // is a predecessor or successor of the specified instruction.
3432 for (int cycle
= getFirstCycle(); cycle
<= LastCycle
; ++cycle
) {
3434 // Iterate over each instruction in the current cycle.
3435 for (SUnit
*I
: getInstructions(cycle
)) {
3436 // Because we're processing a DAG for the dependences, we recognize
3437 // the back-edge in recurrences by anti dependences.
3438 for (unsigned i
= 0, e
= (unsigned)SU
->Preds
.size(); i
!= e
; ++i
) {
3439 const SDep
&Dep
= SU
->Preds
[i
];
3440 if (Dep
.getSUnit() == I
) {
3441 if (!DAG
->isBackedge(SU
, Dep
)) {
3442 int EarlyStart
= cycle
+ Dep
.getLatency() -
3443 DAG
->getDistance(Dep
.getSUnit(), SU
, Dep
) * II
;
3444 *MaxEarlyStart
= std::max(*MaxEarlyStart
, EarlyStart
);
3445 if (DAG
->isLoopCarriedDep(SU
, Dep
, false)) {
3446 int End
= earliestCycleInChain(Dep
) + (II
- 1);
3447 *MinEnd
= std::min(*MinEnd
, End
);
3450 int LateStart
= cycle
- Dep
.getLatency() +
3451 DAG
->getDistance(SU
, Dep
.getSUnit(), Dep
) * II
;
3452 *MinLateStart
= std::min(*MinLateStart
, LateStart
);
3455 // For instruction that requires multiple iterations, make sure that
3456 // the dependent instruction is not scheduled past the definition.
3457 SUnit
*BE
= multipleIterations(I
, DAG
);
3458 if (BE
&& Dep
.getSUnit() == BE
&& !SU
->getInstr()->isPHI() &&
3460 *MinLateStart
= std::min(*MinLateStart
, cycle
);
3462 for (unsigned i
= 0, e
= (unsigned)SU
->Succs
.size(); i
!= e
; ++i
) {
3463 if (SU
->Succs
[i
].getSUnit() == I
) {
3464 const SDep
&Dep
= SU
->Succs
[i
];
3465 if (!DAG
->isBackedge(SU
, Dep
)) {
3466 int LateStart
= cycle
- Dep
.getLatency() +
3467 DAG
->getDistance(SU
, Dep
.getSUnit(), Dep
) * II
;
3468 *MinLateStart
= std::min(*MinLateStart
, LateStart
);
3469 if (DAG
->isLoopCarriedDep(SU
, Dep
)) {
3470 int Start
= latestCycleInChain(Dep
) + 1 - II
;
3471 *MaxStart
= std::max(*MaxStart
, Start
);
3474 int EarlyStart
= cycle
+ Dep
.getLatency() -
3475 DAG
->getDistance(Dep
.getSUnit(), SU
, Dep
) * II
;
3476 *MaxEarlyStart
= std::max(*MaxEarlyStart
, EarlyStart
);
3484 /// Order the instructions within a cycle so that the definitions occur
3485 /// before the uses. Returns true if the instruction is added to the start
3486 /// of the list, or false if added to the end.
3487 void SMSchedule::orderDependence(SwingSchedulerDAG
*SSD
, SUnit
*SU
,
3488 std::deque
<SUnit
*> &Insts
) {
3489 MachineInstr
*MI
= SU
->getInstr();
3490 bool OrderBeforeUse
= false;
3491 bool OrderAfterDef
= false;
3492 bool OrderBeforeDef
= false;
3493 unsigned MoveDef
= 0;
3494 unsigned MoveUse
= 0;
3495 int StageInst1
= stageScheduled(SU
);
3498 for (std::deque
<SUnit
*>::iterator I
= Insts
.begin(), E
= Insts
.end(); I
!= E
;
3500 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
< e
; ++i
) {
3501 MachineOperand
&MO
= MI
->getOperand(i
);
3502 if (!MO
.isReg() || !Register::isVirtualRegister(MO
.getReg()))
3505 Register Reg
= MO
.getReg();
3506 unsigned BasePos
, OffsetPos
;
3507 if (ST
.getInstrInfo()->getBaseAndOffsetPosition(*MI
, BasePos
, OffsetPos
))
3508 if (MI
->getOperand(BasePos
).getReg() == Reg
)
3509 if (unsigned NewReg
= SSD
->getInstrBaseReg(SU
))
3512 std::tie(Reads
, Writes
) =
3513 (*I
)->getInstr()->readsWritesVirtualRegister(Reg
);
3514 if (MO
.isDef() && Reads
&& stageScheduled(*I
) <= StageInst1
) {
3515 OrderBeforeUse
= true;
3518 } else if (MO
.isDef() && Reads
&& stageScheduled(*I
) > StageInst1
) {
3519 // Add the instruction after the scheduled instruction.
3520 OrderAfterDef
= true;
3522 } else if (MO
.isUse() && Writes
&& stageScheduled(*I
) == StageInst1
) {
3523 if (cycleScheduled(*I
) == cycleScheduled(SU
) && !(*I
)->isSucc(SU
)) {
3524 OrderBeforeUse
= true;
3528 OrderAfterDef
= true;
3531 } else if (MO
.isUse() && Writes
&& stageScheduled(*I
) > StageInst1
) {
3532 OrderBeforeUse
= true;
3536 OrderAfterDef
= true;
3539 } else if (MO
.isUse() && Writes
&& stageScheduled(*I
) < StageInst1
) {
3540 // Add the instruction before the scheduled instruction.
3541 OrderBeforeUse
= true;
3544 } else if (MO
.isUse() && stageScheduled(*I
) == StageInst1
&&
3545 isLoopCarriedDefOfUse(SSD
, (*I
)->getInstr(), MO
)) {
3547 OrderBeforeDef
= true;
3552 // Check for order dependences between instructions. Make sure the source
3553 // is ordered before the destination.
3554 for (auto &S
: SU
->Succs
) {
3555 if (S
.getSUnit() != *I
)
3557 if (S
.getKind() == SDep::Order
&& stageScheduled(*I
) == StageInst1
) {
3558 OrderBeforeUse
= true;
3562 // We did not handle HW dependences in previous for loop,
3563 // and we normally set Latency = 0 for Anti deps,
3564 // so may have nodes in same cycle with Anti denpendent on HW regs.
3565 else if (S
.getKind() == SDep::Anti
&& stageScheduled(*I
) == StageInst1
) {
3566 OrderBeforeUse
= true;
3567 if ((MoveUse
== 0) || (Pos
< MoveUse
))
3571 for (auto &P
: SU
->Preds
) {
3572 if (P
.getSUnit() != *I
)
3574 if (P
.getKind() == SDep::Order
&& stageScheduled(*I
) == StageInst1
) {
3575 OrderAfterDef
= true;
3581 // A circular dependence.
3582 if (OrderAfterDef
&& OrderBeforeUse
&& MoveUse
== MoveDef
)
3583 OrderBeforeUse
= false;
3585 // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3586 // to a loop-carried dependence.
3588 OrderBeforeUse
= !OrderAfterDef
|| (MoveUse
> MoveDef
);
3590 // The uncommon case when the instruction order needs to be updated because
3591 // there is both a use and def.
3592 if (OrderBeforeUse
&& OrderAfterDef
) {
3593 SUnit
*UseSU
= Insts
.at(MoveUse
);
3594 SUnit
*DefSU
= Insts
.at(MoveDef
);
3595 if (MoveUse
> MoveDef
) {
3596 Insts
.erase(Insts
.begin() + MoveUse
);
3597 Insts
.erase(Insts
.begin() + MoveDef
);
3599 Insts
.erase(Insts
.begin() + MoveDef
);
3600 Insts
.erase(Insts
.begin() + MoveUse
);
3602 orderDependence(SSD
, UseSU
, Insts
);
3603 orderDependence(SSD
, SU
, Insts
);
3604 orderDependence(SSD
, DefSU
, Insts
);
3607 // Put the new instruction first if there is a use in the list. Otherwise,
3608 // put it at the end of the list.
3610 Insts
.push_front(SU
);
3612 Insts
.push_back(SU
);
3615 /// Return true if the scheduled Phi has a loop carried operand.
3616 bool SMSchedule::isLoopCarried(SwingSchedulerDAG
*SSD
, MachineInstr
&Phi
) {
3619 assert(Phi
.isPHI() && "Expecting a Phi.");
3620 SUnit
*DefSU
= SSD
->getSUnit(&Phi
);
3621 unsigned DefCycle
= cycleScheduled(DefSU
);
3622 int DefStage
= stageScheduled(DefSU
);
3624 unsigned InitVal
= 0;
3625 unsigned LoopVal
= 0;
3626 getPhiRegs(Phi
, Phi
.getParent(), InitVal
, LoopVal
);
3627 SUnit
*UseSU
= SSD
->getSUnit(MRI
.getVRegDef(LoopVal
));
3630 if (UseSU
->getInstr()->isPHI())
3632 unsigned LoopCycle
= cycleScheduled(UseSU
);
3633 int LoopStage
= stageScheduled(UseSU
);
3634 return (LoopCycle
> DefCycle
) || (LoopStage
<= DefStage
);
3637 /// Return true if the instruction is a definition that is loop carried
3638 /// and defines the use on the next iteration.
3639 /// v1 = phi(v2, v3)
3640 /// (Def) v3 = op v1
3642 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
3644 bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG
*SSD
,
3645 MachineInstr
*Def
, MachineOperand
&MO
) {
3650 MachineInstr
*Phi
= MRI
.getVRegDef(MO
.getReg());
3651 if (!Phi
|| !Phi
->isPHI() || Phi
->getParent() != Def
->getParent())
3653 if (!isLoopCarried(SSD
, *Phi
))
3655 unsigned LoopReg
= getLoopPhiReg(*Phi
, Phi
->getParent());
3656 for (unsigned i
= 0, e
= Def
->getNumOperands(); i
!= e
; ++i
) {
3657 MachineOperand
&DMO
= Def
->getOperand(i
);
3658 if (!DMO
.isReg() || !DMO
.isDef())
3660 if (DMO
.getReg() == LoopReg
)
3666 // Check if the generated schedule is valid. This function checks if
3667 // an instruction that uses a physical register is scheduled in a
3668 // different stage than the definition. The pipeliner does not handle
3669 // physical register values that may cross a basic block boundary.
3670 bool SMSchedule::isValidSchedule(SwingSchedulerDAG
*SSD
) {
3671 for (int i
= 0, e
= SSD
->SUnits
.size(); i
< e
; ++i
) {
3672 SUnit
&SU
= SSD
->SUnits
[i
];
3673 if (!SU
.hasPhysRegDefs
)
3675 int StageDef
= stageScheduled(&SU
);
3676 assert(StageDef
!= -1 && "Instruction should have been scheduled.");
3677 for (auto &SI
: SU
.Succs
)
3678 if (SI
.isAssignedRegDep())
3679 if (Register::isPhysicalRegister(SI
.getReg()))
3680 if (stageScheduled(SI
.getSUnit()) != StageDef
)
3686 /// A property of the node order in swing-modulo-scheduling is
3687 /// that for nodes outside circuits the following holds:
3688 /// none of them is scheduled after both a successor and a
3690 /// The method below checks whether the property is met.
3691 /// If not, debug information is printed and statistics information updated.
3692 /// Note that we do not use an assert statement.
3693 /// The reason is that although an invalid node oder may prevent
3694 /// the pipeliner from finding a pipelined schedule for arbitrary II,
3695 /// it does not lead to the generation of incorrect code.
3696 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType
&Circuits
) const {
3698 // a sorted vector that maps each SUnit to its index in the NodeOrder
3699 typedef std::pair
<SUnit
*, unsigned> UnitIndex
;
3700 std::vector
<UnitIndex
> Indices(NodeOrder
.size(), std::make_pair(nullptr, 0));
3702 for (unsigned i
= 0, s
= NodeOrder
.size(); i
< s
; ++i
)
3703 Indices
.push_back(std::make_pair(NodeOrder
[i
], i
));
3705 auto CompareKey
= [](UnitIndex i1
, UnitIndex i2
) {
3706 return std::get
<0>(i1
) < std::get
<0>(i2
);
3709 // sort, so that we can perform a binary search
3710 llvm::sort(Indices
, CompareKey
);
3714 // for each SUnit in the NodeOrder, check whether
3715 // it appears after both a successor and a predecessor
3716 // of the SUnit. If this is the case, and the SUnit
3717 // is not part of circuit, then the NodeOrder is not
3719 for (unsigned i
= 0, s
= NodeOrder
.size(); i
< s
; ++i
) {
3720 SUnit
*SU
= NodeOrder
[i
];
3723 bool PredBefore
= false;
3724 bool SuccBefore
= false;
3731 for (SDep
&PredEdge
: SU
->Preds
) {
3732 SUnit
*PredSU
= PredEdge
.getSUnit();
3733 unsigned PredIndex
= std::get
<1>(
3734 *llvm::lower_bound(Indices
, std::make_pair(PredSU
, 0), CompareKey
));
3735 if (!PredSU
->getInstr()->isPHI() && PredIndex
< Index
) {
3742 for (SDep
&SuccEdge
: SU
->Succs
) {
3743 SUnit
*SuccSU
= SuccEdge
.getSUnit();
3744 // Do not process a boundary node, it was not included in NodeOrder,
3745 // hence not in Indices either, call to std::lower_bound() below will
3746 // return Indices.end().
3747 if (SuccSU
->isBoundaryNode())
3749 unsigned SuccIndex
= std::get
<1>(
3750 *llvm::lower_bound(Indices
, std::make_pair(SuccSU
, 0), CompareKey
));
3751 if (!SuccSU
->getInstr()->isPHI() && SuccIndex
< Index
) {
3758 if (PredBefore
&& SuccBefore
&& !SU
->getInstr()->isPHI()) {
3759 // instructions in circuits are allowed to be scheduled
3760 // after both a successor and predecessor.
3761 bool InCircuit
= llvm::any_of(
3762 Circuits
, [SU
](const NodeSet
&Circuit
) { return Circuit
.count(SU
); });
3764 LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
3767 NumNodeOrderIssues
++;
3768 LLVM_DEBUG(dbgs() << "Predecessor ";);
3770 LLVM_DEBUG(dbgs() << Pred
->NodeNum
<< " and successor " << Succ
->NodeNum
3771 << " are scheduled before node " << SU
->NodeNum
3778 dbgs() << "Invalid node order found!\n";
3782 /// Attempt to fix the degenerate cases when the instruction serialization
3783 /// causes the register lifetimes to overlap. For example,
3784 /// p' = store_pi(p, b)
3785 /// = load p, offset
3786 /// In this case p and p' overlap, which means that two registers are needed.
3787 /// Instead, this function changes the load to use p' and updates the offset.
3788 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque
<SUnit
*> &Instrs
) {
3789 unsigned OverlapReg
= 0;
3790 unsigned NewBaseReg
= 0;
3791 for (SUnit
*SU
: Instrs
) {
3792 MachineInstr
*MI
= SU
->getInstr();
3793 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
< e
; ++i
) {
3794 const MachineOperand
&MO
= MI
->getOperand(i
);
3795 // Look for an instruction that uses p. The instruction occurs in the
3796 // same cycle but occurs later in the serialized order.
3797 if (MO
.isReg() && MO
.isUse() && MO
.getReg() == OverlapReg
) {
3798 // Check that the instruction appears in the InstrChanges structure,
3799 // which contains instructions that can have the offset updated.
3800 DenseMap
<SUnit
*, std::pair
<unsigned, int64_t>>::iterator It
=
3801 InstrChanges
.find(SU
);
3802 if (It
!= InstrChanges
.end()) {
3803 unsigned BasePos
, OffsetPos
;
3804 // Update the base register and adjust the offset.
3805 if (TII
->getBaseAndOffsetPosition(*MI
, BasePos
, OffsetPos
)) {
3806 MachineInstr
*NewMI
= MF
.CloneMachineInstr(MI
);
3807 NewMI
->getOperand(BasePos
).setReg(NewBaseReg
);
3809 MI
->getOperand(OffsetPos
).getImm() - It
->second
.second
;
3810 NewMI
->getOperand(OffsetPos
).setImm(NewOffset
);
3811 SU
->setInstr(NewMI
);
3812 MISUnitMap
[NewMI
] = SU
;
3813 NewMIs
.insert(NewMI
);
3820 // Look for an instruction of the form p' = op(p), which uses and defines
3821 // two virtual registers that get allocated to the same physical register.
3822 unsigned TiedUseIdx
= 0;
3823 if (MI
->isRegTiedToUseOperand(i
, &TiedUseIdx
)) {
3824 // OverlapReg is p in the example above.
3825 OverlapReg
= MI
->getOperand(TiedUseIdx
).getReg();
3826 // NewBaseReg is p' in the example above.
3827 NewBaseReg
= MI
->getOperand(i
).getReg();
3834 /// After the schedule has been formed, call this function to combine
3835 /// the instructions from the different stages/cycles. That is, this
3836 /// function creates a schedule that represents a single iteration.
3837 void SMSchedule::finalizeSchedule(SwingSchedulerDAG
*SSD
) {
3838 // Move all instructions to the first stage from later stages.
3839 for (int cycle
= getFirstCycle(); cycle
<= getFinalCycle(); ++cycle
) {
3840 for (int stage
= 1, lastStage
= getMaxStageCount(); stage
<= lastStage
;
3842 std::deque
<SUnit
*> &cycleInstrs
=
3843 ScheduledInstrs
[cycle
+ (stage
* InitiationInterval
)];
3844 for (std::deque
<SUnit
*>::reverse_iterator I
= cycleInstrs
.rbegin(),
3845 E
= cycleInstrs
.rend();
3847 ScheduledInstrs
[cycle
].push_front(*I
);
3850 // Iterate over the definitions in each instruction, and compute the
3851 // stage difference for each use. Keep the maximum value.
3852 for (auto &I
: InstrToCycle
) {
3853 int DefStage
= stageScheduled(I
.first
);
3854 MachineInstr
*MI
= I
.first
->getInstr();
3855 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
< e
; ++i
) {
3856 MachineOperand
&Op
= MI
->getOperand(i
);
3857 if (!Op
.isReg() || !Op
.isDef())
3860 Register Reg
= Op
.getReg();
3861 unsigned MaxDiff
= 0;
3862 bool PhiIsSwapped
= false;
3863 for (MachineRegisterInfo::use_iterator UI
= MRI
.use_begin(Reg
),
3866 MachineOperand
&UseOp
= *UI
;
3867 MachineInstr
*UseMI
= UseOp
.getParent();
3868 SUnit
*SUnitUse
= SSD
->getSUnit(UseMI
);
3869 int UseStage
= stageScheduled(SUnitUse
);
3871 if (UseStage
!= -1 && UseStage
>= DefStage
)
3872 Diff
= UseStage
- DefStage
;
3874 if (isLoopCarried(SSD
, *MI
))
3877 PhiIsSwapped
= true;
3879 MaxDiff
= std::max(Diff
, MaxDiff
);
3881 RegToStageDiff
[Reg
] = std::make_pair(MaxDiff
, PhiIsSwapped
);
3885 // Erase all the elements in the later stages. Only one iteration should
3886 // remain in the scheduled list, and it contains all the instructions.
3887 for (int cycle
= getFinalCycle() + 1; cycle
<= LastCycle
; ++cycle
)
3888 ScheduledInstrs
.erase(cycle
);
3890 // Change the registers in instruction as specified in the InstrChanges
3891 // map. We need to use the new registers to create the correct order.
3892 for (int i
= 0, e
= SSD
->SUnits
.size(); i
!= e
; ++i
) {
3893 SUnit
*SU
= &SSD
->SUnits
[i
];
3894 SSD
->applyInstrChange(SU
->getInstr(), *this);
3897 // Reorder the instructions in each cycle to fix and improve the
3899 for (int Cycle
= getFirstCycle(), E
= getFinalCycle(); Cycle
<= E
; ++Cycle
) {
3900 std::deque
<SUnit
*> &cycleInstrs
= ScheduledInstrs
[Cycle
];
3901 std::deque
<SUnit
*> newOrderPhi
;
3902 for (unsigned i
= 0, e
= cycleInstrs
.size(); i
< e
; ++i
) {
3903 SUnit
*SU
= cycleInstrs
[i
];
3904 if (SU
->getInstr()->isPHI())
3905 newOrderPhi
.push_back(SU
);
3907 std::deque
<SUnit
*> newOrderI
;
3908 for (unsigned i
= 0, e
= cycleInstrs
.size(); i
< e
; ++i
) {
3909 SUnit
*SU
= cycleInstrs
[i
];
3910 if (!SU
->getInstr()->isPHI())
3911 orderDependence(SSD
, SU
, newOrderI
);
3913 // Replace the old order with the new order.
3914 cycleInstrs
.swap(newOrderPhi
);
3915 cycleInstrs
.insert(cycleInstrs
.end(), newOrderI
.begin(), newOrderI
.end());
3916 SSD
->fixupRegisterOverlaps(cycleInstrs
);
3919 LLVM_DEBUG(dump(););
3922 void NodeSet::print(raw_ostream
&os
) const {
3923 os
<< "Num nodes " << size() << " rec " << RecMII
<< " mov " << MaxMOV
3924 << " depth " << MaxDepth
<< " col " << Colocate
<< "\n";
3925 for (const auto &I
: Nodes
)
3926 os
<< " SU(" << I
->NodeNum
<< ") " << *(I
->getInstr());
3930 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3931 /// Print the schedule information to the given output.
3932 void SMSchedule::print(raw_ostream
&os
) const {
3933 // Iterate over each cycle.
3934 for (int cycle
= getFirstCycle(); cycle
<= getFinalCycle(); ++cycle
) {
3935 // Iterate over each instruction in the cycle.
3936 const_sched_iterator cycleInstrs
= ScheduledInstrs
.find(cycle
);
3937 for (SUnit
*CI
: cycleInstrs
->second
) {
3938 os
<< "cycle " << cycle
<< " (" << stageScheduled(CI
) << ") ";
3939 os
<< "(" << CI
->NodeNum
<< ") ";
3940 CI
->getInstr()->print(os
);
3946 /// Utility function used for debugging to print the schedule.
3947 LLVM_DUMP_METHOD
void SMSchedule::dump() const { print(dbgs()); }
3948 LLVM_DUMP_METHOD
void NodeSet::dump() const { print(dbgs()); }
3952 void ResourceManager::initProcResourceVectors(
3953 const MCSchedModel
&SM
, SmallVectorImpl
<uint64_t> &Masks
) {
3954 unsigned ProcResourceID
= 0;
3956 // We currently limit the resource kinds to 64 and below so that we can use
3957 // uint64_t for Masks
3958 assert(SM
.getNumProcResourceKinds() < 64 &&
3959 "Too many kinds of resources, unsupported");
3960 // Create a unique bitmask for every processor resource unit.
3961 // Skip resource at index 0, since it always references 'InvalidUnit'.
3962 Masks
.resize(SM
.getNumProcResourceKinds());
3963 for (unsigned I
= 1, E
= SM
.getNumProcResourceKinds(); I
< E
; ++I
) {
3964 const MCProcResourceDesc
&Desc
= *SM
.getProcResource(I
);
3965 if (Desc
.SubUnitsIdxBegin
)
3967 Masks
[I
] = 1ULL << ProcResourceID
;
3970 // Create a unique bitmask for every processor resource group.
3971 for (unsigned I
= 1, E
= SM
.getNumProcResourceKinds(); I
< E
; ++I
) {
3972 const MCProcResourceDesc
&Desc
= *SM
.getProcResource(I
);
3973 if (!Desc
.SubUnitsIdxBegin
)
3975 Masks
[I
] = 1ULL << ProcResourceID
;
3976 for (unsigned U
= 0; U
< Desc
.NumUnits
; ++U
)
3977 Masks
[I
] |= Masks
[Desc
.SubUnitsIdxBegin
[U
]];
3981 if (SwpShowResMask
) {
3982 dbgs() << "ProcResourceDesc:\n";
3983 for (unsigned I
= 1, E
= SM
.getNumProcResourceKinds(); I
< E
; ++I
) {
3984 const MCProcResourceDesc
*ProcResource
= SM
.getProcResource(I
);
3985 dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3986 ProcResource
->Name
, I
, Masks
[I
],
3987 ProcResource
->NumUnits
);
3989 dbgs() << " -----------------\n";
3994 bool ResourceManager::canReserveResources(const MCInstrDesc
*MID
) const {
3997 if (SwpDebugResource
)
3998 dbgs() << "canReserveResources:\n";
4001 return DFAResources
->canReserveResources(MID
);
4003 unsigned InsnClass
= MID
->getSchedClass();
4004 const MCSchedClassDesc
*SCDesc
= SM
.getSchedClassDesc(InsnClass
);
4005 if (!SCDesc
->isValid()) {
4007 dbgs() << "No valid Schedule Class Desc for schedClass!\n";
4008 dbgs() << "isPseduo:" << MID
->isPseudo() << "\n";
4013 const MCWriteProcResEntry
*I
= STI
->getWriteProcResBegin(SCDesc
);
4014 const MCWriteProcResEntry
*E
= STI
->getWriteProcResEnd(SCDesc
);
4015 for (; I
!= E
; ++I
) {
4018 const MCProcResourceDesc
*ProcResource
=
4019 SM
.getProcResource(I
->ProcResourceIdx
);
4020 unsigned NumUnits
= ProcResource
->NumUnits
;
4022 if (SwpDebugResource
)
4023 dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
4024 ProcResource
->Name
, I
->ProcResourceIdx
,
4025 ProcResourceCount
[I
->ProcResourceIdx
], NumUnits
,
4028 if (ProcResourceCount
[I
->ProcResourceIdx
] >= NumUnits
)
4031 LLVM_DEBUG(if (SwpDebugResource
) dbgs() << "return true\n\n";);
4035 void ResourceManager::reserveResources(const MCInstrDesc
*MID
) {
4037 if (SwpDebugResource
)
4038 dbgs() << "reserveResources:\n";
4041 return DFAResources
->reserveResources(MID
);
4043 unsigned InsnClass
= MID
->getSchedClass();
4044 const MCSchedClassDesc
*SCDesc
= SM
.getSchedClassDesc(InsnClass
);
4045 if (!SCDesc
->isValid()) {
4047 dbgs() << "No valid Schedule Class Desc for schedClass!\n";
4048 dbgs() << "isPseduo:" << MID
->isPseudo() << "\n";
4052 for (const MCWriteProcResEntry
&PRE
:
4053 make_range(STI
->getWriteProcResBegin(SCDesc
),
4054 STI
->getWriteProcResEnd(SCDesc
))) {
4057 ++ProcResourceCount
[PRE
.ProcResourceIdx
];
4059 if (SwpDebugResource
) {
4060 const MCProcResourceDesc
*ProcResource
=
4061 SM
.getProcResource(PRE
.ProcResourceIdx
);
4062 dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
4063 ProcResource
->Name
, PRE
.ProcResourceIdx
,
4064 ProcResourceCount
[PRE
.ProcResourceIdx
],
4065 ProcResource
->NumUnits
, PRE
.Cycles
);
4070 if (SwpDebugResource
)
4071 dbgs() << "reserveResources: done!\n\n";
4075 bool ResourceManager::canReserveResources(const MachineInstr
&MI
) const {
4076 return canReserveResources(&MI
.getDesc());
4079 void ResourceManager::reserveResources(const MachineInstr
&MI
) {
4080 return reserveResources(&MI
.getDesc());
4083 void ResourceManager::clearResources() {
4085 return DFAResources
->clearResources();
4086 std::fill(ProcResourceCount
.begin(), ProcResourceCount
.end(), 0);