[MachineScheduler] Fix physreg dependencies of ExitSU (#123541)
[llvm-project.git] / llvm / lib / CodeGen / MachineTraceMetrics.cpp
blob021c1a058c02046b21a87083903b070d7e214c7a
1 //===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/CodeGen/MachineTraceMetrics.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/PostOrderIterator.h"
13 #include "llvm/ADT/SmallPtrSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/SparseSet.h"
16 #include "llvm/CodeGen/MachineBasicBlock.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineLoopInfo.h"
20 #include "llvm/CodeGen/MachineOperand.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/TargetRegisterInfo.h"
23 #include "llvm/CodeGen/TargetSchedule.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/InitializePasses.h"
26 #include "llvm/Pass.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/Format.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <algorithm>
32 #include <cassert>
33 #include <tuple>
34 #include <utility>
36 using namespace llvm;
38 #define DEBUG_TYPE "machine-trace-metrics"
// Static key identifying MachineTraceMetricsAnalysis to the new pass manager.
40 AnalysisKey MachineTraceMetricsAnalysis::Key;
42 MachineTraceMetricsAnalysis::Result
43 MachineTraceMetricsAnalysis::run(MachineFunction &MF,
44 MachineFunctionAnalysisManager &MFAM) {
45 return Result(MF, MFAM.getResult<MachineLoopAnalysis>(MF));
48 PreservedAnalyses
49 MachineTraceMetricsVerifierPass::run(MachineFunction &MF,
50 MachineFunctionAnalysisManager &MFAM) {
51 MFAM.getResult<MachineTraceMetricsAnalysis>(MF).verifyAnalysis();
52 return PreservedAnalyses::all();
// Legacy-PM pass identity; the *address* of ID is what uniquely identifies
// the pass.
55 char MachineTraceMetricsWrapperPass::ID = 0;
// Out-of-line storage for the pass ID exposed in the llvm namespace.
57 char &llvm::MachineTraceMetricsID = MachineTraceMetricsWrapperPass::ID;
// Register the wrapper pass and its MachineLoopInfo dependency with the
// legacy pass manager.
59 INITIALIZE_PASS_BEGIN(MachineTraceMetricsWrapperPass, DEBUG_TYPE,
60 "Machine Trace Metrics", false, true)
61 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
62 INITIALIZE_PASS_END(MachineTraceMetricsWrapperPass, DEBUG_TYPE,
63 "Machine Trace Metrics", false, true)
65 MachineTraceMetricsWrapperPass::MachineTraceMetricsWrapperPass()
66 : MachineFunctionPass(ID) {}
68 void MachineTraceMetricsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
69 AU.setPreservesAll();
70 AU.addRequired<MachineLoopInfoWrapperPass>();
71 MachineFunctionPass::getAnalysisUsage(AU);
74 void MachineTraceMetrics::init(MachineFunction &Func,
75 const MachineLoopInfo &LI) {
76 MF = &Func;
77 const TargetSubtargetInfo &ST = MF->getSubtarget();
78 TII = ST.getInstrInfo();
79 TRI = ST.getRegisterInfo();
80 MRI = &MF->getRegInfo();
81 Loops = &LI;
82 SchedModel.init(&ST);
83 BlockInfo.resize(MF->getNumBlockIDs());
84 ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
85 SchedModel.getNumProcResourceKinds());
88 bool MachineTraceMetricsWrapperPass::runOnMachineFunction(MachineFunction &MF) {
89 MTM.init(MF, getAnalysis<MachineLoopInfoWrapperPass>().getLI());
90 return false;
93 MachineTraceMetrics::~MachineTraceMetrics() { clear(); }
95 void MachineTraceMetrics::clear() {
96 MF = nullptr;
97 BlockInfo.clear();
98 for (auto &E : Ensembles)
99 E.reset();
102 //===----------------------------------------------------------------------===//
103 // Fixed block information
104 //===----------------------------------------------------------------------===//
106 // The number of instructions in a basic block and the CPU resources used by
107 // those instructions don't depend on any given trace strategy.
109 /// Compute the resource usage in basic block MBB.
110 const MachineTraceMetrics::FixedBlockInfo*
111 MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
112 assert(MBB && "No basic block");
113 FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
114 if (FBI->hasResources())
115 return FBI;
117 // Compute resource usage in the block.
118 FBI->HasCalls = false;
119 unsigned InstrCount = 0;
121 // Add up per-processor resource cycles as well.
122 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
123 SmallVector<unsigned, 32> PRCycles(PRKinds);
125 for (const auto &MI : *MBB) {
126 if (MI.isTransient())
127 continue;
128 ++InstrCount;
129 if (MI.isCall())
130 FBI->HasCalls = true;
132 // Count processor resources used.
133 if (!SchedModel.hasInstrSchedModel())
134 continue;
135 const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
136 if (!SC->isValid())
137 continue;
139 for (TargetSchedModel::ProcResIter
140 PI = SchedModel.getWriteProcResBegin(SC),
141 PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
142 assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
143 PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
146 FBI->InstrCount = InstrCount;
148 // Scale the resource cycles so they are comparable.
149 unsigned PROffset = MBB->getNumber() * PRKinds;
150 for (unsigned K = 0; K != PRKinds; ++K)
151 ProcReleaseAtCycles[PROffset + K] =
152 PRCycles[K] * SchedModel.getResourceFactor(K);
154 return FBI;
157 ArrayRef<unsigned>
158 MachineTraceMetrics::getProcReleaseAtCycles(unsigned MBBNum) const {
159 assert(BlockInfo[MBBNum].hasResources() &&
160 "getResources() must be called before getProcReleaseAtCycles()");
161 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
162 assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
163 return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
166 //===----------------------------------------------------------------------===//
167 // Ensemble utility functions
168 //===----------------------------------------------------------------------===//
170 MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
171 : MTM(*ct) {
172 BlockInfo.resize(MTM.BlockInfo.size());
173 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
174 ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
175 ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
178 // Virtual destructor serves as an anchor.
179 MachineTraceMetrics::Ensemble::~Ensemble() = default;
181 const MachineLoop*
182 MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
183 return MTM.Loops->getLoopFor(MBB);
186 // Update resource-related information in the TraceBlockInfo for MBB.
187 // Only update resources related to the trace above MBB.
188 void MachineTraceMetrics::Ensemble::
189 computeDepthResources(const MachineBasicBlock *MBB) {
190 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
191 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
192 unsigned PROffset = MBB->getNumber() * PRKinds;
194 // Compute resources from trace above. The top block is simple.
195 if (!TBI->Pred) {
196 TBI->InstrDepth = 0;
197 TBI->Head = MBB->getNumber();
198 std::fill(ProcResourceDepths.begin() + PROffset,
199 ProcResourceDepths.begin() + PROffset + PRKinds, 0);
200 return;
203 // Compute from the block above. A post-order traversal ensures the
204 // predecessor is always computed first.
205 unsigned PredNum = TBI->Pred->getNumber();
206 TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
207 assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
208 const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
209 TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
210 TBI->Head = PredTBI->Head;
212 // Compute per-resource depths.
213 ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
214 ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
215 for (unsigned K = 0; K != PRKinds; ++K)
216 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
219 // Update resource-related information in the TraceBlockInfo for MBB.
220 // Only update resources related to the trace below MBB.
221 void MachineTraceMetrics::Ensemble::
222 computeHeightResources(const MachineBasicBlock *MBB) {
223 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
224 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
225 unsigned PROffset = MBB->getNumber() * PRKinds;
227 // Compute resources for the current block.
228 TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
229 ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
231 // The trace tail is done.
232 if (!TBI->Succ) {
233 TBI->Tail = MBB->getNumber();
234 llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
235 return;
238 // Compute from the block below. A post-order traversal ensures the
239 // predecessor is always computed first.
240 unsigned SuccNum = TBI->Succ->getNumber();
241 TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
242 assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
243 TBI->InstrHeight += SuccTBI->InstrHeight;
244 TBI->Tail = SuccTBI->Tail;
246 // Compute per-resource heights.
247 ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
248 for (unsigned K = 0; K != PRKinds; ++K)
249 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
252 // Check if depth resources for MBB are valid and return the TBI.
253 // Return NULL if the resources have been invalidated.
254 const MachineTraceMetrics::TraceBlockInfo*
255 MachineTraceMetrics::Ensemble::
256 getDepthResources(const MachineBasicBlock *MBB) const {
257 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
258 return TBI->hasValidDepth() ? TBI : nullptr;
261 // Check if height resources for MBB are valid and return the TBI.
262 // Return NULL if the resources have been invalidated.
263 const MachineTraceMetrics::TraceBlockInfo*
264 MachineTraceMetrics::Ensemble::
265 getHeightResources(const MachineBasicBlock *MBB) const {
266 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
267 return TBI->hasValidHeight() ? TBI : nullptr;
270 /// Get an array of processor resource depths for MBB. Indexed by processor
271 /// resource kind, this array contains the scaled processor resources consumed
272 /// by all blocks preceding MBB in its trace. It does not include instructions
273 /// in MBB.
275 /// Compare TraceBlockInfo::InstrDepth.
276 ArrayRef<unsigned>
277 MachineTraceMetrics::Ensemble::
278 getProcResourceDepths(unsigned MBBNum) const {
279 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
280 assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
281 return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
284 /// Get an array of processor resource heights for MBB. Indexed by processor
285 /// resource kind, this array contains the scaled processor resources consumed
286 /// by this block and all blocks following it in its trace.
288 /// Compare TraceBlockInfo::InstrHeight.
289 ArrayRef<unsigned>
290 MachineTraceMetrics::Ensemble::
291 getProcResourceHeights(unsigned MBBNum) const {
292 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
293 assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
294 return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
297 //===----------------------------------------------------------------------===//
298 // Trace Selection Strategies
299 //===----------------------------------------------------------------------===//
301 // A trace selection strategy is implemented as a sub-class of Ensemble. The
302 // trace through a block B is computed by two DFS traversals of the CFG
303 // starting from B. One upwards, and one downwards. During the upwards DFS,
304 // pickTracePred() is called on the post-ordered blocks. During the downwards
305 // DFS, pickTraceSucc() is called in a post-order.
308 // We never allow traces that leave loops, but we do allow traces to enter
309 // nested loops. We also never allow traces to contain back-edges.
311 // This means that a loop header can never appear above the center block of a
312 // trace, except as the trace head. Below the center block, loop exiting edges
313 // are banned.
315 // Return true if an edge from the From loop to the To loop is leaving a loop.
316 // Either of To and From can be null.
317 static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
318 return From && !From->contains(To);
321 // MinInstrCountEnsemble - Pick the trace that executes the least number of
322 // instructions.
323 namespace {
325 class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
326 const char *getName() const override { return "MinInstr"; }
327 const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
328 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
330 public:
331 MinInstrCountEnsemble(MachineTraceMetrics *mtm)
332 : MachineTraceMetrics::Ensemble(mtm) {}
335 /// Pick only the current basic block for the trace and do not choose any
336 /// predecessors/successors.
337 class LocalEnsemble : public MachineTraceMetrics::Ensemble {
338 const char *getName() const override { return "Local"; }
339 const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
340 return nullptr;
342 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
343 return nullptr;
346 public:
347 LocalEnsemble(MachineTraceMetrics *MTM)
348 : MachineTraceMetrics::Ensemble(MTM) {}
350 } // end anonymous namespace
352 // Select the preferred predecessor for MBB.
353 const MachineBasicBlock*
354 MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
355 if (MBB->pred_empty())
356 return nullptr;
357 const MachineLoop *CurLoop = getLoopFor(MBB);
358 // Don't leave loops, and never follow back-edges.
359 if (CurLoop && MBB == CurLoop->getHeader())
360 return nullptr;
361 unsigned CurCount = MTM.getResources(MBB)->InstrCount;
362 const MachineBasicBlock *Best = nullptr;
363 unsigned BestDepth = 0;
364 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
365 const MachineTraceMetrics::TraceBlockInfo *PredTBI =
366 getDepthResources(Pred);
367 // Ignore cycles that aren't natural loops.
368 if (!PredTBI)
369 continue;
370 // Pick the predecessor that would give this block the smallest InstrDepth.
371 unsigned Depth = PredTBI->InstrDepth + CurCount;
372 if (!Best || Depth < BestDepth) {
373 Best = Pred;
374 BestDepth = Depth;
377 return Best;
380 // Select the preferred successor for MBB.
381 const MachineBasicBlock*
382 MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
383 if (MBB->succ_empty())
384 return nullptr;
385 const MachineLoop *CurLoop = getLoopFor(MBB);
386 const MachineBasicBlock *Best = nullptr;
387 unsigned BestHeight = 0;
388 for (const MachineBasicBlock *Succ : MBB->successors()) {
389 // Don't consider back-edges.
390 if (CurLoop && Succ == CurLoop->getHeader())
391 continue;
392 // Don't consider successors exiting CurLoop.
393 if (isExitingLoop(CurLoop, getLoopFor(Succ)))
394 continue;
395 const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
396 getHeightResources(Succ);
397 // Ignore cycles that aren't natural loops.
398 if (!SuccTBI)
399 continue;
400 // Pick the successor that would give this block the smallest InstrHeight.
401 unsigned Height = SuccTBI->InstrHeight;
402 if (!Best || Height < BestHeight) {
403 Best = Succ;
404 BestHeight = Height;
407 return Best;
410 // Get an Ensemble sub-class for the requested trace strategy.
411 MachineTraceMetrics::Ensemble *
412 MachineTraceMetrics::getEnsemble(MachineTraceStrategy strategy) {
413 assert(strategy < MachineTraceStrategy::TS_NumStrategies &&
414 "Invalid trace strategy enum");
415 std::unique_ptr<MachineTraceMetrics::Ensemble> &E =
416 Ensembles[static_cast<size_t>(strategy)];
417 if (E)
418 return E.get();
420 // Allocate new Ensemble on demand.
421 switch (strategy) {
422 case MachineTraceStrategy::TS_MinInstrCount:
423 E = std::make_unique<MinInstrCountEnsemble>(MinInstrCountEnsemble(this));
424 break;
425 case MachineTraceStrategy::TS_Local:
426 E = std::make_unique<LocalEnsemble>(LocalEnsemble(this));
427 break;
428 default: llvm_unreachable("Invalid trace strategy enum");
430 return E.get();
433 void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
434 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
435 << '\n');
436 BlockInfo[MBB->getNumber()].invalidate();
437 for (auto &E : Ensembles)
438 if (E)
439 E->invalidate(MBB);
442 bool MachineTraceMetrics::invalidate(
443 MachineFunction &, const PreservedAnalyses &PA,
444 MachineFunctionAnalysisManager::Invalidator &) {
445 // Check whether the analysis, all analyses on machine functions, or the
446 // machine function's CFG have been preserved.
447 auto PAC = PA.getChecker<MachineTraceMetricsAnalysis>();
448 return !PAC.preserved() &&
449 !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
450 !PAC.preservedSet<CFGAnalyses>();
453 void MachineTraceMetrics::verifyAnalysis() const {
454 if (!MF)
455 return;
456 #ifndef NDEBUG
457 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
458 for (auto &E : Ensembles)
459 if (E)
460 E->verify();
461 #endif
464 //===----------------------------------------------------------------------===//
465 // Trace building
466 //===----------------------------------------------------------------------===//
468 // Traces are built by two CFG traversals. To avoid recomputing too much, use a
469 // set abstraction that confines the search to the current loop, and doesn't
470 // revisit blocks.
472 namespace {
474 struct LoopBounds {
475 MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
476 SmallPtrSet<const MachineBasicBlock*, 8> Visited;
477 const MachineLoopInfo *Loops;
478 bool Downward = false;
480 LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
481 const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
484 } // end anonymous namespace
486 // Specialize po_iterator_storage in order to prune the post-order traversal so
487 // it is limited to the current loop and doesn't traverse the loop back edges.
488 namespace llvm {
490 template<>
491 class po_iterator_storage<LoopBounds, true> {
492 LoopBounds &LB;
494 public:
495 po_iterator_storage(LoopBounds &lb) : LB(lb) {}
497 void finishPostorder(const MachineBasicBlock*) {}
499 bool insertEdge(std::optional<const MachineBasicBlock *> From,
500 const MachineBasicBlock *To) {
501 // Skip already visited To blocks.
502 MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
503 if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
504 return false;
505 // From is null once when To is the trace center block.
506 if (From) {
507 if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
508 // Don't follow backedges, don't leave FromLoop when going upwards.
509 if ((LB.Downward ? To : *From) == FromLoop->getHeader())
510 return false;
511 // Don't leave FromLoop.
512 if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
513 return false;
516 // To is a new block. Mark the block as visited in case the CFG has cycles
517 // that MachineLoopInfo didn't recognize as a natural loop.
518 return LB.Visited.insert(To).second;
522 } // end namespace llvm
524 /// Compute the trace through MBB.
525 void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
526 LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
527 << printMBBReference(*MBB) << '\n');
528 // Set up loop bounds for the backwards post-order traversal.
529 LoopBounds Bounds(BlockInfo, MTM.Loops);
531 // Run an upwards post-order search for the trace start.
532 Bounds.Downward = false;
533 Bounds.Visited.clear();
534 for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
535 LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
536 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
537 // All the predecessors have been visited, pick the preferred one.
538 TBI.Pred = pickTracePred(I);
539 LLVM_DEBUG({
540 if (TBI.Pred)
541 dbgs() << printMBBReference(*TBI.Pred) << '\n';
542 else
543 dbgs() << "null\n";
545 // The trace leading to I is now known, compute the depth resources.
546 computeDepthResources(I);
549 // Run a downwards post-order search for the trace end.
550 Bounds.Downward = true;
551 Bounds.Visited.clear();
552 for (const auto *I : post_order_ext(MBB, Bounds)) {
553 LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
554 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
555 // All the successors have been visited, pick the preferred one.
556 TBI.Succ = pickTraceSucc(I);
557 LLVM_DEBUG({
558 if (TBI.Succ)
559 dbgs() << printMBBReference(*TBI.Succ) << '\n';
560 else
561 dbgs() << "null\n";
563 // The trace leaving I is now known, compute the height resources.
564 computeHeightResources(I);
568 /// Invalidate traces through BadMBB.
569 void
570 MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
571 SmallVector<const MachineBasicBlock*, 16> WorkList;
572 TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
574 // Invalidate height resources of blocks above MBB.
575 if (BadTBI.hasValidHeight()) {
576 BadTBI.invalidateHeight();
577 WorkList.push_back(BadMBB);
578 do {
579 const MachineBasicBlock *MBB = WorkList.pop_back_val();
580 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
581 << getName() << " height.\n");
582 // Find any MBB predecessors that have MBB as their preferred successor.
583 // They are the only ones that need to be invalidated.
584 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
585 TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
586 if (!TBI.hasValidHeight())
587 continue;
588 if (TBI.Succ == MBB) {
589 TBI.invalidateHeight();
590 WorkList.push_back(Pred);
591 continue;
593 // Verify that TBI.Succ is actually a *I successor.
594 assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
596 } while (!WorkList.empty());
599 // Invalidate depth resources of blocks below MBB.
600 if (BadTBI.hasValidDepth()) {
601 BadTBI.invalidateDepth();
602 WorkList.push_back(BadMBB);
603 do {
604 const MachineBasicBlock *MBB = WorkList.pop_back_val();
605 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
606 << getName() << " depth.\n");
607 // Find any MBB successors that have MBB as their preferred predecessor.
608 // They are the only ones that need to be invalidated.
609 for (const MachineBasicBlock *Succ : MBB->successors()) {
610 TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
611 if (!TBI.hasValidDepth())
612 continue;
613 if (TBI.Pred == MBB) {
614 TBI.invalidateDepth();
615 WorkList.push_back(Succ);
616 continue;
618 // Verify that TBI.Pred is actually a *I predecessor.
619 assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
621 } while (!WorkList.empty());
624 // Clear any per-instruction data. We only have to do this for BadMBB itself
625 // because the instructions in that block may change. Other blocks may be
626 // invalidated, but their instructions will stay the same, so there is no
627 // need to erase the Cycle entries. They will be overwritten when we
628 // recompute.
629 for (const auto &I : *BadMBB)
630 Cycles.erase(&I);
633 void MachineTraceMetrics::Ensemble::verify() const {
634 #ifndef NDEBUG
635 assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
636 "Outdated BlockInfo size");
637 for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
638 const TraceBlockInfo &TBI = BlockInfo[Num];
639 if (TBI.hasValidDepth() && TBI.Pred) {
640 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
641 assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
642 assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
643 "Trace is broken, depth should have been invalidated.");
644 const MachineLoop *Loop = getLoopFor(MBB);
645 assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
647 if (TBI.hasValidHeight() && TBI.Succ) {
648 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
649 assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
650 assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
651 "Trace is broken, height should have been invalidated.");
652 const MachineLoop *Loop = getLoopFor(MBB);
653 const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
654 assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
655 "Trace contains backedge");
658 #endif
661 //===----------------------------------------------------------------------===//
662 // Data Dependencies
663 //===----------------------------------------------------------------------===//
665 // Compute the depth and height of each instruction based on data dependencies
666 // and instruction latencies. These cycle numbers assume that the CPU can issue
667 // an infinite number of instructions per cycle as long as their dependencies
668 // are ready.
670 // A data dependency is represented as a defining MI and operand numbers on the
671 // defining and using MI.
672 namespace {
674 struct DataDep {
675 const MachineInstr *DefMI;
676 unsigned DefOp;
677 unsigned UseOp;
679 DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
680 : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
682 /// Create a DataDep from an SSA form virtual register.
683 DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
684 : UseOp(UseOp) {
685 assert(Register::isVirtualRegister(VirtReg));
686 MachineOperand *DefMO = MRI->getOneDef(VirtReg);
687 assert(DefMO && "Register does not have unique def");
688 DefMI = DefMO->getParent();
689 DefOp = DefMO->getOperandNo();
693 } // end anonymous namespace
695 // Get the input data dependencies that must be ready before UseMI can issue.
696 // Return true if UseMI has any physreg operands.
697 static bool getDataDeps(const MachineInstr &UseMI,
698 SmallVectorImpl<DataDep> &Deps,
699 const MachineRegisterInfo *MRI) {
700 // Debug values should not be included in any calculations.
701 if (UseMI.isDebugInstr())
702 return false;
704 bool HasPhysRegs = false;
705 for (const MachineOperand &MO : UseMI.operands()) {
706 if (!MO.isReg())
707 continue;
708 Register Reg = MO.getReg();
709 if (!Reg)
710 continue;
711 if (Reg.isPhysical()) {
712 HasPhysRegs = true;
713 continue;
715 // Collect virtual register reads.
716 if (MO.readsReg())
717 Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
719 return HasPhysRegs;
722 // Get the input data dependencies of a PHI instruction, using Pred as the
723 // preferred predecessor.
724 // This will add at most one dependency to Deps.
725 static void getPHIDeps(const MachineInstr &UseMI,
726 SmallVectorImpl<DataDep> &Deps,
727 const MachineBasicBlock *Pred,
728 const MachineRegisterInfo *MRI) {
729 // No predecessor at the beginning of a trace. Ignore dependencies.
730 if (!Pred)
731 return;
732 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
733 for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
734 if (UseMI.getOperand(i + 1).getMBB() == Pred) {
735 Register Reg = UseMI.getOperand(i).getReg();
736 Deps.push_back(DataDep(MRI, Reg, i));
737 return;
742 // Identify physreg dependencies for UseMI, and update the live regunit
743 // tracking set when scanning instructions downwards.
744 static void updatePhysDepsDownwards(const MachineInstr *UseMI,
745 SmallVectorImpl<DataDep> &Deps,
746 SparseSet<LiveRegUnit> &RegUnits,
747 const TargetRegisterInfo *TRI) {
748 SmallVector<MCRegister, 8> Kills;
749 SmallVector<unsigned, 8> LiveDefOps;
751 for (const MachineOperand &MO : UseMI->operands()) {
752 if (!MO.isReg() || !MO.getReg().isPhysical())
753 continue;
754 MCRegister Reg = MO.getReg().asMCReg();
755 // Track live defs and kills for updating RegUnits.
756 if (MO.isDef()) {
757 if (MO.isDead())
758 Kills.push_back(Reg);
759 else
760 LiveDefOps.push_back(MO.getOperandNo());
761 } else if (MO.isKill())
762 Kills.push_back(Reg);
763 // Identify dependencies.
764 if (!MO.readsReg())
765 continue;
766 for (MCRegUnit Unit : TRI->regunits(Reg)) {
767 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
768 if (I == RegUnits.end())
769 continue;
770 Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
771 break;
775 // Update RegUnits to reflect live registers after UseMI.
776 // First kills.
777 for (MCRegister Kill : Kills)
778 for (MCRegUnit Unit : TRI->regunits(Kill))
779 RegUnits.erase(Unit);
781 // Second, live defs.
782 for (unsigned DefOp : LiveDefOps) {
783 for (MCRegUnit Unit :
784 TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
785 LiveRegUnit &LRU = RegUnits[Unit];
786 LRU.MI = UseMI;
787 LRU.Op = DefOp;
792 /// The length of the critical path through a trace is the maximum of two path
793 /// lengths:
795 /// 1. The maximum height+depth over all instructions in the trace center block.
797 /// 2. The longest cross-block dependency chain. For small blocks, it is
798 /// possible that the critical path through the trace doesn't include any
799 /// instructions in the block.
801 /// This function computes the second number from the live-in list of the
802 /// center block.
803 unsigned MachineTraceMetrics::Ensemble::
804 computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
805 assert(TBI.HasValidInstrDepths && "Missing depth info");
806 assert(TBI.HasValidInstrHeights && "Missing height info");
807 unsigned MaxLen = 0;
808 for (const LiveInReg &LIR : TBI.LiveIns) {
809 if (!LIR.Reg.isVirtual())
810 continue;
811 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
812 // Ignore dependencies outside the current trace.
813 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
814 if (!DefTBI.isUsefulDominator(TBI))
815 continue;
816 unsigned Len = LIR.Height + Cycles[DefMI].Depth;
817 MaxLen = std::max(MaxLen, Len);
819 return MaxLen;
822 void MachineTraceMetrics::Ensemble::
823 updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
824 SparseSet<LiveRegUnit> &RegUnits) {
825 SmallVector<DataDep, 8> Deps;
826 // Collect all data dependencies.
827 if (UseMI.isPHI())
828 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
829 else if (getDataDeps(UseMI, Deps, MTM.MRI))
830 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
832 // Filter and process dependencies, computing the earliest issue cycle.
833 unsigned Cycle = 0;
834 for (const DataDep &Dep : Deps) {
835 const TraceBlockInfo&DepTBI =
836 BlockInfo[Dep.DefMI->getParent()->getNumber()];
837 // Ignore dependencies from outside the current trace.
838 if (!DepTBI.isUsefulDominator(TBI))
839 continue;
840 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
841 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
842 // Add latency if DefMI is a real instruction. Transients get latency 0.
843 if (!Dep.DefMI->isTransient())
844 DepCycle += MTM.SchedModel
845 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
846 Cycle = std::max(Cycle, DepCycle);
848 // Remember the instruction depth.
849 InstrCycles &MICycles = Cycles[&UseMI];
850 MICycles.Depth = Cycle;
852 if (TBI.HasValidInstrHeights) {
853 // Update critical path length.
854 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
855 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
856 } else {
857 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
861 void MachineTraceMetrics::Ensemble::
862 updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
863 SparseSet<LiveRegUnit> &RegUnits) {
864 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
867 void MachineTraceMetrics::Ensemble::
868 updateDepths(MachineBasicBlock::iterator Start,
869 MachineBasicBlock::iterator End,
870 SparseSet<LiveRegUnit> &RegUnits) {
871 for (; Start != End; Start++)
872 updateDepth(Start->getParent(), *Start, RegUnits);
/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    // Walk up the trace via the predecessor links.
    MBB = TBI.Pred;
  } while (MBB);
  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  // RegUnits tracks, per register unit, the most recent in-trace physreg def
  // seen while scanning downwards (consumed by updateDepth).
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
  // Go through trace blocks in top-down order, stopping after the center block.
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;
    // Print out resource depths here as well.
    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
    // Per-instruction depths (and critical path updates) happen here.
    for (const auto &UseMI : *MBB) {
      updateDepth(TBI, UseMI, RegUnits);
// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  // Operand indices of physreg reads; processed after all defs (below) so a
  // def+read of the same register on MI is handled in the right order.
  SmallVector<unsigned, 8> ReadOps;

  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    // Only physical registers are tracked here; virtual register deps are
    // handled separately by the caller.
    if (!Reg.isPhysical())
      continue;
    if (MO.readsReg())
      ReadOps.push_back(MO.getOperandNo());
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI.isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
                                                      I->MI, I->Op);
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);

  // Now we know the height of MI. Update any regunits read.
  for (unsigned Op : ReadOps) {
    MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      LiveRegUnit &LRU = RegUnits[Unit];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != &MI) {
        LRU.Cycle = Height;
        LRU.MI = &MI;
        LRU.Op = Op;
  return Height;
986 using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;
988 // Push the height of DefMI upwards if required to match UseMI.
989 // Return true if this is the first time DefMI was seen.
990 static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
991 unsigned UseHeight, MIHeightMap &Heights,
992 const TargetSchedModel &SchedModel,
993 const TargetInstrInfo *TII) {
994 // Adjust height by Dep.DefMI latency.
995 if (!Dep.DefMI->isTransient())
996 UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
997 Dep.UseOp);
999 // Update Heights[DefMI] to be the maximum height seen.
1000 MIHeightMap::iterator I;
1001 bool New;
1002 std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
1003 if (New)
1004 return true;
1006 // DefMI has been pushed before. Give it the max height.
1007 if (I->second < UseHeight)
1008 I->second = UseHeight;
1009 return false;
1012 /// Assuming that the virtual register defined by DefMI:DefOp was used by
1013 /// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
1014 /// when reaching the block that contains DefMI.
1015 void MachineTraceMetrics::Ensemble::
1016 addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
1017 ArrayRef<const MachineBasicBlock*> Trace) {
1018 assert(!Trace.empty() && "Trace should contain at least one block");
1019 Register Reg = DefMI->getOperand(DefOp).getReg();
1020 assert(Reg.isVirtual());
1021 const MachineBasicBlock *DefMBB = DefMI->getParent();
1023 // Reg is live-in to all blocks in Trace that follow DefMBB.
1024 for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
1025 if (MBB == DefMBB)
1026 return;
1027 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1028 // Just add the register. The height will be updated later.
1029 TBI.LiveIns.push_back(Reg);
/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    // Live-ins will be recollected from scratch for every recomputed block.
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);
  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;
  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (LiveInReg &LI : TBI.LiveIns) {
      if (LI.Reg.isVirtual()) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
  // Go through the trace blocks in bottom-up order.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;
    // Dump the per-resource height contributions for this block.
    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();
    if (Succ) {
      // PHIs cluster at the top of a block, so stop at the first non-PHI.
      for (const auto &PHI : *Succ) {
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
                            MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
    // Go through the block backwards.
    for (const MachineInstr &MI : reverse(*MBB)) {
      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(&MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
                                      MTM.TII, MTM.TRI);
      // Update the required height of any virtual registers read by MI.
      for (const DataDep &Dep : Deps)
        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
      // Record MI's height.
      InstrCycles &MICycles = Cycles[&MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
        continue;
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
    for (LiveInReg &LIR : TBI.LiveIns) {
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
    // Transfer the live regunits to the live-in list.
    for (const LiveRegUnit &RU : RegUnits) {
      TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
      LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
                        << RU.Cycle);
    LLVM_DEBUG(dbgs() << '\n');
    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
// Return the trace through MBB, lazily computing whatever is still missing.
// The trace structure must be valid before per-instruction depths/heights
// can be computed, hence the fixed order of the three compute calls.
MachineTraceMetrics::Trace
MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];

  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
    computeTrace(MBB);
  if (!TBI.HasValidInstrDepths)
    computeInstrDepths(MBB);
  if (!TBI.HasValidInstrHeights)
    computeInstrHeights(MBB);

  return Trace(*this, TBI);
1205 unsigned
1206 MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
1207 assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1208 "MI must be in the trace center block");
1209 InstrCycles Cyc = getInstrCycles(MI);
1210 return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1213 unsigned
1214 MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
1215 const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1216 SmallVector<DataDep, 1> Deps;
1217 getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1218 assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1219 DataDep &Dep = Deps.front();
1220 unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1221 // Add latency if DefMI is a real instruction. Transients get latency 0.
1222 if (!Dep.DefMI->isTransient())
1223 DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1224 &PHI, Dep.UseOp);
1225 return DepCycle;
1228 /// When bottom is set include instructions in current block in estimate.
1229 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
1230 // Find the limiting processor resource.
1231 // Numbers have been pre-scaled to be comparable.
1232 unsigned PRMax = 0;
1233 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1234 if (Bottom) {
1235 ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
1236 for (unsigned K = 0; K != PRDepths.size(); ++K)
1237 PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1238 } else {
1239 for (unsigned PRD : PRDepths)
1240 PRMax = std::max(PRMax, PRD);
1242 // Convert to cycle count.
1243 PRMax = TE.MTM.getCycles(PRMax);
1245 /// All instructions before current block
1246 unsigned Instrs = TBI.InstrDepth;
1247 // plus instructions in current block
1248 if (Bottom)
1249 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1250 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1251 Instrs /= IW;
1252 // Assume issue width 1 without a schedule model.
1253 return std::max(Instrs, PRMax);
// Estimate the length of the whole trace in cycles, optionally pretending
// that Extrablocks/ExtraInstrs were added and RemoveInstrs were deleted.
// Returns the max of the resource-limited and issue-width-limited bounds.
unsigned MachineTraceMetrics::Trace::getResourceLength(
    ArrayRef<const MachineBasicBlock *> Extrablocks,
    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions
  // (scaled cycles consumed on resource ResourceIdx by each sched class).
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
      ->unsigned {
    unsigned Cycles = 0;
    for (const MCSchedClassDesc *SC : Instrs) {
      if (!SC->isValid())
        continue;
      for (TargetSchedModel::ProcResIter
               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
               PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
           PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        // Scale by the resource factor so all resources are comparable.
        Cycles += (PI->ReleaseAtCycle *
                   TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
    return Cycles;

  // Find the most contended resource across the whole (hypothetical) trace.
  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (const MachineBasicBlock *MBB : Extrablocks)
      PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (const MachineBasicBlock *MBB : Extrablocks)
    Instrs += TE.MTM.getResources(MBB)->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
1310 bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
1311 const MachineInstr &UseMI) const {
1312 if (DefMI.getParent() == UseMI.getParent())
1313 return true;
1315 const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1316 const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1318 return DepTBI.isUsefulDominator(TBI);
// Dump every block's trace info for this ensemble, indexed by block number.
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  OS << getName() << " ensemble:\n";
  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
    OS << " %bb." << i << '\t';
    BlockInfo[i].print(OS);
    OS << '\n';
// Print this block's trace info: the depth half (trace above), the height
// half (trace below), and the critical path when both are computed.
void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
  // Depth half: instruction count above, trace predecessor, and trace head.
  if (hasValidDepth()) {
    OS << "depth=" << InstrDepth;
    if (Pred)
      OS << " pred=" << printMBBReference(*Pred);
    else
      OS << " pred=null";
    OS << " head=%bb." << Head;
    if (HasValidInstrDepths)
      OS << " +instrs";
  } else
    OS << "depth invalid";
  OS << ", ";
  // Height half: instruction count below, trace successor, and trace tail.
  if (hasValidHeight()) {
    OS << "height=" << InstrHeight;
    if (Succ)
      OS << " succ=" << printMBBReference(*Succ);
    else
      OS << " succ=null";
    OS << " tail=%bb." << Tail;
    if (HasValidInstrHeights)
      OS << " +instrs";
  } else
    OS << "height invalid";
  // CriticalPath is only meaningful once both depths and heights are valid.
  if (HasValidInstrDepths && HasValidInstrHeights)
    OS << ", crit=" << CriticalPath;
// Print a summary of the trace: head -> center -> tail, instruction count,
// critical path, and the chain of blocks in each direction.
void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
  // Recover the center block number from TBI's position inside BlockInfo.
  unsigned MBBNum = &TBI - &TE.BlockInfo[0];

  OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
     << " --> %bb." << TBI.Tail << ':';
  if (TBI.hasValidHeight() && TBI.hasValidDepth())
    OS << ' ' << getInstrCount() << " instrs.";
  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
    OS << ' ' << TBI.CriticalPath << " cycles.";

  // Walk the predecessor chain up to the trace head.
  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
  OS << "\n%bb." << MBBNum;
  while (Block->hasValidDepth() && Block->Pred) {
    unsigned Num = Block->Pred->getNumber();
    OS << " <- " << printMBBReference(*Block->Pred);
    Block = &TE.BlockInfo[Num];
  // Walk the successor chain down to the trace tail.
  Block = &TBI;
  OS << "\n ";
  while (Block->hasValidHeight() && Block->Succ) {
    unsigned Num = Block->Succ->getNumber();
    OS << " -> " << printMBBReference(*Block->Succ);
    Block = &TE.BlockInfo[Num];
  OS << '\n';