//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "machine-trace-metrics"

AnalysisKey MachineTraceMetricsAnalysis::Key;

MachineTraceMetricsAnalysis::Result
MachineTraceMetricsAnalysis::run(MachineFunction &MF,
                                 MachineFunctionAnalysisManager &MFAM) {
  return Result(MF, MFAM.getResult<MachineLoopAnalysis>(MF));
}

PreservedAnalyses
MachineTraceMetricsVerifierPass::run(MachineFunction &MF,
                                     MachineFunctionAnalysisManager &MFAM) {
  MFAM.getResult<MachineTraceMetricsAnalysis>(MF).verifyAnalysis();
  return PreservedAnalyses::all();
}

char MachineTraceMetricsWrapperPass::ID = 0;

char &llvm::MachineTraceMetricsID = MachineTraceMetricsWrapperPass::ID;

INITIALIZE_PASS_BEGIN(MachineTraceMetricsWrapperPass, DEBUG_TYPE,
                      "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(MachineTraceMetricsWrapperPass, DEBUG_TYPE,
                    "Machine Trace Metrics", false, true)

MachineTraceMetricsWrapperPass::MachineTraceMetricsWrapperPass()
    : MachineFunctionPass(ID) {}

void MachineTraceMetricsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MachineLoopInfoWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

void MachineTraceMetrics::init(MachineFunction &Func,
                               const MachineLoopInfo &LI) {
  MF = &Func;
  const TargetSubtargetInfo &ST = MF->getSubtarget();
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  MRI = &MF->getRegInfo();
  Loops = &LI;
  SchedModel.init(&ST);
  BlockInfo.resize(MF->getNumBlockIDs());
  ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
                             SchedModel.getNumProcResourceKinds());
}

bool MachineTraceMetricsWrapperPass::runOnMachineFunction(MachineFunction &MF) {
  MTM.init(MF, getAnalysis<MachineLoopInfoWrapperPass>().getLI());
  return false;
}

MachineTraceMetrics::~MachineTraceMetrics() { clear(); }

void MachineTraceMetrics::clear() {
  MF = nullptr;
  BlockInfo.clear();
  for (auto &E : Ensembles)
    E.reset();
}

//===----------------------------------------------------------------------===//
//                          Fixed block information
//===----------------------------------------------------------------------===//
//
// The number of instructions in a basic block and the CPU resources used by
// those instructions don't depend on any given trace strategy.

/// Compute the resource usage in basic block MBB.
const MachineTraceMetrics::FixedBlockInfo*
MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
  assert(MBB && "No basic block");
  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
  if (FBI->hasResources())
    return FBI;

  // Compute resource usage in the block.
  FBI->HasCalls = false;
  unsigned InstrCount = 0;

  // Add up per-processor resource cycles as well.
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  SmallVector<unsigned, 32> PRCycles(PRKinds);

  for (const auto &MI : *MBB) {
    if (MI.isTransient())
      continue;
    ++InstrCount;
    if (MI.isCall())
      FBI->HasCalls = true;

    // Count processor resources used.
    if (!SchedModel.hasInstrSchedModel())
      continue;
    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
    if (!SC->isValid())
      continue;

    for (TargetSchedModel::ProcResIter
         PI = SchedModel.getWriteProcResBegin(SC),
         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
      PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
    }
  }
  FBI->InstrCount = InstrCount;

  // Scale the resource cycles so they are comparable.
  unsigned PROffset = MBB->getNumber() * PRKinds;
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcReleaseAtCycles[PROffset + K] =
      PRCycles[K] * SchedModel.getResourceFactor(K);

  return FBI;
}

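// For illustration (hypothetical numbers, not from any real scheduling model):
// an instruction that releases an ALU resource after 2 cycles, on a target
// where the ALU's resource factor is 3, contributes 2 * 3 = 6 scaled units to
// ProcReleaseAtCycles for this block. Scaling by getResourceFactor() is what
// makes counts for resources of different widths comparable across kinds.
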
// Get the scaled number of cycles used per processor resource in MBB.
ArrayRef<unsigned>
MachineTraceMetrics::getProcReleaseAtCycles(unsigned MBBNum) const {
  assert(BlockInfo[MBBNum].hasResources() &&
         "getResources() must be called before getProcReleaseAtCycles()");
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
  return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
}

//===----------------------------------------------------------------------===//
//                         Ensemble utility functions
//===----------------------------------------------------------------------===//

MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
  : MTM(*ct) {
  BlockInfo.resize(MTM.BlockInfo.size());
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
}

// Virtual destructor serves as an anchor.
MachineTraceMetrics::Ensemble::~Ensemble() = default;

const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
  return MTM.Loops->getLoopFor(MBB);
}

// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace above MBB.
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources from trace above. The top block is simple.
  if (!TBI->Pred) {
    TBI->InstrDepth = 0;
    TBI->Head = MBB->getNumber();
    std::fill(ProcResourceDepths.begin() + PROffset,
              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
    return;
  }

  // Compute from the block above. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned PredNum = TBI->Pred->getNumber();
  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
  TBI->Head = PredTBI->Head;

  // Compute per-resource depths.
  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
  ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}

// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace below MBB.
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources for the current block.
  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
  ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());

  // The trace tail is done.
  if (!TBI->Succ) {
    TBI->Tail = MBB->getNumber();
    llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
    return;
  }

  // Compute from the block below. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned SuccNum = TBI->Succ->getNumber();
  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
  TBI->InstrHeight += SuccTBI->InstrHeight;
  TBI->Tail = SuccTBI->Tail;

  // Compute per-resource heights.
  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}

// Check if depth resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getDepthResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidDepth() ? TBI : nullptr;
}

// Check if height resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getHeightResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidHeight() ? TBI : nullptr;
}

/// Get an array of processor resource depths for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by all blocks preceding MBB in its trace. It does not include instructions
/// in MBB itself.
///
/// Compare TraceBlockInfo::InstrDepth.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceDepths(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
  return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
}

/// Get an array of processor resource heights for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by this block and all blocks following it in its trace.
///
/// Compare TraceBlockInfo::InstrHeight.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceHeights(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
  return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
}

//===----------------------------------------------------------------------===//
//                        Trace Selection Strategies
//===----------------------------------------------------------------------===//
//
// A trace selection strategy is implemented as a sub-class of Ensemble. The
// trace through a block B is computed by two DFS traversals of the CFG
// starting from B. One upwards, and one downwards. During the upwards DFS,
// pickTracePred() is called on the post-ordered blocks. During the downwards
// DFS, pickTraceSucc() is called in a post-order.
//
// We never allow traces that leave loops, but we do allow traces to enter
// nested loops. We also never allow traces to contain back-edges.
//
// This means that a loop header can never appear above the center block of a
// trace, except as the trace head. Below the center block, loop exiting edges
// are banned.
//
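// For illustration, consider a hypothetical CFG where %bb.1 is a loop header
// with predecessors %bb.0 (the preheader) and %bb.3 (the latch). A trace
// centered on %bb.2 inside the loop may extend upwards until %bb.1 becomes the
// trace head, but it never continues above %bb.1 to %bb.0, and it never
// follows the back-edge %bb.3 -> %bb.1 downwards. Strategies such as
// MinInstrCountEnsemble below only choose among the predecessors/successors
// that these rules allow.
//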
// Return true if an edge from the From loop to the To loop is leaving a loop.
// Either of To and From can be null.
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
  return From && !From->contains(To);
}

// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
namespace {

class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "MinInstr"; }
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;

public:
  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
    : MachineTraceMetrics::Ensemble(mtm) {}
};

/// Pick only the current basic block for the trace and do not choose any
/// predecessors/successors.
class LocalEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "Local"; }
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
    return nullptr;
  }
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
    return nullptr;
  }

public:
  LocalEnsemble(MachineTraceMetrics *MTM)
      : MachineTraceMetrics::Ensemble(MTM) {}
};
} // end anonymous namespace

// Select the preferred predecessor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
  if (MBB->pred_empty())
    return nullptr;
  const MachineLoop *CurLoop = getLoopFor(MBB);
  // Don't leave loops, and never follow back-edges.
  if (CurLoop && MBB == CurLoop->getHeader())
    return nullptr;
  unsigned CurCount = MTM.getResources(MBB)->InstrCount;
  const MachineBasicBlock *Best = nullptr;
  unsigned BestDepth = 0;
  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
    const MachineTraceMetrics::TraceBlockInfo *PredTBI =
      getDepthResources(Pred);
    // Ignore cycles that aren't natural loops.
    if (!PredTBI)
      continue;
    // Pick the predecessor that would give this block the smallest InstrDepth.
    unsigned Depth = PredTBI->InstrDepth + CurCount;
    if (!Best || Depth < BestDepth) {
      Best = Pred;
      BestDepth = Depth;
    }
  }
  return Best;
}

// Select the preferred successor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
  if (MBB->succ_empty())
    return nullptr;
  const MachineLoop *CurLoop = getLoopFor(MBB);
  const MachineBasicBlock *Best = nullptr;
  unsigned BestHeight = 0;
  for (const MachineBasicBlock *Succ : MBB->successors()) {
    // Don't consider back-edges.
    if (CurLoop && Succ == CurLoop->getHeader())
      continue;
    // Don't consider successors exiting CurLoop.
    if (isExitingLoop(CurLoop, getLoopFor(Succ)))
      continue;
    const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
      getHeightResources(Succ);
    // Ignore cycles that aren't natural loops.
    if (!SuccTBI)
      continue;
    // Pick the successor that would give this block the smallest InstrHeight.
    unsigned Height = SuccTBI->InstrHeight;
    if (!Best || Height < BestHeight) {
      Best = Succ;
      BestHeight = Height;
    }
  }
  return Best;
}

// Get an Ensemble sub-class for the requested trace strategy.
MachineTraceMetrics::Ensemble *
MachineTraceMetrics::getEnsemble(MachineTraceStrategy strategy) {
  assert(strategy < MachineTraceStrategy::TS_NumStrategies &&
         "Invalid trace strategy enum");
  std::unique_ptr<MachineTraceMetrics::Ensemble> &E =
      Ensembles[static_cast<size_t>(strategy)];
  if (E)
    return E.get();

  // Allocate new Ensemble on demand.
  switch (strategy) {
  case MachineTraceStrategy::TS_MinInstrCount:
    E = std::make_unique<MinInstrCountEnsemble>(MinInstrCountEnsemble(this));
    break;
  case MachineTraceStrategy::TS_Local:
    E = std::make_unique<LocalEnsemble>(LocalEnsemble(this));
    break;
  default: llvm_unreachable("Invalid trace strategy enum");
  }
  return E.get();
}

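// A minimal usage sketch (block names hypothetical): a client heuristic that
// wants critical-path information typically does something like
//
//   MachineTraceMetrics::Ensemble *E =
//       MTM.getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
//   MachineTraceMetrics::Trace T = E->getTrace(MBB);
//   unsigned Crit = T.getCriticalPath();
//
// and calls MTM.invalidate(MBB) after modifying the instructions in MBB so
// that cached trace data is recomputed on the next query.
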
void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
                    << '\n');
  BlockInfo[MBB->getNumber()].invalidate();
  for (auto &E : Ensembles)
    if (E)
      E->invalidate(MBB);
}

bool MachineTraceMetrics::invalidate(
    MachineFunction &, const PreservedAnalyses &PA,
    MachineFunctionAnalysisManager::Invalidator &) {
  // Check whether the analysis, all analyses on machine functions, or the
  // machine function's CFG have been preserved.
  auto PAC = PA.getChecker<MachineTraceMetricsAnalysis>();
  return !PAC.preserved() &&
         !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
         !PAC.preservedSet<CFGAnalyses>();
}

void MachineTraceMetrics::verifyAnalysis() const {
  if (!MF)
    return;
#ifndef NDEBUG
  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
  for (auto &E : Ensembles)
    if (E)
      E->verify();
#endif
}

//===----------------------------------------------------------------------===//
//                               Trace building
//===----------------------------------------------------------------------===//
//
// Traces are built by two CFG traversals. To avoid recomputing too much, use a
// set abstraction that confines the search to the current loop, and doesn't
// revisit blocks.

namespace {

struct LoopBounds {
  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
  SmallPtrSet<const MachineBasicBlock *, 8> Visited;
  const MachineLoopInfo *Loops;
  bool Downward = false;

  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
             const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
};

} // end anonymous namespace

// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
namespace llvm {

template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;

public:
  po_iterator_storage(LoopBounds &lb) : LB(lb) {}

  void finishPostorder(const MachineBasicBlock*) {}

  bool insertEdge(std::optional<const MachineBasicBlock *> From,
                  const MachineBasicBlock *To) {
    // Skip already visited To blocks.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;
    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        if ((LB.Downward ? To : *From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }
    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To).second;
  }
};

} // end namespace llvm

/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
                    << printMBBReference(*MBB) << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);

  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  pred for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(I);
    LLVM_DEBUG({
      if (TBI.Pred)
        dbgs() << printMBBReference(*TBI.Pred) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(I);
  }

  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  for (const auto *I : post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  succ for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(I);
    LLVM_DEBUG({
      if (TBI.Succ)
        dbgs() << printMBBReference(*TBI.Succ) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(I);
  }
}

/// Invalidate traces through BadMBB.
void
MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  SmallVector<const MachineBasicBlock*, 16> WorkList;
  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];

  // Invalidate height resources of blocks above MBB.
  if (BadTBI.hasValidHeight()) {
    BadTBI.invalidateHeight();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " height.\n");
      // Find any MBB predecessors that have MBB as their preferred successor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
        TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
        if (!TBI.hasValidHeight())
          continue;
        if (TBI.Succ == MBB) {
          TBI.invalidateHeight();
          WorkList.push_back(Pred);
          continue;
        }
        // Verify that TBI.Succ is actually a *I successor.
        assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Invalidate depth resources of blocks below MBB.
  if (BadTBI.hasValidDepth()) {
    BadTBI.invalidateDepth();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " depth.\n");
      // Find any MBB successors that have MBB as their preferred predecessor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Succ : MBB->successors()) {
        TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
        if (!TBI.hasValidDepth())
          continue;
        if (TBI.Pred == MBB) {
          TBI.invalidateDepth();
          WorkList.push_back(Succ);
          continue;
        }
        // Verify that TBI.Pred is actually a *I predecessor.
        assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Clear any per-instruction data. We only have to do this for BadMBB itself
  // because the instructions in that block may change. Other blocks may be
  // invalidated, but their instructions will stay the same, so there is no
  // need to erase the Cycle entries. They will be overwritten when we
  // recompute.
  for (const auto &I : *BadMBB)
    Cycles.erase(&I);
}

void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}

//===----------------------------------------------------------------------===//
//                             Data Dependencies
//===----------------------------------------------------------------------===//
//
// Compute the depth and height of each instruction based on data dependencies
// and instruction latencies. These cycle numbers assume that the CPU can issue
// an infinite number of instructions per cycle as long as their dependencies
// are ready.

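// For illustration (a hypothetical two-instruction chain): if %a = ADD has
// depth 0 and a 2-cycle latency, a dependent %b = MUL %a, ... gets depth
// 0 + 2 = 2. Heights run the other way: with the MUL at height 0 and the same
// latency, the ADD gets height 2. The sum depth + height of an instruction is
// the length of a dependence path through it, and the critical path of a block
// is the maximum such sum.
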
// A data dependency is represented as a defining MI and operand numbers on the
// defining and using MI.
namespace {

struct DataDep {
  const MachineInstr *DefMI;
  unsigned DefOp;
  unsigned UseOp;

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
    : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
    : UseOp(UseOp) {
    assert(Register::isVirtualRegister(VirtReg));
    MachineOperand *DefMO = MRI->getOneDef(VirtReg);
    assert(DefMO && "Register does not have unique def");
    DefMI = DefMO->getParent();
    DefOp = DefMO->getOperandNo();
  }
};

} // end anonymous namespace

// Get the input data dependencies that must be ready before UseMI can issue.
// Return true if UseMI has any physreg operands.
static bool getDataDeps(const MachineInstr &UseMI,
                        SmallVectorImpl<DataDep> &Deps,
                        const MachineRegisterInfo *MRI) {
  // Debug values should not be included in any calculations.
  if (UseMI.isDebugInstr())
    return false;

  bool HasPhysRegs = false;
  for (const MachineOperand &MO : UseMI.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isPhysical()) {
      HasPhysRegs = true;
      continue;
    }
    // Collect virtual register reads.
    if (MO.readsReg())
      Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
  }
  return HasPhysRegs;
}

// Get the input data dependencies of a PHI instruction, using Pred as the
// preferred predecessor.
// This will add at most one dependency to Deps.
static void getPHIDeps(const MachineInstr &UseMI,
                       SmallVectorImpl<DataDep> &Deps,
                       const MachineBasicBlock *Pred,
                       const MachineRegisterInfo *MRI) {
  // No predecessor at the beginning of a trace. Ignore dependencies.
  if (!Pred)
    return;
  assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
  for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
    if (UseMI.getOperand(i + 1).getMBB() == Pred) {
      Register Reg = UseMI.getOperand(i).getReg();
      Deps.push_back(DataDep(MRI, Reg, i));
      return;
    }
  }
}

// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
static void updatePhysDepsDownwards(const MachineInstr *UseMI,
                                    SmallVectorImpl<DataDep> &Deps,
                                    SparseSet<LiveRegUnit> &RegUnits,
                                    const TargetRegisterInfo *TRI) {
  SmallVector<MCRegister, 8> Kills;
  SmallVector<unsigned, 8> LiveDefOps;

  for (const MachineOperand &MO : UseMI->operands()) {
    if (!MO.isReg() || !MO.getReg().isPhysical())
      continue;
    MCRegister Reg = MO.getReg().asMCReg();
    // Track live defs and kills for updating RegUnits.
    if (MO.isDef()) {
      if (MO.isDead())
        Kills.push_back(Reg);
      else
        LiveDefOps.push_back(MO.getOperandNo());
    } else if (MO.isKill())
      Kills.push_back(Reg);
    // Identify dependencies.
    if (!MO.readsReg())
      continue;
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
      if (I == RegUnits.end())
        continue;
      Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
      break;
    }
  }

  // Update RegUnits to reflect live registers after UseMI.
  // First kills.
  for (MCRegister Kill : Kills)
    for (MCRegUnit Unit : TRI->regunits(Kill))
      RegUnits.erase(Unit);

  // Second, live defs.
  for (unsigned DefOp : LiveDefOps) {
    for (MCRegUnit Unit :
         TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
      LiveRegUnit &LRU = RegUnits[Unit];
      LRU.MI = UseMI;
      LRU.Op = DefOp;
    }
  }
}

/// The length of the critical path through a trace is the maximum of two path
/// lengths:
///
/// 1. The maximum height+depth over all instructions in the trace center block.
///
/// 2. The longest cross-block dependency chain. For small blocks, it is
///    possible that the critical path through the trace doesn't include any
///    instructions in the block.
///
/// This function computes the second number from the live-in list of the
/// trace below this block.
unsigned MachineTraceMetrics::Ensemble::
computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
  assert(TBI.HasValidInstrDepths && "Missing depth info");
  assert(TBI.HasValidInstrHeights && "Missing height info");
  unsigned MaxLen = 0;
  for (const LiveInReg &LIR : TBI.LiveIns) {
    if (!LIR.Reg.isVirtual())
      continue;
    const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
    // Ignore dependencies outside the current trace.
    const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
    if (!DefTBI.isUsefulDominator(TBI))
      continue;
    unsigned Len = LIR.Height + Cycles[DefMI].Depth;
    MaxLen = std::max(MaxLen, Len);
  }
  return MaxLen;
}

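// For illustration (hypothetical numbers): if the trace below this block has a
// live-in virtual register with Height 5 and its defining instruction in the
// trace above has Depth 3, that chain alone forces a critical path of at least
// 3 + 5 = 8 cycles, even when no instruction inside the center block itself
// lies on the longest path.
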
void MachineTraceMetrics::Ensemble::
updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
            SparseSet<LiveRegUnit> &RegUnits) {
  SmallVector<DataDep, 8> Deps;
  // Collect all data dependencies.
  if (UseMI.isPHI())
    getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
  else if (getDataDeps(UseMI, Deps, MTM.MRI))
    updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);

  // Filter and process dependencies, computing the earliest issue cycle.
  unsigned Cycle = 0;
  for (const DataDep &Dep : Deps) {
    const TraceBlockInfo &DepTBI =
      BlockInfo[Dep.DefMI->getParent()->getNumber()];
    // Ignore dependencies from outside the current trace.
    if (!DepTBI.isUsefulDominator(TBI))
      continue;
    assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
    unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
    // Add latency if DefMI is a real instruction. Transients get latency 0.
    if (!Dep.DefMI->isTransient())
      DepCycle += MTM.SchedModel
        .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
    Cycle = std::max(Cycle, DepCycle);
  }
  // Remember the instruction depth.
  InstrCycles &MICycles = Cycles[&UseMI];
  MICycles.Depth = Cycle;

  if (TBI.HasValidInstrHeights) {
    // Update critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
    LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
  } else {
    LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
  }
}

void MachineTraceMetrics::Ensemble::
updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
            SparseSet<LiveRegUnit> &RegUnits) {
  updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
}

void MachineTraceMetrics::Ensemble::
updateDepths(MachineBasicBlock::iterator Start,
             MachineBasicBlock::iterator End,
             SparseSet<LiveRegUnit> &RegUnits) {
  for (; Start != End; Start++)
    updateDepth(Start->getParent(), *Start, RegUnits);
}

/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  while (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  }

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    for (const auto &UseMI : *MBB) {
      updateDepth(TBI, UseMI, RegUnits);
    }
  }
}

// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> ReadOps;

  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isPhysical())
      continue;
    if (MO.readsReg())
      ReadOps.push_back(MO.getOperandNo());
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI.isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
                                                      nullptr, 0);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned Op : ReadOps) {
    MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      LiveRegUnit &LRU = RegUnits[Unit];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != &MI) {
        LRU.Cycle = Height;
        LRU.MI = &MI;
        LRU.Op = Op;
      }
    }
  }

  return Height;
}

using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;

// Push the height of DefMI upwards if required to match UseMI.
// Return true if this is the first time DefMI was seen.
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
                          unsigned UseHeight, MIHeightMap &Heights,
                          const TargetSchedModel &SchedModel,
                          const TargetInstrInfo *TII) {
  // Adjust height by Dep.DefMI latency.
  if (!Dep.DefMI->isTransient())
    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
                                                  Dep.UseOp);

  // Update Heights[DefMI] to be the maximum height seen.
  MIHeightMap::iterator I;
  bool New;
  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
  if (New)
    return true;

  // DefMI has been pushed before. Give it the max height.
  if (I->second < UseHeight)
    I->second = UseHeight;
  return false;
}

/// Assuming that the virtual register defined by DefMI:DefOp was used by
/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
           ArrayRef<const MachineBasicBlock*> Trace) {
  assert(!Trace.empty() && "Trace should contain at least one block");
  Register Reg = DefMI->getOperand(DefOp).getReg();
  assert(Reg.isVirtual());
  const MachineBasicBlock *DefMBB = DefMI->getParent();

  // Reg is live-in to all blocks in Trace that follow DefMBB.
  for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
    if (MBB == DefMBB)
      return;
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    // Just add the register. The height will be updated later.
    TBI.LiveIns.push_back(Reg);
  }
}

/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  while (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  }

  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (LiveInReg &LI : TBI.LiveIns) {
      if (LI.Reg.isVirtual()) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }

  // Go through the trace blocks in bottom-up order.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;

    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      for (const auto &PHI : *Succ) {
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
                            MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }

    // Go through the block backwards.
    for (const MachineInstr &MI : reverse(*MBB)) {
      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(&MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
                                      MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (const DataDep &Dep : Deps)
        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);

      InstrCycles &MICycles = Cycles[&MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
    }

    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
    for (LiveInReg &LIR : TBI.LiveIns) {
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (const LiveRegUnit &RU : RegUnits) {
      TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
      LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
                        << RU.Cycle);
    }
    LLVM_DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}

MachineTraceMetrics::Trace
MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];

  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
    computeTrace(MBB);
  if (!TBI.HasValidInstrDepths)
    computeInstrDepths(MBB);
  if (!TBI.HasValidInstrHeights)
    computeInstrHeights(MBB);

  return Trace(*this, TBI);
}

unsigned
MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
  assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
         "MI must be in the trace center block");
  InstrCycles Cyc = getInstrCycles(MI);
  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
}

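// For illustration (hypothetical numbers): with a critical path of 10 cycles,
// an instruction with depth 3 and height 4 sits on a 7-cycle path and has
// 10 - (3 + 4) = 3 cycles of slack; it can be delayed by that much without
// lengthening the trace.
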
unsigned
MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
  SmallVector<DataDep, 1> Deps;
  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
  DataDep &Dep = Deps.front();
  unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
  // Add latency if DefMI is a real instruction. Transients get latency 0.
  if (!Dep.DefMI->isTransient())
    DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
                                                        &PHI, Dep.UseOp);
  return DepCycle;
}

/// When bottom is set include instructions in current block in estimate.
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  // Find the limiting processor resource.
  // Numbers have been pre-scaled to be comparable.
  unsigned PRMax = 0;
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  if (Bottom) {
    ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
  } else {
    for (unsigned PRD : PRDepths)
      PRMax = std::max(PRMax, PRD);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  /// All instructions before current block
  unsigned Instrs = TBI.InstrDepth;
  // plus instructions in current block
  if (Bottom)
    Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}

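// For illustration (hypothetical numbers): with 12 instructions above the
// block, an issue width of 4, and a scaled resource maximum equivalent to 5
// cycles, the estimate is max(12 / 4, 5) = 5 cycles; here the trace is limited
// by the most contended processor resource rather than by raw issue bandwidth.
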
unsigned MachineTraceMetrics::Trace::getResourceLength(
    ArrayRef<const MachineBasicBlock *> Extrablocks,
    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
      -> unsigned {
    unsigned Cycles = 0;
    for (const MCSchedClassDesc *SC : Instrs) {
      if (!SC->isValid())
        continue;
      for (TargetSchedModel::ProcResIter
               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
               PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
           PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        Cycles += (PI->ReleaseAtCycle *
                   TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
      }
    }
    return Cycles;
  };

  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (const MachineBasicBlock *MBB : Extrablocks)
      PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (const MachineBasicBlock *MBB : Extrablocks)
    Instrs += TE.MTM.getResources(MBB)->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}

bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
                                              const MachineInstr &UseMI) const {
  if (DefMI.getParent() == UseMI.getParent())
    return true;

  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];

  return DepTBI.isUsefulDominator(TBI);
}

void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  OS << getName() << " ensemble:\n";
  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
    OS << "  %bb." << i << '\t';
    BlockInfo[i].print(OS);
    OS << '\n';
  }
}

void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
  if (hasValidDepth()) {
    OS << "depth=" << InstrDepth;
    if (Pred)
      OS << " pred=" << printMBBReference(*Pred);
    else
      OS << " pred=null";
    OS << " head=%bb." << Head;
    if (HasValidInstrDepths)
      OS << " +instrs";
  } else
    OS << "depth invalid";
  OS << ", ";
  if (hasValidHeight()) {
    OS << "height=" << InstrHeight;
    if (Succ)
      OS << " succ=" << printMBBReference(*Succ);
    else
      OS << " succ=null";
    OS << " tail=%bb." << Tail;
    if (HasValidInstrHeights)
      OS << " +instrs";
  } else
    OS << "height invalid";
  if (HasValidInstrDepths && HasValidInstrHeights)
    OS << ", crit=" << CriticalPath;
}

void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
  unsigned MBBNum = &TBI - &TE.BlockInfo[0];

  OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
     << " --> %bb." << TBI.Tail << ':';
  if (TBI.hasValidHeight() && TBI.hasValidDepth())
    OS << ' ' << getInstrCount() << " instrs.";
  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
    OS << ' ' << TBI.CriticalPath << " cycles.";

  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
  OS << "\n%bb." << MBBNum;
  while (Block->hasValidDepth() && Block->Pred) {
    unsigned Num = Block->Pred->getNumber();
    OS << " <- " << printMBBReference(*Block->Pred);
    Block = &TE.BlockInfo[Num];
  }

  Block = &TBI;
  OS << '\n';
  while (Block->hasValidHeight() && Block->Succ) {
    unsigned Num = Block->Succ->getNumber();
    OS << " -> " << printMBBReference(*Block->Succ);
    Block = &TE.BlockInfo[Num];
  }
  OS << '\n';
}