1 //===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/CodeGen/MachineTraceMetrics.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/PostOrderIterator.h"
13 #include "llvm/ADT/SmallPtrSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/SparseSet.h"
16 #include "llvm/CodeGen/MachineBasicBlock.h"
17 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineInstr.h"
20 #include "llvm/CodeGen/MachineLoopInfo.h"
21 #include "llvm/CodeGen/MachineOperand.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/TargetRegisterInfo.h"
24 #include "llvm/CodeGen/TargetSchedule.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/InitializePasses.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/Format.h"
32 #include "llvm/Support/raw_ostream.h"
41 #define DEBUG_TYPE "machine-trace-metrics"
43 char MachineTraceMetrics::ID
= 0;
45 char &llvm::MachineTraceMetricsID
= MachineTraceMetrics::ID
;
47 INITIALIZE_PASS_BEGIN(MachineTraceMetrics
, DEBUG_TYPE
,
48 "Machine Trace Metrics", false, true)
49 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo
)
50 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo
)
51 INITIALIZE_PASS_END(MachineTraceMetrics
, DEBUG_TYPE
,
52 "Machine Trace Metrics", false, true)
54 MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID
) {
55 std::fill(std::begin(Ensembles
), std::end(Ensembles
), nullptr);
58 void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage
&AU
) const {
60 AU
.addRequired
<MachineBranchProbabilityInfo
>();
61 AU
.addRequired
<MachineLoopInfo
>();
62 MachineFunctionPass::getAnalysisUsage(AU
);
65 bool MachineTraceMetrics::runOnMachineFunction(MachineFunction
&Func
) {
67 const TargetSubtargetInfo
&ST
= MF
->getSubtarget();
68 TII
= ST
.getInstrInfo();
69 TRI
= ST
.getRegisterInfo();
70 MRI
= &MF
->getRegInfo();
71 Loops
= &getAnalysis
<MachineLoopInfo
>();
73 BlockInfo
.resize(MF
->getNumBlockIDs());
74 ProcReleaseAtCycles
.resize(MF
->getNumBlockIDs() *
75 SchedModel
.getNumProcResourceKinds());
79 void MachineTraceMetrics::releaseMemory() {
82 for (Ensemble
*&E
: Ensembles
) {
88 //===----------------------------------------------------------------------===//
89 // Fixed block information
90 //===----------------------------------------------------------------------===//
92 // The number of instructions in a basic block and the CPU resources used by
93 // those instructions don't depend on any given trace strategy.
95 /// Compute the resource usage in basic block MBB.
96 const MachineTraceMetrics::FixedBlockInfo
*
97 MachineTraceMetrics::getResources(const MachineBasicBlock
*MBB
) {
98 assert(MBB
&& "No basic block");
99 FixedBlockInfo
*FBI
= &BlockInfo
[MBB
->getNumber()];
100 if (FBI
->hasResources())
103 // Compute resource usage in the block.
104 FBI
->HasCalls
= false;
105 unsigned InstrCount
= 0;
107 // Add up per-processor resource cycles as well.
108 unsigned PRKinds
= SchedModel
.getNumProcResourceKinds();
109 SmallVector
<unsigned, 32> PRCycles(PRKinds
);
111 for (const auto &MI
: *MBB
) {
112 if (MI
.isTransient())
116 FBI
->HasCalls
= true;
118 // Count processor resources used.
119 if (!SchedModel
.hasInstrSchedModel())
121 const MCSchedClassDesc
*SC
= SchedModel
.resolveSchedClass(&MI
);
125 for (TargetSchedModel::ProcResIter
126 PI
= SchedModel
.getWriteProcResBegin(SC
),
127 PE
= SchedModel
.getWriteProcResEnd(SC
); PI
!= PE
; ++PI
) {
128 assert(PI
->ProcResourceIdx
< PRKinds
&& "Bad processor resource kind");
129 PRCycles
[PI
->ProcResourceIdx
] += PI
->ReleaseAtCycle
;
132 FBI
->InstrCount
= InstrCount
;
134 // Scale the resource cycles so they are comparable.
135 unsigned PROffset
= MBB
->getNumber() * PRKinds
;
136 for (unsigned K
= 0; K
!= PRKinds
; ++K
)
137 ProcReleaseAtCycles
[PROffset
+ K
] =
138 PRCycles
[K
] * SchedModel
.getResourceFactor(K
);
144 MachineTraceMetrics::getProcReleaseAtCycles(unsigned MBBNum
) const {
145 assert(BlockInfo
[MBBNum
].hasResources() &&
146 "getResources() must be called before getProcReleaseAtCycles()");
147 unsigned PRKinds
= SchedModel
.getNumProcResourceKinds();
148 assert((MBBNum
+1) * PRKinds
<= ProcReleaseAtCycles
.size());
149 return ArrayRef(ProcReleaseAtCycles
.data() + MBBNum
* PRKinds
, PRKinds
);
152 //===----------------------------------------------------------------------===//
153 // Ensemble utility functions
154 //===----------------------------------------------------------------------===//
156 MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics
*ct
)
158 BlockInfo
.resize(MTM
.BlockInfo
.size());
159 unsigned PRKinds
= MTM
.SchedModel
.getNumProcResourceKinds();
160 ProcResourceDepths
.resize(MTM
.BlockInfo
.size() * PRKinds
);
161 ProcResourceHeights
.resize(MTM
.BlockInfo
.size() * PRKinds
);
164 // Virtual destructor serves as an anchor.
165 MachineTraceMetrics::Ensemble::~Ensemble() = default;
168 MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock
*MBB
) const {
169 return MTM
.Loops
->getLoopFor(MBB
);
172 // Update resource-related information in the TraceBlockInfo for MBB.
173 // Only update resources related to the trace above MBB.
174 void MachineTraceMetrics::Ensemble::
175 computeDepthResources(const MachineBasicBlock
*MBB
) {
176 TraceBlockInfo
*TBI
= &BlockInfo
[MBB
->getNumber()];
177 unsigned PRKinds
= MTM
.SchedModel
.getNumProcResourceKinds();
178 unsigned PROffset
= MBB
->getNumber() * PRKinds
;
180 // Compute resources from trace above. The top block is simple.
183 TBI
->Head
= MBB
->getNumber();
184 std::fill(ProcResourceDepths
.begin() + PROffset
,
185 ProcResourceDepths
.begin() + PROffset
+ PRKinds
, 0);
189 // Compute from the block above. A post-order traversal ensures the
190 // predecessor is always computed first.
191 unsigned PredNum
= TBI
->Pred
->getNumber();
192 TraceBlockInfo
*PredTBI
= &BlockInfo
[PredNum
];
193 assert(PredTBI
->hasValidDepth() && "Trace above has not been computed yet");
194 const FixedBlockInfo
*PredFBI
= MTM
.getResources(TBI
->Pred
);
195 TBI
->InstrDepth
= PredTBI
->InstrDepth
+ PredFBI
->InstrCount
;
196 TBI
->Head
= PredTBI
->Head
;
198 // Compute per-resource depths.
199 ArrayRef
<unsigned> PredPRDepths
= getProcResourceDepths(PredNum
);
200 ArrayRef
<unsigned> PredPRCycles
= MTM
.getProcReleaseAtCycles(PredNum
);
201 for (unsigned K
= 0; K
!= PRKinds
; ++K
)
202 ProcResourceDepths
[PROffset
+ K
] = PredPRDepths
[K
] + PredPRCycles
[K
];
205 // Update resource-related information in the TraceBlockInfo for MBB.
206 // Only update resources related to the trace below MBB.
207 void MachineTraceMetrics::Ensemble::
208 computeHeightResources(const MachineBasicBlock
*MBB
) {
209 TraceBlockInfo
*TBI
= &BlockInfo
[MBB
->getNumber()];
210 unsigned PRKinds
= MTM
.SchedModel
.getNumProcResourceKinds();
211 unsigned PROffset
= MBB
->getNumber() * PRKinds
;
213 // Compute resources for the current block.
214 TBI
->InstrHeight
= MTM
.getResources(MBB
)->InstrCount
;
215 ArrayRef
<unsigned> PRCycles
= MTM
.getProcReleaseAtCycles(MBB
->getNumber());
217 // The trace tail is done.
219 TBI
->Tail
= MBB
->getNumber();
220 llvm::copy(PRCycles
, ProcResourceHeights
.begin() + PROffset
);
224 // Compute from the block below. A post-order traversal ensures the
225 // predecessor is always computed first.
226 unsigned SuccNum
= TBI
->Succ
->getNumber();
227 TraceBlockInfo
*SuccTBI
= &BlockInfo
[SuccNum
];
228 assert(SuccTBI
->hasValidHeight() && "Trace below has not been computed yet");
229 TBI
->InstrHeight
+= SuccTBI
->InstrHeight
;
230 TBI
->Tail
= SuccTBI
->Tail
;
232 // Compute per-resource heights.
233 ArrayRef
<unsigned> SuccPRHeights
= getProcResourceHeights(SuccNum
);
234 for (unsigned K
= 0; K
!= PRKinds
; ++K
)
235 ProcResourceHeights
[PROffset
+ K
] = SuccPRHeights
[K
] + PRCycles
[K
];
238 // Check if depth resources for MBB are valid and return the TBI.
239 // Return NULL if the resources have been invalidated.
240 const MachineTraceMetrics::TraceBlockInfo
*
241 MachineTraceMetrics::Ensemble::
242 getDepthResources(const MachineBasicBlock
*MBB
) const {
243 const TraceBlockInfo
*TBI
= &BlockInfo
[MBB
->getNumber()];
244 return TBI
->hasValidDepth() ? TBI
: nullptr;
247 // Check if height resources for MBB are valid and return the TBI.
248 // Return NULL if the resources have been invalidated.
249 const MachineTraceMetrics::TraceBlockInfo
*
250 MachineTraceMetrics::Ensemble::
251 getHeightResources(const MachineBasicBlock
*MBB
) const {
252 const TraceBlockInfo
*TBI
= &BlockInfo
[MBB
->getNumber()];
253 return TBI
->hasValidHeight() ? TBI
: nullptr;
256 /// Get an array of processor resource depths for MBB. Indexed by processor
257 /// resource kind, this array contains the scaled processor resources consumed
258 /// by all blocks preceding MBB in its trace. It does not include instructions
261 /// Compare TraceBlockInfo::InstrDepth.
263 MachineTraceMetrics::Ensemble::
264 getProcResourceDepths(unsigned MBBNum
) const {
265 unsigned PRKinds
= MTM
.SchedModel
.getNumProcResourceKinds();
266 assert((MBBNum
+1) * PRKinds
<= ProcResourceDepths
.size());
267 return ArrayRef(ProcResourceDepths
.data() + MBBNum
* PRKinds
, PRKinds
);
270 /// Get an array of processor resource heights for MBB. Indexed by processor
271 /// resource kind, this array contains the scaled processor resources consumed
272 /// by this block and all blocks following it in its trace.
274 /// Compare TraceBlockInfo::InstrHeight.
276 MachineTraceMetrics::Ensemble::
277 getProcResourceHeights(unsigned MBBNum
) const {
278 unsigned PRKinds
= MTM
.SchedModel
.getNumProcResourceKinds();
279 assert((MBBNum
+1) * PRKinds
<= ProcResourceHeights
.size());
280 return ArrayRef(ProcResourceHeights
.data() + MBBNum
* PRKinds
, PRKinds
);
283 //===----------------------------------------------------------------------===//
284 // Trace Selection Strategies
285 //===----------------------------------------------------------------------===//
287 // A trace selection strategy is implemented as a sub-class of Ensemble. The
288 // trace through a block B is computed by two DFS traversals of the CFG
289 // starting from B. One upwards, and one downwards. During the upwards DFS,
290 // pickTracePred() is called on the post-ordered blocks. During the downwards
291 // DFS, pickTraceSucc() is called in a post-order.
294 // We never allow traces that leave loops, but we do allow traces to enter
295 // nested loops. We also never allow traces to contain back-edges.
297 // This means that a loop header can never appear above the center block of a
298 // trace, except as the trace head. Below the center block, loop exiting edges
301 // Return true if an edge from the From loop to the To loop is leaving a loop.
302 // Either of To and From can be null.
303 static bool isExitingLoop(const MachineLoop
*From
, const MachineLoop
*To
) {
304 return From
&& !From
->contains(To
);
307 // MinInstrCountEnsemble - Pick the trace that executes the least number of
311 class MinInstrCountEnsemble
: public MachineTraceMetrics::Ensemble
{
312 const char *getName() const override
{ return "MinInstr"; }
313 const MachineBasicBlock
*pickTracePred(const MachineBasicBlock
*) override
;
314 const MachineBasicBlock
*pickTraceSucc(const MachineBasicBlock
*) override
;
317 MinInstrCountEnsemble(MachineTraceMetrics
*mtm
)
318 : MachineTraceMetrics::Ensemble(mtm
) {}
321 /// Pick only the current basic block for the trace and do not choose any
322 /// predecessors/successors.
323 class LocalEnsemble
: public MachineTraceMetrics::Ensemble
{
324 const char *getName() const override
{ return "Local"; }
325 const MachineBasicBlock
*pickTracePred(const MachineBasicBlock
*) override
{
328 const MachineBasicBlock
*pickTraceSucc(const MachineBasicBlock
*) override
{
333 LocalEnsemble(MachineTraceMetrics
*MTM
)
334 : MachineTraceMetrics::Ensemble(MTM
) {}
336 } // end anonymous namespace
338 // Select the preferred predecessor for MBB.
339 const MachineBasicBlock
*
340 MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock
*MBB
) {
341 if (MBB
->pred_empty())
343 const MachineLoop
*CurLoop
= getLoopFor(MBB
);
344 // Don't leave loops, and never follow back-edges.
345 if (CurLoop
&& MBB
== CurLoop
->getHeader())
347 unsigned CurCount
= MTM
.getResources(MBB
)->InstrCount
;
348 const MachineBasicBlock
*Best
= nullptr;
349 unsigned BestDepth
= 0;
350 for (const MachineBasicBlock
*Pred
: MBB
->predecessors()) {
351 const MachineTraceMetrics::TraceBlockInfo
*PredTBI
=
352 getDepthResources(Pred
);
353 // Ignore cycles that aren't natural loops.
356 // Pick the predecessor that would give this block the smallest InstrDepth.
357 unsigned Depth
= PredTBI
->InstrDepth
+ CurCount
;
358 if (!Best
|| Depth
< BestDepth
) {
366 // Select the preferred successor for MBB.
367 const MachineBasicBlock
*
368 MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock
*MBB
) {
369 if (MBB
->succ_empty())
371 const MachineLoop
*CurLoop
= getLoopFor(MBB
);
372 const MachineBasicBlock
*Best
= nullptr;
373 unsigned BestHeight
= 0;
374 for (const MachineBasicBlock
*Succ
: MBB
->successors()) {
375 // Don't consider back-edges.
376 if (CurLoop
&& Succ
== CurLoop
->getHeader())
378 // Don't consider successors exiting CurLoop.
379 if (isExitingLoop(CurLoop
, getLoopFor(Succ
)))
381 const MachineTraceMetrics::TraceBlockInfo
*SuccTBI
=
382 getHeightResources(Succ
);
383 // Ignore cycles that aren't natural loops.
386 // Pick the successor that would give this block the smallest InstrHeight.
387 unsigned Height
= SuccTBI
->InstrHeight
;
388 if (!Best
|| Height
< BestHeight
) {
396 // Get an Ensemble sub-class for the requested trace strategy.
397 MachineTraceMetrics::Ensemble
*
398 MachineTraceMetrics::getEnsemble(MachineTraceStrategy strategy
) {
399 assert(strategy
< MachineTraceStrategy::TS_NumStrategies
&&
400 "Invalid trace strategy enum");
401 Ensemble
*&E
= Ensembles
[static_cast<size_t>(strategy
)];
405 // Allocate new Ensemble on demand.
407 case MachineTraceStrategy::TS_MinInstrCount
:
408 return (E
= new MinInstrCountEnsemble(this));
409 case MachineTraceStrategy::TS_Local
:
410 return (E
= new LocalEnsemble(this));
411 default: llvm_unreachable("Invalid trace strategy enum");
415 void MachineTraceMetrics::invalidate(const MachineBasicBlock
*MBB
) {
416 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB
)
418 BlockInfo
[MBB
->getNumber()].invalidate();
419 for (Ensemble
*E
: Ensembles
)
424 void MachineTraceMetrics::verifyAnalysis() const {
428 assert(BlockInfo
.size() == MF
->getNumBlockIDs() && "Outdated BlockInfo size");
429 for (Ensemble
*E
: Ensembles
)
435 //===----------------------------------------------------------------------===//
437 //===----------------------------------------------------------------------===//
439 // Traces are built by two CFG traversals. To avoid recomputing too much, use a
440 // set abstraction that confines the search to the current loop, and doesn't
446 MutableArrayRef
<MachineTraceMetrics::TraceBlockInfo
> Blocks
;
447 SmallPtrSet
<const MachineBasicBlock
*, 8> Visited
;
448 const MachineLoopInfo
*Loops
;
449 bool Downward
= false;
451 LoopBounds(MutableArrayRef
<MachineTraceMetrics::TraceBlockInfo
> blocks
,
452 const MachineLoopInfo
*loops
) : Blocks(blocks
), Loops(loops
) {}
455 } // end anonymous namespace
457 // Specialize po_iterator_storage in order to prune the post-order traversal so
458 // it is limited to the current loop and doesn't traverse the loop back edges.
462 class po_iterator_storage
<LoopBounds
, true> {
466 po_iterator_storage(LoopBounds
&lb
) : LB(lb
) {}
468 void finishPostorder(const MachineBasicBlock
*) {}
470 bool insertEdge(std::optional
<const MachineBasicBlock
*> From
,
471 const MachineBasicBlock
*To
) {
472 // Skip already visited To blocks.
473 MachineTraceMetrics::TraceBlockInfo
&TBI
= LB
.Blocks
[To
->getNumber()];
474 if (LB
.Downward
? TBI
.hasValidHeight() : TBI
.hasValidDepth())
476 // From is null once when To is the trace center block.
478 if (const MachineLoop
*FromLoop
= LB
.Loops
->getLoopFor(*From
)) {
479 // Don't follow backedges, don't leave FromLoop when going upwards.
480 if ((LB
.Downward
? To
: *From
) == FromLoop
->getHeader())
482 // Don't leave FromLoop.
483 if (isExitingLoop(FromLoop
, LB
.Loops
->getLoopFor(To
)))
487 // To is a new block. Mark the block as visited in case the CFG has cycles
488 // that MachineLoopInfo didn't recognize as a natural loop.
489 return LB
.Visited
.insert(To
).second
;
493 } // end namespace llvm
495 /// Compute the trace through MBB.
496 void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock
*MBB
) {
497 LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
498 << printMBBReference(*MBB
) << '\n');
499 // Set up loop bounds for the backwards post-order traversal.
500 LoopBounds
Bounds(BlockInfo
, MTM
.Loops
);
502 // Run an upwards post-order search for the trace start.
503 Bounds
.Downward
= false;
504 Bounds
.Visited
.clear();
505 for (const auto *I
: inverse_post_order_ext(MBB
, Bounds
)) {
506 LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I
) << ": ");
507 TraceBlockInfo
&TBI
= BlockInfo
[I
->getNumber()];
508 // All the predecessors have been visited, pick the preferred one.
509 TBI
.Pred
= pickTracePred(I
);
512 dbgs() << printMBBReference(*TBI
.Pred
) << '\n';
516 // The trace leading to I is now known, compute the depth resources.
517 computeDepthResources(I
);
520 // Run a downwards post-order search for the trace end.
521 Bounds
.Downward
= true;
522 Bounds
.Visited
.clear();
523 for (const auto *I
: post_order_ext(MBB
, Bounds
)) {
524 LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I
) << ": ");
525 TraceBlockInfo
&TBI
= BlockInfo
[I
->getNumber()];
526 // All the successors have been visited, pick the preferred one.
527 TBI
.Succ
= pickTraceSucc(I
);
530 dbgs() << printMBBReference(*TBI
.Succ
) << '\n';
534 // The trace leaving I is now known, compute the height resources.
535 computeHeightResources(I
);
539 /// Invalidate traces through BadMBB.
541 MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock
*BadMBB
) {
542 SmallVector
<const MachineBasicBlock
*, 16> WorkList
;
543 TraceBlockInfo
&BadTBI
= BlockInfo
[BadMBB
->getNumber()];
545 // Invalidate height resources of blocks above MBB.
546 if (BadTBI
.hasValidHeight()) {
547 BadTBI
.invalidateHeight();
548 WorkList
.push_back(BadMBB
);
550 const MachineBasicBlock
*MBB
= WorkList
.pop_back_val();
551 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB
) << ' '
552 << getName() << " height.\n");
553 // Find any MBB predecessors that have MBB as their preferred successor.
554 // They are the only ones that need to be invalidated.
555 for (const MachineBasicBlock
*Pred
: MBB
->predecessors()) {
556 TraceBlockInfo
&TBI
= BlockInfo
[Pred
->getNumber()];
557 if (!TBI
.hasValidHeight())
559 if (TBI
.Succ
== MBB
) {
560 TBI
.invalidateHeight();
561 WorkList
.push_back(Pred
);
564 // Verify that TBI.Succ is actually a *I successor.
565 assert((!TBI
.Succ
|| Pred
->isSuccessor(TBI
.Succ
)) && "CFG changed");
567 } while (!WorkList
.empty());
570 // Invalidate depth resources of blocks below MBB.
571 if (BadTBI
.hasValidDepth()) {
572 BadTBI
.invalidateDepth();
573 WorkList
.push_back(BadMBB
);
575 const MachineBasicBlock
*MBB
= WorkList
.pop_back_val();
576 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB
) << ' '
577 << getName() << " depth.\n");
578 // Find any MBB successors that have MBB as their preferred predecessor.
579 // They are the only ones that need to be invalidated.
580 for (const MachineBasicBlock
*Succ
: MBB
->successors()) {
581 TraceBlockInfo
&TBI
= BlockInfo
[Succ
->getNumber()];
582 if (!TBI
.hasValidDepth())
584 if (TBI
.Pred
== MBB
) {
585 TBI
.invalidateDepth();
586 WorkList
.push_back(Succ
);
589 // Verify that TBI.Pred is actually a *I predecessor.
590 assert((!TBI
.Pred
|| Succ
->isPredecessor(TBI
.Pred
)) && "CFG changed");
592 } while (!WorkList
.empty());
595 // Clear any per-instruction data. We only have to do this for BadMBB itself
596 // because the instructions in that block may change. Other blocks may be
597 // invalidated, but their instructions will stay the same, so there is no
598 // need to erase the Cycle entries. They will be overwritten when we
600 for (const auto &I
: *BadMBB
)
604 void MachineTraceMetrics::Ensemble::verify() const {
606 assert(BlockInfo
.size() == MTM
.MF
->getNumBlockIDs() &&
607 "Outdated BlockInfo size");
608 for (unsigned Num
= 0, e
= BlockInfo
.size(); Num
!= e
; ++Num
) {
609 const TraceBlockInfo
&TBI
= BlockInfo
[Num
];
610 if (TBI
.hasValidDepth() && TBI
.Pred
) {
611 const MachineBasicBlock
*MBB
= MTM
.MF
->getBlockNumbered(Num
);
612 assert(MBB
->isPredecessor(TBI
.Pred
) && "CFG doesn't match trace");
613 assert(BlockInfo
[TBI
.Pred
->getNumber()].hasValidDepth() &&
614 "Trace is broken, depth should have been invalidated.");
615 const MachineLoop
*Loop
= getLoopFor(MBB
);
616 assert(!(Loop
&& MBB
== Loop
->getHeader()) && "Trace contains backedge");
618 if (TBI
.hasValidHeight() && TBI
.Succ
) {
619 const MachineBasicBlock
*MBB
= MTM
.MF
->getBlockNumbered(Num
);
620 assert(MBB
->isSuccessor(TBI
.Succ
) && "CFG doesn't match trace");
621 assert(BlockInfo
[TBI
.Succ
->getNumber()].hasValidHeight() &&
622 "Trace is broken, height should have been invalidated.");
623 const MachineLoop
*Loop
= getLoopFor(MBB
);
624 const MachineLoop
*SuccLoop
= getLoopFor(TBI
.Succ
);
625 assert(!(Loop
&& Loop
== SuccLoop
&& TBI
.Succ
== Loop
->getHeader()) &&
626 "Trace contains backedge");
632 //===----------------------------------------------------------------------===//
634 //===----------------------------------------------------------------------===//
636 // Compute the depth and height of each instruction based on data dependencies
637 // and instruction latencies. These cycle numbers assume that the CPU can issue
638 // an infinite number of instructions per cycle as long as their dependencies
641 // A data dependency is represented as a defining MI and operand numbers on the
642 // defining and using MI.
646 const MachineInstr
*DefMI
;
650 DataDep(const MachineInstr
*DefMI
, unsigned DefOp
, unsigned UseOp
)
651 : DefMI(DefMI
), DefOp(DefOp
), UseOp(UseOp
) {}
653 /// Create a DataDep from an SSA form virtual register.
654 DataDep(const MachineRegisterInfo
*MRI
, unsigned VirtReg
, unsigned UseOp
)
656 assert(Register::isVirtualRegister(VirtReg
));
657 MachineRegisterInfo::def_iterator DefI
= MRI
->def_begin(VirtReg
);
658 assert(!DefI
.atEnd() && "Register has no defs");
659 DefMI
= DefI
->getParent();
660 DefOp
= DefI
.getOperandNo();
661 assert((++DefI
).atEnd() && "Register has multiple defs");
665 } // end anonymous namespace
667 // Get the input data dependencies that must be ready before UseMI can issue.
668 // Return true if UseMI has any physreg operands.
669 static bool getDataDeps(const MachineInstr
&UseMI
,
670 SmallVectorImpl
<DataDep
> &Deps
,
671 const MachineRegisterInfo
*MRI
) {
672 // Debug values should not be included in any calculations.
673 if (UseMI
.isDebugInstr())
676 bool HasPhysRegs
= false;
677 for (const MachineOperand
&MO
: UseMI
.operands()) {
680 Register Reg
= MO
.getReg();
683 if (Reg
.isPhysical()) {
687 // Collect virtual register reads.
689 Deps
.push_back(DataDep(MRI
, Reg
, MO
.getOperandNo()));
694 // Get the input data dependencies of a PHI instruction, using Pred as the
695 // preferred predecessor.
696 // This will add at most one dependency to Deps.
697 static void getPHIDeps(const MachineInstr
&UseMI
,
698 SmallVectorImpl
<DataDep
> &Deps
,
699 const MachineBasicBlock
*Pred
,
700 const MachineRegisterInfo
*MRI
) {
701 // No predecessor at the beginning of a trace. Ignore dependencies.
704 assert(UseMI
.isPHI() && UseMI
.getNumOperands() % 2 && "Bad PHI");
705 for (unsigned i
= 1; i
!= UseMI
.getNumOperands(); i
+= 2) {
706 if (UseMI
.getOperand(i
+ 1).getMBB() == Pred
) {
707 Register Reg
= UseMI
.getOperand(i
).getReg();
708 Deps
.push_back(DataDep(MRI
, Reg
, i
));
714 // Identify physreg dependencies for UseMI, and update the live regunit
715 // tracking set when scanning instructions downwards.
716 static void updatePhysDepsDownwards(const MachineInstr
*UseMI
,
717 SmallVectorImpl
<DataDep
> &Deps
,
718 SparseSet
<LiveRegUnit
> &RegUnits
,
719 const TargetRegisterInfo
*TRI
) {
720 SmallVector
<MCRegister
, 8> Kills
;
721 SmallVector
<unsigned, 8> LiveDefOps
;
723 for (const MachineOperand
&MO
: UseMI
->operands()) {
724 if (!MO
.isReg() || !MO
.getReg().isPhysical())
726 MCRegister Reg
= MO
.getReg().asMCReg();
727 // Track live defs and kills for updating RegUnits.
730 Kills
.push_back(Reg
);
732 LiveDefOps
.push_back(MO
.getOperandNo());
733 } else if (MO
.isKill())
734 Kills
.push_back(Reg
);
735 // Identify dependencies.
738 for (MCRegUnit Unit
: TRI
->regunits(Reg
)) {
739 SparseSet
<LiveRegUnit
>::iterator I
= RegUnits
.find(Unit
);
740 if (I
== RegUnits
.end())
742 Deps
.push_back(DataDep(I
->MI
, I
->Op
, MO
.getOperandNo()));
747 // Update RegUnits to reflect live registers after UseMI.
749 for (MCRegister Kill
: Kills
)
750 for (MCRegUnit Unit
: TRI
->regunits(Kill
))
751 RegUnits
.erase(Unit
);
753 // Second, live defs.
754 for (unsigned DefOp
: LiveDefOps
) {
755 for (MCRegUnit Unit
:
756 TRI
->regunits(UseMI
->getOperand(DefOp
).getReg().asMCReg())) {
757 LiveRegUnit
&LRU
= RegUnits
[Unit
];
764 /// The length of the critical path through a trace is the maximum of two path
767 /// 1. The maximum height+depth over all instructions in the trace center block.
769 /// 2. The longest cross-block dependency chain. For small blocks, it is
770 /// possible that the critical path through the trace doesn't include any
771 /// instructions in the block.
773 /// This function computes the second number from the live-in list of the
775 unsigned MachineTraceMetrics::Ensemble::
776 computeCrossBlockCriticalPath(const TraceBlockInfo
&TBI
) {
777 assert(TBI
.HasValidInstrDepths
&& "Missing depth info");
778 assert(TBI
.HasValidInstrHeights
&& "Missing height info");
780 for (const LiveInReg
&LIR
: TBI
.LiveIns
) {
781 if (!LIR
.Reg
.isVirtual())
783 const MachineInstr
*DefMI
= MTM
.MRI
->getVRegDef(LIR
.Reg
);
784 // Ignore dependencies outside the current trace.
785 const TraceBlockInfo
&DefTBI
= BlockInfo
[DefMI
->getParent()->getNumber()];
786 if (!DefTBI
.isUsefulDominator(TBI
))
788 unsigned Len
= LIR
.Height
+ Cycles
[DefMI
].Depth
;
789 MaxLen
= std::max(MaxLen
, Len
);
794 void MachineTraceMetrics::Ensemble::
795 updateDepth(MachineTraceMetrics::TraceBlockInfo
&TBI
, const MachineInstr
&UseMI
,
796 SparseSet
<LiveRegUnit
> &RegUnits
) {
797 SmallVector
<DataDep
, 8> Deps
;
798 // Collect all data dependencies.
800 getPHIDeps(UseMI
, Deps
, TBI
.Pred
, MTM
.MRI
);
801 else if (getDataDeps(UseMI
, Deps
, MTM
.MRI
))
802 updatePhysDepsDownwards(&UseMI
, Deps
, RegUnits
, MTM
.TRI
);
804 // Filter and process dependencies, computing the earliest issue cycle.
806 for (const DataDep
&Dep
: Deps
) {
807 const TraceBlockInfo
&DepTBI
=
808 BlockInfo
[Dep
.DefMI
->getParent()->getNumber()];
809 // Ignore dependencies from outside the current trace.
810 if (!DepTBI
.isUsefulDominator(TBI
))
812 assert(DepTBI
.HasValidInstrDepths
&& "Inconsistent dependency");
813 unsigned DepCycle
= Cycles
.lookup(Dep
.DefMI
).Depth
;
814 // Add latency if DefMI is a real instruction. Transients get latency 0.
815 if (!Dep
.DefMI
->isTransient())
816 DepCycle
+= MTM
.SchedModel
817 .computeOperandLatency(Dep
.DefMI
, Dep
.DefOp
, &UseMI
, Dep
.UseOp
);
818 Cycle
= std::max(Cycle
, DepCycle
);
820 // Remember the instruction depth.
821 InstrCycles
&MICycles
= Cycles
[&UseMI
];
822 MICycles
.Depth
= Cycle
;
824 if (TBI
.HasValidInstrHeights
) {
825 // Update critical path length.
826 TBI
.CriticalPath
= std::max(TBI
.CriticalPath
, Cycle
+ MICycles
.Height
);
827 LLVM_DEBUG(dbgs() << TBI
.CriticalPath
<< '\t' << Cycle
<< '\t' << UseMI
);
829 LLVM_DEBUG(dbgs() << Cycle
<< '\t' << UseMI
);
833 void MachineTraceMetrics::Ensemble::
834 updateDepth(const MachineBasicBlock
*MBB
, const MachineInstr
&UseMI
,
835 SparseSet
<LiveRegUnit
> &RegUnits
) {
836 updateDepth(BlockInfo
[MBB
->getNumber()], UseMI
, RegUnits
);
839 void MachineTraceMetrics::Ensemble::
840 updateDepths(MachineBasicBlock::iterator Start
,
841 MachineBasicBlock::iterator End
,
842 SparseSet
<LiveRegUnit
> &RegUnits
) {
843 for (; Start
!= End
; Start
++)
844 updateDepth(Start
->getParent(), *Start
, RegUnits
);
847 /// Compute instruction depths for all instructions above or in MBB in its
848 /// trace. This assumes that the trace through MBB has already been computed.
849 void MachineTraceMetrics::Ensemble::
850 computeInstrDepths(const MachineBasicBlock
*MBB
) {
851 // The top of the trace may already be computed, and HasValidInstrDepths
852 // implies Head->HasValidInstrDepths, so we only need to start from the first
853 // block in the trace that needs to be recomputed.
854 SmallVector
<const MachineBasicBlock
*, 8> Stack
;
856 TraceBlockInfo
&TBI
= BlockInfo
[MBB
->getNumber()];
857 assert(TBI
.hasValidDepth() && "Incomplete trace");
858 if (TBI
.HasValidInstrDepths
)
860 Stack
.push_back(MBB
);
864 // FIXME: If MBB is non-null at this point, it is the last pre-computed block
865 // in the trace. We should track any live-out physregs that were defined in
866 // the trace. This is quite rare in SSA form, typically created by CSE
867 // hoisting a compare.
868 SparseSet
<LiveRegUnit
> RegUnits
;
869 RegUnits
.setUniverse(MTM
.TRI
->getNumRegUnits());
871 // Go through trace blocks in top-down order, stopping after the center block.
872 while (!Stack
.empty()) {
873 MBB
= Stack
.pop_back_val();
874 LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB
) << ":\n");
875 TraceBlockInfo
&TBI
= BlockInfo
[MBB
->getNumber()];
876 TBI
.HasValidInstrDepths
= true;
877 TBI
.CriticalPath
= 0;
879 // Print out resource depths here as well.
881 dbgs() << format("%7u Instructions\n", TBI
.InstrDepth
);
882 ArrayRef
<unsigned> PRDepths
= getProcResourceDepths(MBB
->getNumber());
883 for (unsigned K
= 0; K
!= PRDepths
.size(); ++K
)
885 unsigned Factor
= MTM
.SchedModel
.getResourceFactor(K
);
886 dbgs() << format("%6uc @ ", MTM
.getCycles(PRDepths
[K
]))
887 << MTM
.SchedModel
.getProcResource(K
)->Name
<< " ("
888 << PRDepths
[K
]/Factor
<< " ops x" << Factor
<< ")\n";
892 // Also compute the critical path length through MBB when possible.
893 if (TBI
.HasValidInstrHeights
)
894 TBI
.CriticalPath
= computeCrossBlockCriticalPath(TBI
);
896 for (const auto &UseMI
: *MBB
) {
897 updateDepth(TBI
, UseMI
, RegUnits
);
902 // Identify physreg dependencies for MI when scanning instructions upwards.
903 // Return the issue height of MI after considering any live regunits.
904 // Height is the issue height computed from virtual register dependencies alone.
905 static unsigned updatePhysDepsUpwards(const MachineInstr
&MI
, unsigned Height
,
906 SparseSet
<LiveRegUnit
> &RegUnits
,
907 const TargetSchedModel
&SchedModel
,
908 const TargetInstrInfo
*TII
,
909 const TargetRegisterInfo
*TRI
) {
910 SmallVector
<unsigned, 8> ReadOps
;
912 for (const MachineOperand
&MO
: MI
.operands()) {
915 Register Reg
= MO
.getReg();
916 if (!Reg
.isPhysical())
919 ReadOps
.push_back(MO
.getOperandNo());
922 // This is a def of Reg. Remove corresponding entries from RegUnits, and
923 // update MI Height to consider the physreg dependencies.
924 for (MCRegUnit Unit
: TRI
->regunits(Reg
.asMCReg())) {
925 SparseSet
<LiveRegUnit
>::iterator I
= RegUnits
.find(Unit
);
926 if (I
== RegUnits
.end())
928 unsigned DepHeight
= I
->Cycle
;
929 if (!MI
.isTransient()) {
930 // We may not know the UseMI of this dependency, if it came from the
931 // live-in list. SchedModel can handle a NULL UseMI.
932 DepHeight
+= SchedModel
.computeOperandLatency(&MI
, MO
.getOperandNo(),
935 Height
= std::max(Height
, DepHeight
);
936 // This regunit is dead above MI.
941 // Now we know the height of MI. Update any regunits read.
942 for (size_t I
= 0, E
= ReadOps
.size(); I
!= E
; ++I
) {
943 MCRegister Reg
= MI
.getOperand(ReadOps
[I
]).getReg().asMCReg();
944 for (MCRegUnit Unit
: TRI
->regunits(Reg
)) {
945 LiveRegUnit
&LRU
= RegUnits
[Unit
];
946 // Set the height to the highest reader of the unit.
947 if (LRU
.Cycle
<= Height
&& LRU
.MI
!= &MI
) {
958 using MIHeightMap
= DenseMap
<const MachineInstr
*, unsigned>;
960 // Push the height of DefMI upwards if required to match UseMI.
961 // Return true if this is the first time DefMI was seen.
962 static bool pushDepHeight(const DataDep
&Dep
, const MachineInstr
&UseMI
,
963 unsigned UseHeight
, MIHeightMap
&Heights
,
964 const TargetSchedModel
&SchedModel
,
965 const TargetInstrInfo
*TII
) {
966 // Adjust height by Dep.DefMI latency.
967 if (!Dep
.DefMI
->isTransient())
968 UseHeight
+= SchedModel
.computeOperandLatency(Dep
.DefMI
, Dep
.DefOp
, &UseMI
,
971 // Update Heights[DefMI] to be the maximum height seen.
972 MIHeightMap::iterator I
;
974 std::tie(I
, New
) = Heights
.insert(std::make_pair(Dep
.DefMI
, UseHeight
));
978 // DefMI has been pushed before. Give it the max height.
979 if (I
->second
< UseHeight
)
980 I
->second
= UseHeight
;
984 /// Assuming that the virtual register defined by DefMI:DefOp was used by
985 /// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
986 /// when reaching the block that contains DefMI.
987 void MachineTraceMetrics::Ensemble::
988 addLiveIns(const MachineInstr
*DefMI
, unsigned DefOp
,
989 ArrayRef
<const MachineBasicBlock
*> Trace
) {
990 assert(!Trace
.empty() && "Trace should contain at least one block");
991 Register Reg
= DefMI
->getOperand(DefOp
).getReg();
992 assert(Reg
.isVirtual());
993 const MachineBasicBlock
*DefMBB
= DefMI
->getParent();
995 // Reg is live-in to all blocks in Trace that follow DefMBB.
996 for (const MachineBasicBlock
*MBB
: llvm::reverse(Trace
)) {
999 TraceBlockInfo
&TBI
= BlockInfo
[MBB
->getNumber()];
1000 // Just add the register. The height will be updated later.
1001 TBI
.LiveIns
.push_back(Reg
);
1005 /// Compute instruction heights in the trace through MBB. This updates MBB and
1006 /// the blocks below it in the trace. It is assumed that the trace has already
1008 void MachineTraceMetrics::Ensemble::
1009 computeInstrHeights(const MachineBasicBlock
*MBB
) {
1010 // The bottom of the trace may already be computed.
1011 // Find the blocks that need updating.
1012 SmallVector
<const MachineBasicBlock
*, 8> Stack
;
1014 TraceBlockInfo
&TBI
= BlockInfo
[MBB
->getNumber()];
1015 assert(TBI
.hasValidHeight() && "Incomplete trace");
1016 if (TBI
.HasValidInstrHeights
)
1018 Stack
.push_back(MBB
);
1019 TBI
.LiveIns
.clear();
1023 // As we move upwards in the trace, keep track of instructions that are
1024 // required by deeper trace instructions. Map MI -> height required so far.
1025 MIHeightMap Heights
;
1027 // For physregs, the def isn't known when we see the use.
1028 // Instead, keep track of the highest use of each regunit.
1029 SparseSet
<LiveRegUnit
> RegUnits
;
1030 RegUnits
.setUniverse(MTM
.TRI
->getNumRegUnits());
1032 // If the bottom of the trace was already precomputed, initialize heights
1033 // from its live-in list.
1034 // MBB is the highest precomputed block in the trace.
1036 TraceBlockInfo
&TBI
= BlockInfo
[MBB
->getNumber()];
1037 for (LiveInReg
&LI
: TBI
.LiveIns
) {
1038 if (LI
.Reg
.isVirtual()) {
1039 // For virtual registers, the def latency is included.
1040 unsigned &Height
= Heights
[MTM
.MRI
->getVRegDef(LI
.Reg
)];
1041 if (Height
< LI
.Height
)
1044 // For register units, the def latency is not included because we don't
1045 // know the def yet.
1046 RegUnits
[LI
.Reg
].Cycle
= LI
.Height
;
1051 // Go through the trace blocks in bottom-up order.
1052 SmallVector
<DataDep
, 8> Deps
;
1053 for (;!Stack
.empty(); Stack
.pop_back()) {
1055 LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB
) << ":\n");
1056 TraceBlockInfo
&TBI
= BlockInfo
[MBB
->getNumber()];
1057 TBI
.HasValidInstrHeights
= true;
1058 TBI
.CriticalPath
= 0;
1061 dbgs() << format("%7u Instructions\n", TBI
.InstrHeight
);
1062 ArrayRef
<unsigned> PRHeights
= getProcResourceHeights(MBB
->getNumber());
1063 for (unsigned K
= 0; K
!= PRHeights
.size(); ++K
)
1065 unsigned Factor
= MTM
.SchedModel
.getResourceFactor(K
);
1066 dbgs() << format("%6uc @ ", MTM
.getCycles(PRHeights
[K
]))
1067 << MTM
.SchedModel
.getProcResource(K
)->Name
<< " ("
1068 << PRHeights
[K
]/Factor
<< " ops x" << Factor
<< ")\n";
1072 // Get dependencies from PHIs in the trace successor.
1073 const MachineBasicBlock
*Succ
= TBI
.Succ
;
1074 // If MBB is the last block in the trace, and it has a back-edge to the
1075 // loop header, get loop-carried dependencies from PHIs in the header. For
1076 // that purpose, pretend that all the loop header PHIs have height 0.
1078 if (const MachineLoop
*Loop
= getLoopFor(MBB
))
1079 if (MBB
->isSuccessor(Loop
->getHeader()))
1080 Succ
= Loop
->getHeader();
1083 for (const auto &PHI
: *Succ
) {
1087 getPHIDeps(PHI
, Deps
, MBB
, MTM
.MRI
);
1088 if (!Deps
.empty()) {
1089 // Loop header PHI heights are all 0.
1090 unsigned Height
= TBI
.Succ
? Cycles
.lookup(&PHI
).Height
: 0;
1091 LLVM_DEBUG(dbgs() << "pred\t" << Height
<< '\t' << PHI
);
1092 if (pushDepHeight(Deps
.front(), PHI
, Height
, Heights
, MTM
.SchedModel
,
1094 addLiveIns(Deps
.front().DefMI
, Deps
.front().DefOp
, Stack
);
1099 // Go through the block backwards.
1100 for (const MachineInstr
&MI
: reverse(*MBB
)) {
1101 // Find the MI height as determined by virtual register uses in the
1104 MIHeightMap::iterator HeightI
= Heights
.find(&MI
);
1105 if (HeightI
!= Heights
.end()) {
1106 Cycle
= HeightI
->second
;
1107 // We won't be seeing any more MI uses.
1108 Heights
.erase(HeightI
);
1111 // Don't process PHI deps. They depend on the specific predecessor, and
1112 // we'll get them when visiting the predecessor.
1114 bool HasPhysRegs
= !MI
.isPHI() && getDataDeps(MI
, Deps
, MTM
.MRI
);
1116 // There may also be regunit dependencies to include in the height.
1118 Cycle
= updatePhysDepsUpwards(MI
, Cycle
, RegUnits
, MTM
.SchedModel
,
1121 // Update the required height of any virtual registers read by MI.
1122 for (const DataDep
&Dep
: Deps
)
1123 if (pushDepHeight(Dep
, MI
, Cycle
, Heights
, MTM
.SchedModel
, MTM
.TII
))
1124 addLiveIns(Dep
.DefMI
, Dep
.DefOp
, Stack
);
1126 InstrCycles
&MICycles
= Cycles
[&MI
];
1127 MICycles
.Height
= Cycle
;
1128 if (!TBI
.HasValidInstrDepths
) {
1129 LLVM_DEBUG(dbgs() << Cycle
<< '\t' << MI
);
1132 // Update critical path length.
1133 TBI
.CriticalPath
= std::max(TBI
.CriticalPath
, Cycle
+ MICycles
.Depth
);
1134 LLVM_DEBUG(dbgs() << TBI
.CriticalPath
<< '\t' << Cycle
<< '\t' << MI
);
1137 // Update virtual live-in heights. They were added by addLiveIns() with a 0
1138 // height because the final height isn't known until now.
1139 LLVM_DEBUG(dbgs() << printMBBReference(*MBB
) << " Live-ins:");
1140 for (LiveInReg
&LIR
: TBI
.LiveIns
) {
1141 const MachineInstr
*DefMI
= MTM
.MRI
->getVRegDef(LIR
.Reg
);
1142 LIR
.Height
= Heights
.lookup(DefMI
);
1143 LLVM_DEBUG(dbgs() << ' ' << printReg(LIR
.Reg
) << '@' << LIR
.Height
);
1146 // Transfer the live regunits to the live-in list.
1147 for (const LiveRegUnit
&RU
: RegUnits
) {
1148 TBI
.LiveIns
.push_back(LiveInReg(RU
.RegUnit
, RU
.Cycle
));
1149 LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU
.RegUnit
, MTM
.TRI
) << '@'
1152 LLVM_DEBUG(dbgs() << '\n');
1154 if (!TBI
.HasValidInstrDepths
)
1156 // Add live-ins to the critical path length.
1157 TBI
.CriticalPath
= std::max(TBI
.CriticalPath
,
1158 computeCrossBlockCriticalPath(TBI
));
1159 LLVM_DEBUG(dbgs() << "Critical path: " << TBI
.CriticalPath
<< '\n');
1163 MachineTraceMetrics::Trace
1164 MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock
*MBB
) {
1165 TraceBlockInfo
&TBI
= BlockInfo
[MBB
->getNumber()];
1167 if (!TBI
.hasValidDepth() || !TBI
.hasValidHeight())
1169 if (!TBI
.HasValidInstrDepths
)
1170 computeInstrDepths(MBB
);
1171 if (!TBI
.HasValidInstrHeights
)
1172 computeInstrHeights(MBB
);
1174 return Trace(*this, TBI
);
1178 MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr
&MI
) const {
1179 assert(getBlockNum() == unsigned(MI
.getParent()->getNumber()) &&
1180 "MI must be in the trace center block");
1181 InstrCycles Cyc
= getInstrCycles(MI
);
1182 return getCriticalPath() - (Cyc
.Depth
+ Cyc
.Height
);
1186 MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr
&PHI
) const {
1187 const MachineBasicBlock
*MBB
= TE
.MTM
.MF
->getBlockNumbered(getBlockNum());
1188 SmallVector
<DataDep
, 1> Deps
;
1189 getPHIDeps(PHI
, Deps
, MBB
, TE
.MTM
.MRI
);
1190 assert(Deps
.size() == 1 && "PHI doesn't have MBB as a predecessor");
1191 DataDep
&Dep
= Deps
.front();
1192 unsigned DepCycle
= getInstrCycles(*Dep
.DefMI
).Depth
;
1193 // Add latency if DefMI is a real instruction. Transients get latency 0.
1194 if (!Dep
.DefMI
->isTransient())
1195 DepCycle
+= TE
.MTM
.SchedModel
.computeOperandLatency(Dep
.DefMI
, Dep
.DefOp
,
1200 /// When bottom is set include instructions in current block in estimate.
1201 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom
) const {
1202 // Find the limiting processor resource.
1203 // Numbers have been pre-scaled to be comparable.
1205 ArrayRef
<unsigned> PRDepths
= TE
.getProcResourceDepths(getBlockNum());
1207 ArrayRef
<unsigned> PRCycles
= TE
.MTM
.getProcReleaseAtCycles(getBlockNum());
1208 for (unsigned K
= 0; K
!= PRDepths
.size(); ++K
)
1209 PRMax
= std::max(PRMax
, PRDepths
[K
] + PRCycles
[K
]);
1211 for (unsigned PRD
: PRDepths
)
1212 PRMax
= std::max(PRMax
, PRD
);
1214 // Convert to cycle count.
1215 PRMax
= TE
.MTM
.getCycles(PRMax
);
1217 /// All instructions before current block
1218 unsigned Instrs
= TBI
.InstrDepth
;
1219 // plus instructions in current block
1221 Instrs
+= TE
.MTM
.BlockInfo
[getBlockNum()].InstrCount
;
1222 if (unsigned IW
= TE
.MTM
.SchedModel
.getIssueWidth())
1224 // Assume issue width 1 without a schedule model.
1225 return std::max(Instrs
, PRMax
);
1228 unsigned MachineTraceMetrics::Trace::getResourceLength(
1229 ArrayRef
<const MachineBasicBlock
*> Extrablocks
,
1230 ArrayRef
<const MCSchedClassDesc
*> ExtraInstrs
,
1231 ArrayRef
<const MCSchedClassDesc
*> RemoveInstrs
) const {
1232 // Add up resources above and below the center block.
1233 ArrayRef
<unsigned> PRDepths
= TE
.getProcResourceDepths(getBlockNum());
1234 ArrayRef
<unsigned> PRHeights
= TE
.getProcResourceHeights(getBlockNum());
1237 // Capture computing cycles from extra instructions
1238 auto extraCycles
= [this](ArrayRef
<const MCSchedClassDesc
*> Instrs
,
1239 unsigned ResourceIdx
)
1241 unsigned Cycles
= 0;
1242 for (const MCSchedClassDesc
*SC
: Instrs
) {
1245 for (TargetSchedModel::ProcResIter
1246 PI
= TE
.MTM
.SchedModel
.getWriteProcResBegin(SC
),
1247 PE
= TE
.MTM
.SchedModel
.getWriteProcResEnd(SC
);
1249 if (PI
->ProcResourceIdx
!= ResourceIdx
)
1251 Cycles
+= (PI
->ReleaseAtCycle
*
1252 TE
.MTM
.SchedModel
.getResourceFactor(ResourceIdx
));
1258 for (unsigned K
= 0; K
!= PRDepths
.size(); ++K
) {
1259 unsigned PRCycles
= PRDepths
[K
] + PRHeights
[K
];
1260 for (const MachineBasicBlock
*MBB
: Extrablocks
)
1261 PRCycles
+= TE
.MTM
.getProcReleaseAtCycles(MBB
->getNumber())[K
];
1262 PRCycles
+= extraCycles(ExtraInstrs
, K
);
1263 PRCycles
-= extraCycles(RemoveInstrs
, K
);
1264 PRMax
= std::max(PRMax
, PRCycles
);
1266 // Convert to cycle count.
1267 PRMax
= TE
.MTM
.getCycles(PRMax
);
1269 // Instrs: #instructions in current trace outside current block.
1270 unsigned Instrs
= TBI
.InstrDepth
+ TBI
.InstrHeight
;
1271 // Add instruction count from the extra blocks.
1272 for (const MachineBasicBlock
*MBB
: Extrablocks
)
1273 Instrs
+= TE
.MTM
.getResources(MBB
)->InstrCount
;
1274 Instrs
+= ExtraInstrs
.size();
1275 Instrs
-= RemoveInstrs
.size();
1276 if (unsigned IW
= TE
.MTM
.SchedModel
.getIssueWidth())
1278 // Assume issue width 1 without a schedule model.
1279 return std::max(Instrs
, PRMax
);
1282 bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr
&DefMI
,
1283 const MachineInstr
&UseMI
) const {
1284 if (DefMI
.getParent() == UseMI
.getParent())
1287 const TraceBlockInfo
&DepTBI
= TE
.BlockInfo
[DefMI
.getParent()->getNumber()];
1288 const TraceBlockInfo
&TBI
= TE
.BlockInfo
[UseMI
.getParent()->getNumber()];
1290 return DepTBI
.isUsefulDominator(TBI
);
1293 void MachineTraceMetrics::Ensemble::print(raw_ostream
&OS
) const {
1294 OS
<< getName() << " ensemble:\n";
1295 for (unsigned i
= 0, e
= BlockInfo
.size(); i
!= e
; ++i
) {
1296 OS
<< " %bb." << i
<< '\t';
1297 BlockInfo
[i
].print(OS
);
1302 void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream
&OS
) const {
1303 if (hasValidDepth()) {
1304 OS
<< "depth=" << InstrDepth
;
1306 OS
<< " pred=" << printMBBReference(*Pred
);
1309 OS
<< " head=%bb." << Head
;
1310 if (HasValidInstrDepths
)
1313 OS
<< "depth invalid";
1315 if (hasValidHeight()) {
1316 OS
<< "height=" << InstrHeight
;
1318 OS
<< " succ=" << printMBBReference(*Succ
);
1321 OS
<< " tail=%bb." << Tail
;
1322 if (HasValidInstrHeights
)
1325 OS
<< "height invalid";
1326 if (HasValidInstrDepths
&& HasValidInstrHeights
)
1327 OS
<< ", crit=" << CriticalPath
;
1330 void MachineTraceMetrics::Trace::print(raw_ostream
&OS
) const {
1331 unsigned MBBNum
= &TBI
- &TE
.BlockInfo
[0];
1333 OS
<< TE
.getName() << " trace %bb." << TBI
.Head
<< " --> %bb." << MBBNum
1334 << " --> %bb." << TBI
.Tail
<< ':';
1335 if (TBI
.hasValidHeight() && TBI
.hasValidDepth())
1336 OS
<< ' ' << getInstrCount() << " instrs.";
1337 if (TBI
.HasValidInstrDepths
&& TBI
.HasValidInstrHeights
)
1338 OS
<< ' ' << TBI
.CriticalPath
<< " cycles.";
1340 const MachineTraceMetrics::TraceBlockInfo
*Block
= &TBI
;
1341 OS
<< "\n%bb." << MBBNum
;
1342 while (Block
->hasValidDepth() && Block
->Pred
) {
1343 unsigned Num
= Block
->Pred
->getNumber();
1344 OS
<< " <- " << printMBBReference(*Block
->Pred
);
1345 Block
= &TE
.BlockInfo
[Num
];
1350 while (Block
->hasValidHeight() && Block
->Succ
) {
1351 unsigned Num
= Block
->Succ
->getNumber();
1352 OS
<< " -> " << printMBBReference(*Block
->Succ
);
1353 Block
= &TE
.BlockInfo
[Num
];