//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "machine-trace-metrics"
char MachineTraceMetrics::ID = 0;

char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
                      "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
                    "Machine Trace Metrics", false, true)
MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
  std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
}
void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MachineBranchProbabilityInfo>();
  AU.addRequired<MachineLoopInfo>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  const TargetSubtargetInfo &ST = MF->getSubtarget();
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  MRI = &MF->getRegInfo();
  Loops = &getAnalysis<MachineLoopInfo>();
  SchedModel.init(&ST);
  BlockInfo.resize(MF->getNumBlockIDs());
  ProcResourceCycles.resize(MF->getNumBlockIDs() *
                            SchedModel.getNumProcResourceKinds());
  return false;
}
void MachineTraceMetrics::releaseMemory() {
  MF = nullptr;
  BlockInfo.clear();
  for (unsigned i = 0; i != TS_NumStrategies; ++i) {
    delete Ensembles[i];
    Ensembles[i] = nullptr;
  }
}
//===----------------------------------------------------------------------===//
//                          Fixed block information
//===----------------------------------------------------------------------===//
//
// The number of instructions in a basic block and the CPU resources used by
// those instructions don't depend on any given trace strategy.
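//
// For example (illustrative numbers only): on a model with two integer ALUs
// and a single divider, the scheduling model scales resource cycles so that
// one divider-cycle roughly counts as much as two ALU-cycles. getResources()
// below caches these pre-scaled per-resource cycle counts alongside the plain
// instruction count, so blocks can be compared directly.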
/// Compute the resource usage in basic block MBB.
const MachineTraceMetrics::FixedBlockInfo*
MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
  assert(MBB && "No basic block");
  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
  if (FBI->hasResources())
    return FBI;

  // Compute resource usage in the block.
  FBI->HasCalls = false;
  unsigned InstrCount = 0;

  // Add up per-processor resource cycles as well.
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  SmallVector<unsigned, 32> PRCycles(PRKinds);

  for (const auto &MI : *MBB) {
    if (MI.isTransient())
      continue;
    ++InstrCount;
    if (MI.isCall())
      FBI->HasCalls = true;

    // Count processor resources used.
    if (!SchedModel.hasInstrSchedModel())
      continue;
    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
    if (!SC->isValid())
      continue;

    for (TargetSchedModel::ProcResIter
         PI = SchedModel.getWriteProcResBegin(SC),
         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
      PRCycles[PI->ProcResourceIdx] += PI->Cycles;
    }
  }
  FBI->InstrCount = InstrCount;

  // Scale the resource cycles so they are comparable.
  unsigned PROffset = MBB->getNumber() * PRKinds;
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceCycles[PROffset + K] =
      PRCycles[K] * SchedModel.getResourceFactor(K);

  return FBI;
}
ArrayRef<unsigned>
MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
  assert(BlockInfo[MBBNum].hasResources() &&
         "getResources() must be called before getProcResourceCycles()");
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
  return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
}
//===----------------------------------------------------------------------===//
//                         Ensemble utility functions
//===----------------------------------------------------------------------===//
MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
  : MTM(*ct) {
  BlockInfo.resize(MTM.BlockInfo.size());
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
}
// Virtual destructor serves as an anchor.
MachineTraceMetrics::Ensemble::~Ensemble() = default;
const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
  return MTM.Loops->getLoopFor(MBB);
}
// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace above MBB.
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources from trace above. The top block is simple.
  if (!TBI->Pred) {
    TBI->InstrDepth = 0;
    TBI->Head = MBB->getNumber();
    std::fill(ProcResourceDepths.begin() + PROffset,
              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
    return;
  }

  // Compute from the block above. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned PredNum = TBI->Pred->getNumber();
  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
  TBI->Head = PredTBI->Head;

  // Compute per-resource depths.
  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}
// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace below MBB.
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources for the current block.
  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());

  // The trace tail is done.
  if (!TBI->Succ) {
    TBI->Tail = MBB->getNumber();
    llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
    return;
  }

  // Compute from the block below. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned SuccNum = TBI->Succ->getNumber();
  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
  TBI->InstrHeight += SuccTBI->InstrHeight;
  TBI->Tail = SuccTBI->Tail;

  // Compute per-resource heights.
  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}
// Check if depth resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getDepthResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidDepth() ? TBI : nullptr;
}
// Check if height resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getHeightResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidHeight() ? TBI : nullptr;
}
/// Get an array of processor resource depths for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by all blocks preceding MBB in its trace. It does not include instructions
/// in MBB.
///
/// Compare TraceBlockInfo::InstrDepth.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceDepths(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
  return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
}
/// Get an array of processor resource heights for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by this block and all blocks following it in its trace.
///
/// Compare TraceBlockInfo::InstrHeight.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceHeights(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
  return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
}
//===----------------------------------------------------------------------===//
//                         Trace Selection Strategies
//===----------------------------------------------------------------------===//
//
// A trace selection strategy is implemented as a sub-class of Ensemble. The
// trace through a block B is computed by two DFS traversals of the CFG
// starting from B. One upwards, and one downwards. During the upwards DFS,
// pickTracePred() is called on the post-ordered blocks. During the downwards
// DFS, pickTraceSucc() is called in a post-order.
//
// We never allow traces that leave loops, but we do allow traces to enter
// nested loops. We also never allow traces to contain back-edges.
//
// This means that a loop header can never appear above the center block of a
// trace, except as the trace head. Below the center block, loop exiting edges
// are banned.
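//
// As an illustrative example, in a diamond CFG
//
//        A
//       / \
//      B   C
//       \ /
//        D
//
// the trace through B picks one predecessor (A) and one successor (D), giving
// the trace A -> B -> D; block C is simply not part of that trace.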
// Return true if an edge from the From loop to the To loop is leaving a loop.
// Either of To and From can be null.
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
  return From && !From->contains(To);
}
namespace {

// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "MinInstr"; }
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;

public:
  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
    : MachineTraceMetrics::Ensemble(mtm) {}
};

} // end anonymous namespace
// Select the preferred predecessor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
  if (MBB->pred_empty())
    return nullptr;
  const MachineLoop *CurLoop = getLoopFor(MBB);
  // Don't leave loops, and never follow back-edges.
  if (CurLoop && MBB == CurLoop->getHeader())
    return nullptr;
  unsigned CurCount = MTM.getResources(MBB)->InstrCount;
  const MachineBasicBlock *Best = nullptr;
  unsigned BestDepth = 0;
  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
    const MachineTraceMetrics::TraceBlockInfo *PredTBI =
      getDepthResources(Pred);
    // Ignore cycles that aren't natural loops.
    if (!PredTBI)
      continue;
    // Pick the predecessor that would give this block the smallest InstrDepth.
    unsigned Depth = PredTBI->InstrDepth + CurCount;
    if (!Best || Depth < BestDepth) {
      Best = Pred;
      BestDepth = Depth;
    }
  }
  return Best;
}
// Select the preferred successor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
  if (MBB->succ_empty())
    return nullptr;
  const MachineLoop *CurLoop = getLoopFor(MBB);
  const MachineBasicBlock *Best = nullptr;
  unsigned BestHeight = 0;
  for (const MachineBasicBlock *Succ : MBB->successors()) {
    // Don't consider back-edges.
    if (CurLoop && Succ == CurLoop->getHeader())
      continue;
    // Don't consider successors exiting CurLoop.
    if (isExitingLoop(CurLoop, getLoopFor(Succ)))
      continue;
    const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
      getHeightResources(Succ);
    // Ignore cycles that aren't natural loops.
    if (!SuccTBI)
      continue;
    // Pick the successor that would give this block the smallest InstrHeight.
    unsigned Height = SuccTBI->InstrHeight;
    if (!Best || Height < BestHeight) {
      Best = Succ;
      BestHeight = Height;
    }
  }
  return Best;
}
// Get an Ensemble sub-class for the requested trace strategy.
MachineTraceMetrics::Ensemble *
MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
  assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
  Ensemble *&E = Ensembles[strategy];
  if (E)
    return E;

  // Allocate new Ensemble on demand.
  switch (strategy) {
  case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
  default: llvm_unreachable("Invalid trace strategy enum");
  }
}
void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
                    << '\n');
  BlockInfo[MBB->getNumber()].invalidate();
  for (unsigned i = 0; i != TS_NumStrategies; ++i)
    if (Ensembles[i])
      Ensembles[i]->invalidate(MBB);
}
void MachineTraceMetrics::verifyAnalysis() const {
  if (!MF)
    return;
#ifndef NDEBUG
  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
  for (unsigned i = 0; i != TS_NumStrategies; ++i)
    if (Ensembles[i])
      Ensembles[i]->verify();
#endif
}
//===----------------------------------------------------------------------===//
//                               Trace building
//===----------------------------------------------------------------------===//
//
// Traces are built by two CFG traversals. To avoid recomputing too much, use a
// set abstraction that confines the search to the current loop, and doesn't
// revisit blocks.
namespace {

struct LoopBounds {
  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
  const MachineLoopInfo *Loops;
  bool Downward = false;

  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
             const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
};

} // end anonymous namespace
// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
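//
// Returning false from insertEdge() below is what does the pruning: the
// extended post-order iterators simply never follow an edge that insertEdge()
// rejects.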
namespace llvm {

template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;

public:
  po_iterator_storage(LoopBounds &lb) : LB(lb) {}

  void finishPostorder(const MachineBasicBlock*) {}

  bool insertEdge(Optional<const MachineBasicBlock *> From,
                  const MachineBasicBlock *To) {
    // Skip already visited To blocks.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;
    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        if ((LB.Downward ? To : *From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }
    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To).second;
  }
};

} // end namespace llvm
/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
                    << printMBBReference(*MBB) << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);

  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  for (auto I : inverse_post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  pred for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(I);
    LLVM_DEBUG({
      if (TBI.Pred)
        dbgs() << printMBBReference(*TBI.Pred) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(I);
  }

  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  for (auto I : post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  succ for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(I);
    LLVM_DEBUG({
      if (TBI.Succ)
        dbgs() << printMBBReference(*TBI.Succ) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(I);
  }
}
/// Invalidate traces through BadMBB.
void
MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  SmallVector<const MachineBasicBlock*, 16> WorkList;
  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];

  // Invalidate height resources of blocks above MBB.
  if (BadTBI.hasValidHeight()) {
    BadTBI.invalidateHeight();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " height.\n");
      // Find any MBB predecessors that have MBB as their preferred successor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
        TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
        if (!TBI.hasValidHeight())
          continue;
        if (TBI.Succ == MBB) {
          TBI.invalidateHeight();
          WorkList.push_back(Pred);
          continue;
        }
        // Verify that TBI.Succ is actually a successor of Pred.
        assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Invalidate depth resources of blocks below MBB.
  if (BadTBI.hasValidDepth()) {
    BadTBI.invalidateDepth();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " depth.\n");
      // Find any MBB successors that have MBB as their preferred predecessor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Succ : MBB->successors()) {
        TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
        if (!TBI.hasValidDepth())
          continue;
        if (TBI.Pred == MBB) {
          TBI.invalidateDepth();
          WorkList.push_back(Succ);
          continue;
        }
        // Verify that TBI.Pred is actually a predecessor of Succ.
        assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Clear any per-instruction data. We only have to do this for BadMBB itself
  // because the instructions in that block may change. Other blocks may be
  // invalidated, but their instructions will stay the same, so there is no
  // need to erase the Cycle entries. They will be overwritten when we
  // recompute.
  for (const auto &I : *BadMBB)
    Cycles.erase(&I);
}
void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}
//===----------------------------------------------------------------------===//
//                             Data Dependencies
//===----------------------------------------------------------------------===//
//
// Compute the depth and height of each instruction based on data dependencies
// and instruction latencies. These cycle numbers assume that the CPU can issue
// an infinite number of instructions per cycle as long as their dependencies
// are ready.

// A data dependency is represented as a defining MI and operand numbers on the
// defining and using MI.
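//
// For example (hypothetical SSA machine code), given
//   %1 = ADD %0, %0      ; defines %1 in operand 0
//   %2 = MUL %1, %1      ; reads %1 in operand 1 (and operand 2)
// the dependency of the MUL on the ADD through its first use is
// DataDep{DefMI = ADD, DefOp = 0, UseOp = 1}.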
namespace {

struct DataDep {
  const MachineInstr *DefMI;
  unsigned DefOp;
  unsigned UseOp;

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
    : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
    : UseOp(UseOp) {
    assert(Register::isVirtualRegister(VirtReg));
    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
    assert(!DefI.atEnd() && "Register has no defs");
    DefMI = DefI->getParent();
    DefOp = DefI.getOperandNo();
    assert((++DefI).atEnd() && "Register has multiple defs");
  }
};

} // end anonymous namespace
// Get the input data dependencies that must be ready before UseMI can issue.
// Return true if UseMI has any physreg operands.
static bool getDataDeps(const MachineInstr &UseMI,
                        SmallVectorImpl<DataDep> &Deps,
                        const MachineRegisterInfo *MRI) {
  // Debug values should not be included in any calculations.
  if (UseMI.isDebugInstr())
    return false;

  bool HasPhysRegs = false;
  for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
       E = UseMI.operands_end(); I != E; ++I) {
    const MachineOperand &MO = *I;
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Register::isPhysicalRegister(Reg)) {
      HasPhysRegs = true;
      continue;
    }
    // Collect virtual register reads.
    if (MO.readsReg())
      Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
  }
  return HasPhysRegs;
}
// Get the input data dependencies of a PHI instruction, using Pred as the
// preferred predecessor.
// This will add at most one dependency to Deps.
static void getPHIDeps(const MachineInstr &UseMI,
                       SmallVectorImpl<DataDep> &Deps,
                       const MachineBasicBlock *Pred,
                       const MachineRegisterInfo *MRI) {
  // No predecessor at the beginning of a trace. Ignore dependencies.
  if (!Pred)
    return;
  assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
  for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
    if (UseMI.getOperand(i + 1).getMBB() == Pred) {
      Register Reg = UseMI.getOperand(i).getReg();
      Deps.push_back(DataDep(MRI, Reg, i));
      return;
    }
  }
}
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
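// RegUnits maps each live register unit to the instruction and operand that
// last defined it above the current point, so a later physreg read can look
// up its defining instruction directly.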
static void updatePhysDepsDownwards(const MachineInstr *UseMI,
                                    SmallVectorImpl<DataDep> &Deps,
                                    SparseSet<LiveRegUnit> &RegUnits,
                                    const TargetRegisterInfo *TRI) {
  SmallVector<MCRegister, 8> Kills;
  SmallVector<unsigned, 8> LiveDefOps;

  for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
       ME = UseMI->operands_end(); MI != ME; ++MI) {
    const MachineOperand &MO = *MI;
    if (!MO.isReg() || !MO.getReg().isPhysical())
      continue;
    MCRegister Reg = MO.getReg().asMCReg();
    // Track live defs and kills for updating RegUnits.
    if (MO.isDef()) {
      if (MO.isDead())
        Kills.push_back(Reg);
      else
        LiveDefOps.push_back(UseMI->getOperandNo(MI));
    } else if (MO.isKill())
      Kills.push_back(Reg);
    // Identify dependencies.
    if (!MO.readsReg())
      continue;
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
      break;
    }
  }

  // Update RegUnits to reflect live registers after UseMI.
  // First kills.
  for (MCRegister Kill : Kills)
    for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
      RegUnits.erase(*Units);

  // Second, live defs.
  for (unsigned DefOp : LiveDefOps) {
    for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg().asMCReg(),
                                 TRI);
         Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      LRU.MI = UseMI;
      LRU.Op = DefOp;
    }
  }
}
/// The length of the critical path through a trace is the maximum of two path
/// lengths:
///
/// 1. The maximum height+depth over all instructions in the trace center block.
///
/// 2. The longest cross-block dependency chain. For small blocks, it is
///    possible that the critical path through the trace doesn't include any
///    instructions in the block.
///
/// This function computes the second number from the live-in list of the
/// center block.
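///
/// Worked example (illustrative numbers): if a live-in virtual register has
/// height 6 below the center block and its def has depth 4 above it, that
/// chain alone forces a critical path of at least 6 + 4 = 10 cycles, even if
/// every chain inside the center block is shorter.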
unsigned MachineTraceMetrics::Ensemble::
computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
  assert(TBI.HasValidInstrDepths && "Missing depth info");
  assert(TBI.HasValidInstrHeights && "Missing height info");
  unsigned MaxLen = 0;
  for (const LiveInReg &LIR : TBI.LiveIns) {
    if (!LIR.Reg.isVirtual())
      continue;
    const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
    // Ignore dependencies outside the current trace.
    const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
    if (!DefTBI.isUsefulDominator(TBI))
      continue;
    unsigned Len = LIR.Height + Cycles[DefMI].Depth;
    MaxLen = std::max(MaxLen, Len);
  }
  return MaxLen;
}
void MachineTraceMetrics::Ensemble::
updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
            SparseSet<LiveRegUnit> &RegUnits) {
  SmallVector<DataDep, 8> Deps;
  // Collect all data dependencies.
  if (UseMI.isPHI())
    getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
  else if (getDataDeps(UseMI, Deps, MTM.MRI))
    updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);

  // Filter and process dependencies, computing the earliest issue cycle.
  unsigned Cycle = 0;
  for (const DataDep &Dep : Deps) {
    const TraceBlockInfo &DepTBI =
      BlockInfo[Dep.DefMI->getParent()->getNumber()];
    // Ignore dependencies from outside the current trace.
    if (!DepTBI.isUsefulDominator(TBI))
      continue;
    assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
    unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
    // Add latency if DefMI is a real instruction. Transients get latency 0.
    if (!Dep.DefMI->isTransient())
      DepCycle += MTM.SchedModel
        .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
    Cycle = std::max(Cycle, DepCycle);
  }
  // Remember the instruction depth.
  InstrCycles &MICycles = Cycles[&UseMI];
  MICycles.Depth = Cycle;

  if (TBI.HasValidInstrHeights) {
    // Update critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
    LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
  } else {
    LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
  }
}
void MachineTraceMetrics::Ensemble::
updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
            SparseSet<LiveRegUnit> &RegUnits) {
  updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
}
void MachineTraceMetrics::Ensemble::
updateDepths(MachineBasicBlock::iterator Start,
             MachineBasicBlock::iterator End,
             SparseSet<LiveRegUnit> &RegUnits) {
  for (; Start != End; Start++)
    updateDepth(Start->getParent(), *Start, RegUnits);
}
/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  } while (MBB);

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    for (const auto &UseMI : *MBB) {
      updateDepth(TBI, UseMI, RegUnits);
    }
  }
}
// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> ReadOps;

  for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
                                        MOE = MI.operands_end();
       MOI != MOE; ++MOI) {
    const MachineOperand &MO = *MOI;
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Register::isPhysicalRegister(Reg))
      continue;
    if (MO.readsReg())
      ReadOps.push_back(MI.getOperandNo(MOI));
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
         ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI.isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
                                                      I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (size_t I = 0, E = ReadOps.size(); I != E; ++I) {
    MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg();
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != &MI) {
        LRU.Cycle = Height;
        LRU.MI = &MI;
        LRU.Op = ReadOps[I];
      }
    }
  }

  return Height;
}
= DenseMap
<const MachineInstr
*, unsigned>;
// Push the height of DefMI upwards if required to match UseMI.
// Return true if this is the first time DefMI was seen.
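// For example (illustrative numbers): if UseMI must issue at height 10 and the
// operand latency from Dep.DefMI to UseMI is 3, then DefMI needs a height of
// at least 13; Heights[DefMI] keeps the maximum such requirement over all uses.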
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
                          unsigned UseHeight, MIHeightMap &Heights,
                          const TargetSchedModel &SchedModel,
                          const TargetInstrInfo *TII) {
  // Adjust height by Dep.DefMI latency.
  if (!Dep.DefMI->isTransient())
    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
                                                  Dep.UseOp);

  // Update Heights[DefMI] to be the maximum height seen.
  MIHeightMap::iterator I;
  bool New;
  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
  if (New)
    return true;

  // DefMI has been pushed before. Give it the max height.
  if (I->second < UseHeight)
    I->second = UseHeight;
  return false;
}
/// Assuming that the virtual register defined by DefMI:DefOp was used by
/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
           ArrayRef<const MachineBasicBlock*> Trace) {
  assert(!Trace.empty() && "Trace should contain at least one block");
  Register Reg = DefMI->getOperand(DefOp).getReg();
  assert(Register::isVirtualRegister(Reg));
  const MachineBasicBlock *DefMBB = DefMI->getParent();

  // Reg is live-in to all blocks in Trace that follow DefMBB.
  for (unsigned i = Trace.size(); i; --i) {
    const MachineBasicBlock *MBB = Trace[i-1];
    if (MBB == DefMBB)
      return;
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    // Just add the register. The height will be updated later.
    TBI.LiveIns.push_back(Reg);
  }
}
/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);
  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (LiveInReg &LI : TBI.LiveIns) {
      if (LI.Reg.isVirtual()) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }
  // Go through the trace blocks in bottom-up order.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;

    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });
    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      for (const auto &PHI : *Succ) {
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
                            MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }
    // Go through the block backwards.
    for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
         BI != BB;) {
      const MachineInstr &MI = *--BI;

      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(&MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
                                      MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (const DataDep &Dep : Deps)
        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);

      InstrCycles &MICycles = Cycles[&MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
    }
    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
    for (LiveInReg &LIR : TBI.LiveIns) {
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (SparseSet<LiveRegUnit>::const_iterator
         RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
      TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
      LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) << '@'
                        << RI->Cycle);
    }
    LLVM_DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}
MachineTraceMetrics::Trace
MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];

  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
    computeTrace(MBB);
  if (!TBI.HasValidInstrDepths)
    computeInstrDepths(MBB);
  if (!TBI.HasValidInstrHeights)
    computeInstrHeights(MBB);

  return Trace(*this, TBI);
}
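// The slack of an instruction in the trace center block is the number of
// cycles it can be delayed without lengthening the critical path:
// slack = CriticalPath - (Depth + Height). For example (illustrative numbers),
// an instruction at depth 3 and height 4 on a trace with a 10-cycle critical
// path has 3 cycles of slack.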
unsigned
MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
  assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
         "MI must be in the trace center block");
  InstrCycles Cyc = getInstrCycles(MI);
  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
}
unsigned
MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
  SmallVector<DataDep, 1> Deps;
  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
  DataDep &Dep = Deps.front();
  unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
  // Add latency if DefMI is a real instruction. Transients get latency 0.
  if (!Dep.DefMI->isTransient())
    DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
                                                        &PHI, Dep.UseOp);
  return DepCycle;
}
/// When Bottom is set, include the instructions in the current block in the
/// estimate.
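///
/// For example (illustrative numbers): with 12 instructions above the block,
/// an issue width of 4, and a limiting pre-scaled resource that needs 5
/// cycles, the resource-constrained depth is max(12 / 4, 5) = 5 cycles.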
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  // Find the limiting processor resource.
  // Numbers have been pre-scaled to be comparable.
  unsigned PRMax = 0;
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  if (Bottom) {
    ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
  } else {
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K]);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // All instructions before current block
  unsigned Instrs = TBI.InstrDepth;
  // plus instructions in current block
  if (Bottom)
    Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}
unsigned MachineTraceMetrics::Trace::getResourceLength(
    ArrayRef<const MachineBasicBlock *> Extrablocks,
    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
                         ->unsigned {
    unsigned Cycles = 0;
    for (const MCSchedClassDesc *SC : Instrs) {
      if (!SC->isValid())
        continue;
      for (TargetSchedModel::ProcResIter
               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
               PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
           PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        Cycles +=
            (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
      }
    }
    return Cycles;
  };

  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (const MachineBasicBlock *MBB : Extrablocks)
      PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (const MachineBasicBlock *MBB : Extrablocks)
    Instrs += TE.MTM.getResources(MBB)->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}
bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
                                              const MachineInstr &UseMI) const {
  if (DefMI.getParent() == UseMI.getParent())
    return true;

  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];

  return DepTBI.isUsefulDominator(TBI);
}
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  OS << getName() << " ensemble:\n";
  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
    OS << "  %bb." << i << '\t';
    BlockInfo[i].print(OS);
    OS << '\n';
  }
}
void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
  if (hasValidDepth()) {
    OS << "depth=" << InstrDepth;
    if (Pred)
      OS << " pred=" << printMBBReference(*Pred);
    else
      OS << " pred=null";
    OS << " head=%bb." << Head;
    if (HasValidInstrDepths)
      OS << " +instrs";
  } else
    OS << "depth invalid";
  OS << ", ";
  if (hasValidHeight()) {
    OS << "height=" << InstrHeight;
    if (Succ)
      OS << " succ=" << printMBBReference(*Succ);
    else
      OS << " succ=null";
    OS << " tail=%bb." << Tail;
    if (HasValidInstrHeights)
      OS << " +instrs";
  } else
    OS << "height invalid";
  if (HasValidInstrDepths && HasValidInstrHeights)
    OS << ", crit=" << CriticalPath;
}
void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
  unsigned MBBNum = &TBI - &TE.BlockInfo[0];

  OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
     << " --> %bb." << TBI.Tail << ':';
  if (TBI.hasValidHeight() && TBI.hasValidDepth())
    OS << ' ' << getInstrCount() << " instrs.";
  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
    OS << ' ' << TBI.CriticalPath << " cycles.";

  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
  OS << "\n%bb." << MBBNum;
  while (Block->hasValidDepth() && Block->Pred) {
    unsigned Num = Block->Pred->getNumber();
    OS << " <- " << printMBBReference(*Block->Pred);
    Block = &TE.BlockInfo[Num];
  }

  Block = &TBI;
  OS << '\n';
  while (Block->hasValidHeight() && Block->Succ) {
    unsigned Num = Block->Succ->getNumber();
    OS << " -> " << printMBBReference(*Block->Succ);
    Block = &TE.BlockInfo[Num];
  }
  OS << '\n';
}