//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <tuple>
#include <utility>

using namespace llvm;
#define DEBUG_TYPE "machine-trace-metrics"

char MachineTraceMetrics::ID = 0;

char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;

INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
                      "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
                    "Machine Trace Metrics", false, true)
MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
  std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
}
void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MachineBranchProbabilityInfo>();
  AU.addRequired<MachineLoopInfo>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  const TargetSubtargetInfo &ST = MF->getSubtarget();
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  MRI = &MF->getRegInfo();
  Loops = &getAnalysis<MachineLoopInfo>();
  SchedModel.init(&ST);
  BlockInfo.resize(MF->getNumBlockIDs());
  ProcResourceCycles.resize(MF->getNumBlockIDs() *
                            SchedModel.getNumProcResourceKinds());
  return false;
}
void MachineTraceMetrics::releaseMemory() {
  MF = nullptr;
  BlockInfo.clear();
  for (unsigned i = 0; i != TS_NumStrategies; ++i) {
    delete Ensembles[i];
    Ensembles[i] = nullptr;
  }
}
//===----------------------------------------------------------------------===//
//                               Fixed block information
//===----------------------------------------------------------------------===//
//
// The number of instructions in a basic block and the CPU resources used by
// those instructions don't depend on any given trace strategy.
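//
// Clarifying note (added, not part of the original comment): getResources()
// computes these numbers once per block and caches them in BlockInfo.
// ProcResourceCycles holds one row of getNumProcResourceKinds() scaled cycle
// counts per block, indexed as MBB->getNumber() * PRKinds + K.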
/// Compute the resource usage in basic block MBB.
const MachineTraceMetrics::FixedBlockInfo*
MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
  assert(MBB && "No basic block");
  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
  if (FBI->hasResources())
    return FBI;

  // Compute resource usage in the block.
  FBI->HasCalls = false;
  unsigned InstrCount = 0;

  // Add up per-processor resource cycles as well.
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  SmallVector<unsigned, 32> PRCycles(PRKinds);

  for (const auto &MI : *MBB) {
    if (MI.isTransient())
      continue;
    ++InstrCount;
    if (MI.isCall())
      FBI->HasCalls = true;

    // Count processor resources used.
    if (!SchedModel.hasInstrSchedModel())
      continue;
    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
    if (!SC->isValid())
      continue;

    for (TargetSchedModel::ProcResIter
         PI = SchedModel.getWriteProcResBegin(SC),
         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
      PRCycles[PI->ProcResourceIdx] += PI->Cycles;
    }
  }
  FBI->InstrCount = InstrCount;

  // Scale the resource cycles so they are comparable.
  unsigned PROffset = MBB->getNumber() * PRKinds;
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceCycles[PROffset + K] =
      PRCycles[K] * SchedModel.getResourceFactor(K);

  return FBI;
}
ArrayRef<unsigned>
MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
  assert(BlockInfo[MBBNum].hasResources() &&
         "getResources() must be called before getProcResourceCycles()");
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
  return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
}
//===----------------------------------------------------------------------===//
//                         Ensemble utility functions
//===----------------------------------------------------------------------===//

MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
  : MTM(*ct) {
  BlockInfo.resize(MTM.BlockInfo.size());
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
}
// Virtual destructor serves as an anchor.
MachineTraceMetrics::Ensemble::~Ensemble() = default;

const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
  return MTM.Loops->getLoopFor(MBB);
}
// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace above MBB.
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources from trace above. The top block is simple.
  if (!TBI->Pred) {
    TBI->InstrDepth = 0;
    TBI->Head = MBB->getNumber();
    std::fill(ProcResourceDepths.begin() + PROffset,
              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
    return;
  }

  // Compute from the block above. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned PredNum = TBI->Pred->getNumber();
  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
  TBI->Head = PredTBI->Head;

  // Compute per-resource depths.
  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}
// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace below MBB.
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources for the current block.
  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());

  // The trace tail is done.
  if (!TBI->Succ) {
    TBI->Tail = MBB->getNumber();
    llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
    return;
  }

  // Compute from the block below. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned SuccNum = TBI->Succ->getNumber();
  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
  TBI->InstrHeight += SuccTBI->InstrHeight;
  TBI->Tail = SuccTBI->Tail;

  // Compute per-resource heights.
  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}
// Check if depth resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getDepthResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidDepth() ? TBI : nullptr;
}
// Check if height resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getHeightResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidHeight() ? TBI : nullptr;
}
/// Get an array of processor resource depths for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by all blocks preceding MBB in its trace. It does not include instructions
/// in MBB.
///
/// Compare TraceBlockInfo::InstrDepth.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceDepths(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
  return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
}
/// Get an array of processor resource heights for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by this block and all blocks following it in its trace.
///
/// Compare TraceBlockInfo::InstrHeight.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceHeights(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
  return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
}
//===----------------------------------------------------------------------===//
//                         Trace Selection Strategies
//===----------------------------------------------------------------------===//
//
// A trace selection strategy is implemented as a sub-class of Ensemble. The
// trace through a block B is computed by two DFS traversals of the CFG
// starting from B. One upwards, and one downwards. During the upwards DFS,
// pickTracePred() is called on the post-ordered blocks. During the downwards
// DFS, pickTraceSucc() is called in a post-order.
//
// We never allow traces that leave loops, but we do allow traces to enter
// nested loops. We also never allow traces to contain back-edges.
//
// This means that a loop header can never appear above the center block of a
// trace, except as the trace head. Below the center block, loop exiting edges
// are banned.
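//
// Illustrative example (added, not part of the original comment): in a simple
// diamond CFG
//
//            %bb.0
//            /   \
//        %bb.1   %bb.2
//            \   /
//            %bb.3
//
// the trace through center block %bb.1 picks exactly one predecessor and one
// successor, e.g. %bb.0 -> %bb.1 -> %bb.3. The side block %bb.2 is not part
// of that trace, although its resources can still be added explicitly through
// the Extrablocks argument of Trace::getResourceLength().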
// Return true if an edge from the From loop to the To loop is leaving a loop.
// Either of To and From can be null.
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
  return From && !From->contains(To);
}
// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
namespace {

class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "MinInstr"; }
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;

public:
  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
    : MachineTraceMetrics::Ensemble(mtm) {}
};

} // end anonymous namespace
// Select the preferred predecessor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
  if (MBB->pred_empty())
    return nullptr;
  const MachineLoop *CurLoop = getLoopFor(MBB);
  // Don't leave loops, and never follow back-edges.
  if (CurLoop && MBB == CurLoop->getHeader())
    return nullptr;
  unsigned CurCount = MTM.getResources(MBB)->InstrCount;
  const MachineBasicBlock *Best = nullptr;
  unsigned BestDepth = 0;
  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
    const MachineTraceMetrics::TraceBlockInfo *PredTBI =
      getDepthResources(Pred);
    // Ignore cycles that aren't natural loops.
    if (!PredTBI)
      continue;
    // Pick the predecessor that would give this block the smallest InstrDepth.
    unsigned Depth = PredTBI->InstrDepth + CurCount;
    if (!Best || Depth < BestDepth) {
      Best = Pred;
      BestDepth = Depth;
    }
  }
  return Best;
}
// Select the preferred successor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
  if (MBB->succ_empty())
    return nullptr;
  const MachineLoop *CurLoop = getLoopFor(MBB);
  const MachineBasicBlock *Best = nullptr;
  unsigned BestHeight = 0;
  for (const MachineBasicBlock *Succ : MBB->successors()) {
    // Don't consider back-edges.
    if (CurLoop && Succ == CurLoop->getHeader())
      continue;
    // Don't consider successors exiting CurLoop.
    if (isExitingLoop(CurLoop, getLoopFor(Succ)))
      continue;
    const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
      getHeightResources(Succ);
    // Ignore cycles that aren't natural loops.
    if (!SuccTBI)
      continue;
    // Pick the successor that would give this block the smallest InstrHeight.
    unsigned Height = SuccTBI->InstrHeight;
    if (!Best || Height < BestHeight) {
      Best = Succ;
      BestHeight = Height;
    }
  }
  return Best;
}
// Get an Ensemble sub-class for the requested trace strategy.
MachineTraceMetrics::Ensemble *
MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
  assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
  Ensemble *&E = Ensembles[strategy];
  if (E)
    return E;

  // Allocate new Ensemble on demand.
  switch (strategy) {
  case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
  default: llvm_unreachable("Invalid trace strategy enum");
  }
}
void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
                    << '\n');
  BlockInfo[MBB->getNumber()].invalidate();
  for (unsigned i = 0; i != TS_NumStrategies; ++i)
    if (Ensembles[i])
      Ensembles[i]->invalidate(MBB);
}
void MachineTraceMetrics::verifyAnalysis() const {
  if (!MF)
    return;
#ifndef NDEBUG
  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
  for (unsigned i = 0; i != TS_NumStrategies; ++i)
    if (Ensembles[i])
      Ensembles[i]->verify();
#endif
}
//===----------------------------------------------------------------------===//
//                               Trace building
//===----------------------------------------------------------------------===//
//
// Traces are built by two CFG traversals. To avoid recomputing too much, use a
// set abstraction that confines the search to the current loop, and doesn't
// revisit blocks.

namespace {

struct LoopBounds {
  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
  const MachineLoopInfo *Loops;
  bool Downward = false;

  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
             const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
};

} // end anonymous namespace
// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
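//
// Clarifying note (added, not part of the original comment): in practice the
// pruning means the upwards search stops at a loop header (it never walks to
// the header's predecessors) and never leaves the current loop, while the
// downwards search skips back-edges to the header and loop-exiting edges, so
// each traversal only visits blocks that may legally join the trace.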
namespace llvm {

template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;

public:
  po_iterator_storage(LoopBounds &lb) : LB(lb) {}

  void finishPostorder(const MachineBasicBlock*) {}

  bool insertEdge(Optional<const MachineBasicBlock *> From,
                  const MachineBasicBlock *To) {
    // Skip already visited To blocks.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;
    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        if ((LB.Downward ? To : *From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }
    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To).second;
  }
};

} // end namespace llvm
/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
                    << printMBBReference(*MBB) << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);

  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  for (auto I : inverse_post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  pred for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(I);
    LLVM_DEBUG({
      if (TBI.Pred)
        dbgs() << printMBBReference(*TBI.Pred) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(I);
  }

  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  for (auto I : post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  succ for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(I);
    LLVM_DEBUG({
      if (TBI.Succ)
        dbgs() << printMBBReference(*TBI.Succ) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(I);
  }
}
/// Invalidate traces through BadMBB.
void
MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  SmallVector<const MachineBasicBlock*, 16> WorkList;
  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];

  // Invalidate height resources of blocks above MBB.
  if (BadTBI.hasValidHeight()) {
    BadTBI.invalidateHeight();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " height.\n");
      // Find any MBB predecessors that have MBB as their preferred successor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
        TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
        if (!TBI.hasValidHeight())
          continue;
        if (TBI.Succ == MBB) {
          TBI.invalidateHeight();
          WorkList.push_back(Pred);
          continue;
        }
        // Verify that TBI.Succ is actually a *I successor.
        assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Invalidate depth resources of blocks below MBB.
  if (BadTBI.hasValidDepth()) {
    BadTBI.invalidateDepth();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " depth.\n");
      // Find any MBB successors that have MBB as their preferred predecessor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Succ : MBB->successors()) {
        TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
        if (!TBI.hasValidDepth())
          continue;
        if (TBI.Pred == MBB) {
          TBI.invalidateDepth();
          WorkList.push_back(Succ);
          continue;
        }
        // Verify that TBI.Pred is actually a *I predecessor.
        assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Clear any per-instruction data. We only have to do this for BadMBB itself
  // because the instructions in that block may change. Other blocks may be
  // invalidated, but their instructions will stay the same, so there is no
  // need to erase the Cycle entries. They will be overwritten when we
  // recompute.
  for (const auto &I : *BadMBB)
    Cycles.erase(&I);
}
void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}
//===----------------------------------------------------------------------===//
//                             Data Dependencies
//===----------------------------------------------------------------------===//
//
// Compute the depth and height of each instruction based on data dependencies
// and instruction latencies. These cycle numbers assume that the CPU can issue
// an infinite number of instructions per cycle as long as their dependencies
// are ready.
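//
// Illustrative example (added, not part of the original comment): if %1 is
// defined by an add with latency 1 and used by a mul, the mul's depth is the
// add's depth plus 1, and the add's height is the mul's height plus 1.
// Independent instructions issuing in the same cycles do not change these
// numbers, since issue width is ignored here.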
// A data dependency is represented as a defining MI and operand numbers on the
// defining and using MI.
namespace {

struct DataDep {
  const MachineInstr *DefMI;
  unsigned DefOp;
  unsigned UseOp;

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
    : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
    : UseOp(UseOp) {
    assert(Register::isVirtualRegister(VirtReg));
    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
    assert(!DefI.atEnd() && "Register has no defs");
    DefMI = DefI->getParent();
    DefOp = DefI.getOperandNo();
    assert((++DefI).atEnd() && "Register has multiple defs");
  }
};

} // end anonymous namespace
// Get the input data dependencies that must be ready before UseMI can issue.
// Return true if UseMI has any physreg operands.
static bool getDataDeps(const MachineInstr &UseMI,
                        SmallVectorImpl<DataDep> &Deps,
                        const MachineRegisterInfo *MRI) {
  // Debug values should not be included in any calculations.
  if (UseMI.isDebugInstr())
    return false;

  bool HasPhysRegs = false;
  for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
       E = UseMI.operands_end(); I != E; ++I) {
    const MachineOperand &MO = *I;
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Register::isPhysicalRegister(Reg)) {
      HasPhysRegs = true;
      continue;
    }
    // Collect virtual register reads.
    if (MO.readsReg())
      Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
  }
  return HasPhysRegs;
}
// Get the input data dependencies of a PHI instruction, using Pred as the
// preferred predecessor.
// This will add at most one dependency to Deps.
static void getPHIDeps(const MachineInstr &UseMI,
                       SmallVectorImpl<DataDep> &Deps,
                       const MachineBasicBlock *Pred,
                       const MachineRegisterInfo *MRI) {
  // No predecessor at the beginning of a trace. Ignore dependencies.
  if (!Pred)
    return;
  assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
  for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
    if (UseMI.getOperand(i + 1).getMBB() == Pred) {
      Register Reg = UseMI.getOperand(i).getReg();
      Deps.push_back(DataDep(MRI, Reg, i));
      return;
    }
  }
}
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
static void updatePhysDepsDownwards(const MachineInstr *UseMI,
                                    SmallVectorImpl<DataDep> &Deps,
                                    SparseSet<LiveRegUnit> &RegUnits,
                                    const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> Kills;
  SmallVector<unsigned, 8> LiveDefOps;

  for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
       ME = UseMI->operands_end(); MI != ME; ++MI) {
    const MachineOperand &MO = *MI;
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Register::isPhysicalRegister(Reg))
      continue;
    // Track live defs and kills for updating RegUnits.
    if (MO.isDef()) {
      if (MO.isDead())
        Kills.push_back(Reg);
      else
        LiveDefOps.push_back(UseMI->getOperandNo(MI));
    } else if (MO.isKill())
      Kills.push_back(Reg);
    // Identify dependencies.
    if (!MO.readsReg())
      continue;
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
      break;
    }
  }

  // Update RegUnits to reflect live registers after UseMI.
  // First kills.
  for (unsigned Kill : Kills)
    for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
      RegUnits.erase(*Units);

  // Second, live defs.
  for (unsigned DefOp : LiveDefOps) {
    for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
         Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      LRU.MI = UseMI;
      LRU.Op = DefOp;
    }
  }
}
/// The length of the critical path through a trace is the maximum of two path
/// lengths:
///
/// 1. The maximum height+depth over all instructions in the trace center block.
///
/// 2. The longest cross-block dependency chain. For small blocks, it is
///    possible that the critical path through the trace doesn't include any
///    instructions in the block.
///
/// This function computes the second number from the live-in list of the
/// center block.
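///
/// Illustrative example (added, not part of the original comment): if a
/// virtual register in the live-in list is defined 5 cycles into the trace
/// above the center block (Cycles[DefMI].Depth == 5) and its recorded Height
/// is 7, that chain contributes 5 + 7 = 12 cycles, even when no instruction
/// inside the center block reaches a height+depth of 12.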
unsigned MachineTraceMetrics::Ensemble::
computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
  assert(TBI.HasValidInstrDepths && "Missing depth info");
  assert(TBI.HasValidInstrHeights && "Missing height info");
  unsigned MaxLen = 0;
  for (const LiveInReg &LIR : TBI.LiveIns) {
    if (!Register::isVirtualRegister(LIR.Reg))
      continue;
    const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
    // Ignore dependencies outside the current trace.
    const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
    if (!DefTBI.isUsefulDominator(TBI))
      continue;
    unsigned Len = LIR.Height + Cycles[DefMI].Depth;
    MaxLen = std::max(MaxLen, Len);
  }
  return MaxLen;
}
void MachineTraceMetrics::Ensemble::
updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
            SparseSet<LiveRegUnit> &RegUnits) {
  SmallVector<DataDep, 8> Deps;
  // Collect all data dependencies.
  if (UseMI.isPHI())
    getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
  else if (getDataDeps(UseMI, Deps, MTM.MRI))
    updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);

  // Filter and process dependencies, computing the earliest issue cycle.
  unsigned Cycle = 0;
  for (const DataDep &Dep : Deps) {
    const TraceBlockInfo &DepTBI =
      BlockInfo[Dep.DefMI->getParent()->getNumber()];
    // Ignore dependencies from outside the current trace.
    if (!DepTBI.isUsefulDominator(TBI))
      continue;
    assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
    unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
    // Add latency if DefMI is a real instruction. Transients get latency 0.
    if (!Dep.DefMI->isTransient())
      DepCycle += MTM.SchedModel
        .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
    Cycle = std::max(Cycle, DepCycle);
  }
  // Remember the instruction depth.
  InstrCycles &MICycles = Cycles[&UseMI];
  MICycles.Depth = Cycle;

  if (TBI.HasValidInstrHeights) {
    // Update critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
    LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
  } else {
    LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
  }
}
void MachineTraceMetrics::Ensemble::
updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
            SparseSet<LiveRegUnit> &RegUnits) {
  updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
}

void MachineTraceMetrics::Ensemble::
updateDepths(MachineBasicBlock::iterator Start,
             MachineBasicBlock::iterator End,
             SparseSet<LiveRegUnit> &RegUnits) {
  for (; Start != End; Start++)
    updateDepth(Start->getParent(), *Start, RegUnits);
}
/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  } while (MBB);

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    for (const auto &UseMI : *MBB) {
      updateDepth(TBI, UseMI, RegUnits);
    }
  }
}
// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
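//
// Clarifying note (added, not part of the original comment): RegUnits maps
// each live register unit to its highest reader seen so far while scanning
// upwards. When a def of that unit is found, the def's height is raised to
// the reader's height plus the def's latency, and the unit is removed from
// the set because it is dead above the def.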
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> ReadOps;

  for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
                                        MOE = MI.operands_end();
       MOI != MOE; ++MOI) {
    const MachineOperand &MO = *MOI;
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Register::isPhysicalRegister(Reg))
      continue;
    if (MO.readsReg())
      ReadOps.push_back(MI.getOperandNo(MOI));
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI.isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
                                                      nullptr, 0);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
    Register Reg = MI.getOperand(ReadOps[i]).getReg();
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != &MI) {
        LRU.Cycle = Height;
        LRU.MI = &MI;
        LRU.Op = ReadOps[i];
      }
    }
  }

  return Height;
}
using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;

// Push the height of DefMI upwards if required to match UseMI.
// Return true if this is the first time DefMI was seen.
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
                          unsigned UseHeight, MIHeightMap &Heights,
                          const TargetSchedModel &SchedModel,
                          const TargetInstrInfo *TII) {
  // Adjust height by Dep.DefMI latency.
  if (!Dep.DefMI->isTransient())
    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
                                                  Dep.UseOp);

  // Update Heights[DefMI] to be the maximum height seen.
  MIHeightMap::iterator I;
  bool New;
  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
  if (New)
    return true;

  // DefMI has been pushed before. Give it the max height.
  if (I->second < UseHeight)
    I->second = UseHeight;
  return false;
}
/// Assuming that the virtual register defined by DefMI:DefOp was used by
/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
           ArrayRef<const MachineBasicBlock*> Trace) {
  assert(!Trace.empty() && "Trace should contain at least one block");
  unsigned Reg = DefMI->getOperand(DefOp).getReg();
  assert(Register::isVirtualRegister(Reg));
  const MachineBasicBlock *DefMBB = DefMI->getParent();

  // Reg is live-in to all blocks in Trace that follow DefMBB.
  for (unsigned i = Trace.size(); i; --i) {
    const MachineBasicBlock *MBB = Trace[i-1];
    if (MBB == DefMBB)
      return;
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    // Just add the register. The height will be updated later.
    TBI.LiveIns.push_back(Reg);
  }
}
/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);

  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (LiveInReg &LI : TBI.LiveIns) {
      if (Register::isVirtualRegister(LI.Reg)) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }

  // Go through the trace blocks in bottom-up order.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;

    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      for (const auto &PHI : *Succ) {
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
                            MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }

    // Go through the block backwards.
    for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
         BI != BB;) {
      const MachineInstr &MI = *--BI;

      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(&MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
                                      MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (const DataDep &Dep : Deps)
        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);

      InstrCycles &MICycles = Cycles[&MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
    }

    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
    for (LiveInReg &LIR : TBI.LiveIns) {
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (SparseSet<LiveRegUnit>::const_iterator
         RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
      TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
      LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) << '@'
                        << RI->Cycle);
    }
    LLVM_DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}
MachineTraceMetrics::Trace
MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];

  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
    computeTrace(MBB);
  if (!TBI.HasValidInstrDepths)
    computeInstrDepths(MBB);
  if (!TBI.HasValidInstrHeights)
    computeInstrHeights(MBB);

  return Trace(*this, TBI);
}
unsigned
MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
  assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
         "MI must be in the trace center block");
  InstrCycles Cyc = getInstrCycles(MI);
  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
}
unsigned
MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
  SmallVector<DataDep, 1> Deps;
  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
  DataDep &Dep = Deps.front();
  unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
  // Add latency if DefMI is a real instruction. Transients get latency 0.
  if (!Dep.DefMI->isTransient())
    DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
                                                        &PHI, Dep.UseOp);
  return DepCycle;
}
/// When bottom is set include instructions in current block in estimate.
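///
/// Clarifying note (added, not part of the original comment): the result is
/// the larger of two lower bounds, the trace instruction count divided by the
/// issue width and the most-used processor resource's scaled cycle count
/// converted back to cycles.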
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  // Find the limiting processor resource.
  // Numbers have been pre-scaled to be comparable.
  unsigned PRMax = 0;
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  if (Bottom) {
    ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
  } else {
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K]);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  /// All instructions before current block
  unsigned Instrs = TBI.InstrDepth;
  // plus instructions in current block
  if (Bottom)
    Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}
unsigned MachineTraceMetrics::Trace::getResourceLength(
    ArrayRef<const MachineBasicBlock *> Extrablocks,
    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
                         ->unsigned {
    unsigned Cycles = 0;
    for (const MCSchedClassDesc *SC : Instrs) {
      if (!SC->isValid())
        continue;
      for (TargetSchedModel::ProcResIter
               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
               PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
           PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        Cycles +=
            (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
      }
    }
    return Cycles;
  };

  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (const MachineBasicBlock *MBB : Extrablocks)
      PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (const MachineBasicBlock *MBB : Extrablocks)
    Instrs += TE.MTM.getResources(MBB)->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}
bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
                                              const MachineInstr &UseMI) const {
  if (DefMI.getParent() == UseMI.getParent())
    return true;

  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];

  return DepTBI.isUsefulDominator(TBI);
}
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  OS << getName() << " ensemble:\n";
  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
    OS << "  %bb." << i << '\t';
    BlockInfo[i].print(OS);
    OS << '\n';
  }
}
void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
  if (hasValidDepth()) {
    OS << "depth=" << InstrDepth;
    if (Pred)
      OS << " pred=" << printMBBReference(*Pred);
    else
      OS << " pred=null";
    OS << " head=%bb." << Head;
    if (HasValidInstrDepths)
      OS << " +instrs";
  } else
    OS << "depth invalid";
  OS << ", ";
  if (hasValidHeight()) {
    OS << "height=" << InstrHeight;
    if (Succ)
      OS << " succ=" << printMBBReference(*Succ);
    else
      OS << " succ=null";
    OS << " tail=%bb." << Tail;
    if (HasValidInstrHeights)
      OS << " +instrs";
  } else
    OS << "height invalid";
  if (HasValidInstrDepths && HasValidInstrHeights)
    OS << ", crit=" << CriticalPath;
}
void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
  unsigned MBBNum = &TBI - &TE.BlockInfo[0];

  OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
     << " --> %bb." << TBI.Tail << ':';
  if (TBI.hasValidHeight() && TBI.hasValidDepth())
    OS << ' ' << getInstrCount() << " instrs.";
  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
    OS << ' ' << TBI.CriticalPath << " cycles.";

  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
  OS << "\n%bb." << MBBNum;
  while (Block->hasValidDepth() && Block->Pred) {
    unsigned Num = Block->Pred->getNumber();
    OS << " <- " << printMBBReference(*Block->Pred);
    Block = &TE.BlockInfo[Num];
  }

  Block = &TBI;
  OS << '\n';
  while (Block->hasValidHeight() && Block->Succ) {
    unsigned Num = Block->Succ->getNumber();
    OS << " -> " << printMBBReference(*Block->Succ);
    Block = &TE.BlockInfo[Num];
  }
  OS << '\n';
}