1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements the functionalities used by the BottleneckAnalysis
11 /// to report bottleneck info.
13 //===----------------------------------------------------------------------===//
15 #include "Views/BottleneckAnalysis.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MCA/Support.h"
18 #include "llvm/Support/Format.h"
19 #include "llvm/Support/FormattedStream.h"
24 #define DEBUG_TYPE "llvm-mca"
26 PressureTracker::PressureTracker(const MCSchedModel
&Model
)
28 ResourcePressureDistribution(Model
.getNumProcResourceKinds(), 0),
29 ProcResID2Mask(Model
.getNumProcResourceKinds(), 0),
30 ResIdx2ProcResID(Model
.getNumProcResourceKinds(), 0),
31 ProcResID2ResourceUsersIndex(Model
.getNumProcResourceKinds(), 0) {
32 computeProcResourceMasks(SM
, ProcResID2Mask
);
34 // Ignore the invalid resource at index zero.
35 unsigned NextResourceUsersIdx
= 0;
36 for (unsigned I
= 1, E
= Model
.getNumProcResourceKinds(); I
< E
; ++I
) {
37 const MCProcResourceDesc
&ProcResource
= *SM
.getProcResource(I
);
38 ProcResID2ResourceUsersIndex
[I
] = NextResourceUsersIdx
;
39 NextResourceUsersIdx
+= ProcResource
.NumUnits
;
40 uint64_t ResourceMask
= ProcResID2Mask
[I
];
41 ResIdx2ProcResID
[getResourceStateIndex(ResourceMask
)] = I
;
44 ResourceUsers
.resize(NextResourceUsersIdx
);
45 std::fill(ResourceUsers
.begin(), ResourceUsers
.end(),
46 std::make_pair
<unsigned, unsigned>(~0U, 0U));
49 void PressureTracker::getResourceUsers(uint64_t ResourceMask
,
50 SmallVectorImpl
<User
> &Users
) const {
51 unsigned Index
= getResourceStateIndex(ResourceMask
);
52 unsigned ProcResID
= ResIdx2ProcResID
[Index
];
53 const MCProcResourceDesc
&PRDesc
= *SM
.getProcResource(ProcResID
);
54 for (unsigned I
= 0, E
= PRDesc
.NumUnits
; I
< E
; ++I
) {
55 const User U
= getResourceUser(ProcResID
, I
);
56 if (U
.second
&& IPI
.find(U
.first
) != IPI
.end())
57 Users
.emplace_back(U
);
61 void PressureTracker::onInstructionDispatched(unsigned IID
) {
62 IPI
.insert(std::make_pair(IID
, InstructionPressureInfo()));
65 void PressureTracker::onInstructionExecuted(unsigned IID
) { IPI
.erase(IID
); }
67 void PressureTracker::handleInstructionIssuedEvent(
68 const HWInstructionIssuedEvent
&Event
) {
69 unsigned IID
= Event
.IR
.getSourceIndex();
70 using ResourceRef
= HWInstructionIssuedEvent::ResourceRef
;
71 using ResourceUse
= std::pair
<ResourceRef
, ResourceCycles
>;
72 for (const ResourceUse
&Use
: Event
.UsedResources
) {
73 const ResourceRef
&RR
= Use
.first
;
74 unsigned Index
= ProcResID2ResourceUsersIndex
[RR
.first
];
75 Index
+= countTrailingZeros(RR
.second
);
76 ResourceUsers
[Index
] = std::make_pair(IID
, Use
.second
.getNumerator());
80 void PressureTracker::updateResourcePressureDistribution(
81 uint64_t CumulativeMask
) {
82 while (CumulativeMask
) {
83 uint64_t Current
= CumulativeMask
& (-CumulativeMask
);
84 unsigned ResIdx
= getResourceStateIndex(Current
);
85 unsigned ProcResID
= ResIdx2ProcResID
[ResIdx
];
86 uint64_t Mask
= ProcResID2Mask
[ProcResID
];
88 if (Mask
== Current
) {
89 ResourcePressureDistribution
[ProcResID
]++;
90 CumulativeMask
^= Current
;
96 uint64_t SubUnit
= Mask
& (-Mask
);
97 ResIdx
= getResourceStateIndex(SubUnit
);
98 ProcResID
= ResIdx2ProcResID
[ResIdx
];
99 ResourcePressureDistribution
[ProcResID
]++;
103 CumulativeMask
^= Current
;
107 void PressureTracker::handlePressureEvent(const HWPressureEvent
&Event
) {
108 assert(Event
.Reason
!= HWPressureEvent::INVALID
&&
109 "Unexpected invalid event!");
111 switch (Event
.Reason
) {
115 case HWPressureEvent::RESOURCES
: {
116 const uint64_t ResourceMask
= Event
.ResourceMask
;
117 updateResourcePressureDistribution(Event
.ResourceMask
);
119 for (const InstRef
&IR
: Event
.AffectedInstructions
) {
120 const Instruction
&IS
= *IR
.getInstruction();
121 unsigned BusyResources
= IS
.getCriticalResourceMask() & ResourceMask
;
125 unsigned IID
= IR
.getSourceIndex();
126 IPI
[IID
].ResourcePressureCycles
++;
131 case HWPressureEvent::REGISTER_DEPS
:
132 for (const InstRef
&IR
: Event
.AffectedInstructions
) {
133 unsigned IID
= IR
.getSourceIndex();
134 IPI
[IID
].RegisterPressureCycles
++;
138 case HWPressureEvent::MEMORY_DEPS
:
139 for (const InstRef
&IR
: Event
.AffectedInstructions
) {
140 unsigned IID
= IR
.getSourceIndex();
141 IPI
[IID
].MemoryPressureCycles
++;
147 void DependencyGraph::dumpDependencyEdge(raw_ostream
&OS
,
148 const DependencyEdge
&DepEdge
,
149 MCInstPrinter
&MCIP
) const {
150 unsigned FromIID
= DepEdge
.FromIID
;
151 unsigned ToIID
= DepEdge
.ToIID
;
152 assert(FromIID
< ToIID
&& "Graph should be acyclic!");
154 const DependencyEdge::Dependency
&DE
= DepEdge
.Dep
;
155 assert(DE
.Type
!= DependencyEdge::DT_INVALID
&& "Unexpected invalid edge!");
157 OS
<< " FROM: " << FromIID
<< " TO: " << ToIID
<< " ";
158 if (DE
.Type
== DependencyEdge::DT_REGISTER
) {
159 OS
<< " - REGISTER: ";
160 MCIP
.printRegName(OS
, DE
.ResourceOrRegID
);
161 } else if (DE
.Type
== DependencyEdge::DT_MEMORY
) {
164 assert(DE
.Type
== DependencyEdge::DT_RESOURCE
&&
165 "Unsupported dependency type!");
166 OS
<< " - RESOURCE MASK: " << DE
.ResourceOrRegID
;
168 OS
<< " - COST: " << DE
.Cost
<< '\n';
172 void DependencyGraph::pruneEdges(unsigned Iterations
) {
173 for (DGNode
&N
: Nodes
) {
174 unsigned NumPruned
= 0;
175 const unsigned Size
= N
.OutgoingEdges
.size();
176 // Use a cut-off threshold to prune edges with a low frequency.
177 for (unsigned I
= 0, E
= Size
; I
< E
; ++I
) {
178 DependencyEdge
&Edge
= N
.OutgoingEdges
[I
];
179 if (Edge
.Frequency
== Iterations
)
181 double Factor
= (double)Edge
.Frequency
/ Iterations
;
184 Nodes
[Edge
.ToIID
].NumPredecessors
--;
185 std::swap(Edge
, N
.OutgoingEdges
[E
- 1]);
191 N
.OutgoingEdges
.resize(Size
- NumPruned
);
195 void DependencyGraph::initializeRootSet(
196 SmallVectorImpl
<unsigned> &RootSet
) const {
197 for (unsigned I
= 0, E
= Nodes
.size(); I
< E
; ++I
) {
198 const DGNode
&N
= Nodes
[I
];
199 if (N
.NumPredecessors
== 0 && !N
.OutgoingEdges
.empty())
200 RootSet
.emplace_back(I
);
204 void DependencyGraph::propagateThroughEdges(
205 SmallVectorImpl
<unsigned> &RootSet
, unsigned Iterations
) {
206 SmallVector
<unsigned, 8> ToVisit
;
208 // A critical sequence is computed as the longest path from a node of the
209 // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
210 // composed of nodes with at least one successor, and no predecessors.
212 // Each node of the graph starts with an initial default cost of zero. The
213 // cost of a node is a measure of criticality: the higher the cost, the bigger
214 // is the performance impact.
215 // For register and memory dependencies, the cost is a function of the write
216 // latency as well as the actual delay (in cycles) caused to users.
217 // For processor resource dependencies, the cost is a function of the resource
218 // pressure. Resource interferences with low frequency values are ignored.
220 // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
221 // the inner loop selects (i.e. visits) a node N from a set of `unvisited
222 // nodes`, and then propagates the cost of N to all its neighbors.
224 // The `unvisited nodes` set initially contains all the nodes from the
225 // RootSet. A node N is added to the `unvisited nodes` if all its
226 // predecessors have been visited already.
228 // For simplicity, every node tracks the number of unvisited incoming edges in
229 // field `NumVisitedPredecessors`. When the value of that field drops to
230 // zero, then the corresponding node is added to a `ToVisit` set.
232 // At the end of every iteration of the outer loop, set `ToVisit` becomes our
233 // new `unvisited nodes` set.
235 // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
236 // is empty. This algorithm works under the assumption that the graph is
239 for (unsigned IID
: RootSet
) {
240 const DGNode
&N
= Nodes
[IID
];
241 for (const DependencyEdge
&DepEdge
: N
.OutgoingEdges
) {
242 unsigned ToIID
= DepEdge
.ToIID
;
243 DGNode
&To
= Nodes
[ToIID
];
244 uint64_t Cost
= N
.Cost
+ DepEdge
.Dep
.Cost
;
245 // Check if this is the most expensive incoming edge seen so far. In
246 // case, update the total cost of the destination node (ToIID), as well
247 // its field `CriticalPredecessor`.
248 if (Cost
> To
.Cost
) {
249 To
.CriticalPredecessor
= DepEdge
;
251 To
.Depth
= N
.Depth
+ 1;
253 To
.NumVisitedPredecessors
++;
254 if (To
.NumVisitedPredecessors
== To
.NumPredecessors
)
255 ToVisit
.emplace_back(ToIID
);
259 std::swap(RootSet
, ToVisit
);
261 } while (!RootSet
.empty());
264 void DependencyGraph::getCriticalSequence(
265 SmallVectorImpl
<const DependencyEdge
*> &Seq
) const {
266 // At this stage, nodes of the graph have been already visited, and costs have
267 // been propagated through the edges (see method `propagateThroughEdges()`).
269 // Identify the node N with the highest cost in the graph. By construction,
270 // that node is the last instruction of our critical sequence.
271 // Field N.Depth would tell us the total length of the sequence.
273 // To obtain the sequence of critical edges, we simply follow the chain of critical
274 // predecessors starting from node N (field DGNode::CriticalPredecessor).
275 const auto It
= std::max_element(
276 Nodes
.begin(), Nodes
.end(),
277 [](const DGNode
&Lhs
, const DGNode
&Rhs
) { return Lhs
.Cost
< Rhs
.Cost
; });
278 unsigned IID
= std::distance(Nodes
.begin(), It
);
279 Seq
.resize(Nodes
[IID
].Depth
);
280 for (unsigned I
= Seq
.size(), E
= 0; I
> E
; --I
) {
281 const DGNode
&N
= Nodes
[IID
];
282 Seq
[I
- 1] = &N
.CriticalPredecessor
;
283 IID
= N
.CriticalPredecessor
.FromIID
;
287 static void printInstruction(formatted_raw_ostream
&FOS
,
288 const MCSubtargetInfo
&STI
, MCInstPrinter
&MCIP
,
290 bool UseDifferentColor
= false) {
291 std::string Instruction
;
292 raw_string_ostream
InstrStream(Instruction
);
296 MCIP
.printInst(&MCI
, 0, "", STI
, InstrStream
);
299 if (UseDifferentColor
)
300 FOS
.changeColor(raw_ostream::CYAN
, true, false);
301 FOS
<< StringRef(Instruction
).ltrim();
302 if (UseDifferentColor
)
306 void BottleneckAnalysis::printCriticalSequence(raw_ostream
&OS
) const {
307 // Early exit if no bottlenecks were found during the simulation.
308 if (!SeenStallCycles
|| !BPI
.PressureIncreaseCycles
)
311 SmallVector
<const DependencyEdge
*, 16> Seq
;
312 DG
.getCriticalSequence(Seq
);
316 OS
<< "\nCritical sequence based on the simulation:\n\n";
318 const DependencyEdge
&FirstEdge
= *Seq
[0];
319 unsigned FromIID
= FirstEdge
.FromIID
% Source
.size();
320 unsigned ToIID
= FirstEdge
.ToIID
% Source
.size();
321 bool IsLoopCarried
= FromIID
>= ToIID
;
323 formatted_raw_ostream
FOS(OS
);
325 FOS
<< "Instruction";
327 FOS
<< "Dependency Information";
329 bool HasColors
= FOS
.has_colors();
331 unsigned CurrentIID
= 0;
333 FOS
<< "\n +----< " << FromIID
<< ".";
334 printInstruction(FOS
, STI
, MCIP
, Source
[FromIID
], HasColors
);
335 FOS
<< "\n |\n | < loop carried > \n |";
337 while (CurrentIID
< FromIID
) {
338 FOS
<< "\n " << CurrentIID
<< ".";
339 printInstruction(FOS
, STI
, MCIP
, Source
[CurrentIID
]);
343 FOS
<< "\n +----< " << CurrentIID
<< ".";
344 printInstruction(FOS
, STI
, MCIP
, Source
[CurrentIID
], HasColors
);
348 for (const DependencyEdge
*&DE
: Seq
) {
349 ToIID
= DE
->ToIID
% Source
.size();
350 unsigned LastIID
= CurrentIID
> ToIID
? Source
.size() : ToIID
;
352 while (CurrentIID
< LastIID
) {
353 FOS
<< "\n | " << CurrentIID
<< ".";
354 printInstruction(FOS
, STI
, MCIP
, Source
[CurrentIID
]);
358 if (CurrentIID
== ToIID
) {
359 FOS
<< "\n +----> " << ToIID
<< ".";
360 printInstruction(FOS
, STI
, MCIP
, Source
[CurrentIID
], HasColors
);
362 FOS
<< "\n |\n | < loop carried > \n |"
363 << "\n +----> " << ToIID
<< ".";
364 printInstruction(FOS
, STI
, MCIP
, Source
[ToIID
], HasColors
);
368 const DependencyEdge::Dependency
&Dep
= DE
->Dep
;
370 FOS
.changeColor(raw_ostream::SAVEDCOLOR
, true, false);
372 if (Dep
.Type
== DependencyEdge::DT_REGISTER
) {
373 FOS
<< "## REGISTER dependency: ";
375 FOS
.changeColor(raw_ostream::MAGENTA
, true, false);
376 MCIP
.printRegName(FOS
, Dep
.ResourceOrRegID
);
377 } else if (Dep
.Type
== DependencyEdge::DT_MEMORY
) {
378 FOS
<< "## MEMORY dependency.";
380 assert(Dep
.Type
== DependencyEdge::DT_RESOURCE
&&
381 "Unsupported dependency type!");
382 FOS
<< "## RESOURCE interference: ";
384 FOS
.changeColor(raw_ostream::MAGENTA
, true, false);
385 FOS
<< Tracker
.resolveResourceName(Dep
.ResourceOrRegID
);
388 FOS
.changeColor(raw_ostream::SAVEDCOLOR
, true, false);
390 FOS
<< " [ probability: " << ((DE
->Frequency
* 100) / Iterations
)
398 while (CurrentIID
< Source
.size()) {
399 FOS
<< "\n " << CurrentIID
<< ".";
400 printInstruction(FOS
, STI
, MCIP
, Source
[CurrentIID
]);
409 void DependencyGraph::dump(raw_ostream
&OS
, MCInstPrinter
&MCIP
) const {
410 OS
<< "\nREG DEPS\n";
411 for (const DGNode
&Node
: Nodes
)
412 for (const DependencyEdge
&DE
: Node
.OutgoingEdges
)
413 if (DE
.Dep
.Type
== DependencyEdge::DT_REGISTER
)
414 dumpDependencyEdge(OS
, DE
, MCIP
);
416 OS
<< "\nMEM DEPS\n";
417 for (const DGNode
&Node
: Nodes
)
418 for (const DependencyEdge
&DE
: Node
.OutgoingEdges
)
419 if (DE
.Dep
.Type
== DependencyEdge::DT_MEMORY
)
420 dumpDependencyEdge(OS
, DE
, MCIP
);
422 OS
<< "\nRESOURCE DEPS\n";
423 for (const DGNode
&Node
: Nodes
)
424 for (const DependencyEdge
&DE
: Node
.OutgoingEdges
)
425 if (DE
.Dep
.Type
== DependencyEdge::DT_RESOURCE
)
426 dumpDependencyEdge(OS
, DE
, MCIP
);
430 void DependencyGraph::addDependency(unsigned From
, unsigned To
,
431 DependencyEdge::Dependency
&&Dep
) {
432 DGNode
&NodeFrom
= Nodes
[From
];
433 DGNode
&NodeTo
= Nodes
[To
];
434 SmallVectorImpl
<DependencyEdge
> &Vec
= NodeFrom
.OutgoingEdges
;
436 auto It
= find_if(Vec
, [To
, Dep
](DependencyEdge
&DE
) {
437 return DE
.ToIID
== To
&& DE
.Dep
.ResourceOrRegID
== Dep
.ResourceOrRegID
;
440 if (It
!= Vec
.end()) {
441 It
->Dep
.Cost
+= Dep
.Cost
;
446 DependencyEdge DE
= {Dep
, From
, To
, 1};
447 Vec
.emplace_back(DE
);
448 NodeTo
.NumPredecessors
++;
451 BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo
&sti
,
452 MCInstPrinter
&Printer
,
453 ArrayRef
<MCInst
> S
, unsigned NumIter
)
454 : STI(sti
), MCIP(Printer
), Tracker(STI
.getSchedModel()), DG(S
.size() * 3),
455 Source(S
), Iterations(NumIter
), TotalCycles(0),
456 PressureIncreasedBecauseOfResources(false),
457 PressureIncreasedBecauseOfRegisterDependencies(false),
458 PressureIncreasedBecauseOfMemoryDependencies(false),
459 SeenStallCycles(false), BPI() {}
461 void BottleneckAnalysis::addRegisterDep(unsigned From
, unsigned To
,
462 unsigned RegID
, unsigned Cost
) {
463 bool IsLoopCarried
= From
>= To
;
464 unsigned SourceSize
= Source
.size();
466 DG
.addRegisterDep(From
, To
+ SourceSize
, RegID
, Cost
);
467 DG
.addRegisterDep(From
+ SourceSize
, To
+ (SourceSize
* 2), RegID
, Cost
);
470 DG
.addRegisterDep(From
+ SourceSize
, To
+ SourceSize
, RegID
, Cost
);
473 void BottleneckAnalysis::addMemoryDep(unsigned From
, unsigned To
,
475 bool IsLoopCarried
= From
>= To
;
476 unsigned SourceSize
= Source
.size();
478 DG
.addMemoryDep(From
, To
+ SourceSize
, Cost
);
479 DG
.addMemoryDep(From
+ SourceSize
, To
+ (SourceSize
* 2), Cost
);
482 DG
.addMemoryDep(From
+ SourceSize
, To
+ SourceSize
, Cost
);
485 void BottleneckAnalysis::addResourceDep(unsigned From
, unsigned To
,
486 uint64_t Mask
, unsigned Cost
) {
487 bool IsLoopCarried
= From
>= To
;
488 unsigned SourceSize
= Source
.size();
490 DG
.addResourceDep(From
, To
+ SourceSize
, Mask
, Cost
);
491 DG
.addResourceDep(From
+ SourceSize
, To
+ (SourceSize
* 2), Mask
, Cost
);
494 DG
.addResourceDep(From
+ SourceSize
, To
+ SourceSize
, Mask
, Cost
);
497 void BottleneckAnalysis::onEvent(const HWInstructionEvent
&Event
) {
498 const unsigned IID
= Event
.IR
.getSourceIndex();
499 if (Event
.Type
== HWInstructionEvent::Dispatched
) {
500 Tracker
.onInstructionDispatched(IID
);
503 if (Event
.Type
== HWInstructionEvent::Executed
) {
504 Tracker
.onInstructionExecuted(IID
);
508 if (Event
.Type
!= HWInstructionEvent::Issued
)
511 const Instruction
&IS
= *Event
.IR
.getInstruction();
512 unsigned To
= IID
% Source
.size();
514 unsigned Cycles
= 2 * Tracker
.getResourcePressureCycles(IID
);
515 uint64_t ResourceMask
= IS
.getCriticalResourceMask();
516 SmallVector
<std::pair
<unsigned, unsigned>, 4> Users
;
517 while (ResourceMask
) {
518 uint64_t Current
= ResourceMask
& (-ResourceMask
);
519 Tracker
.getResourceUsers(Current
, Users
);
520 for (const std::pair
<unsigned, unsigned> &U
: Users
)
521 addResourceDep(U
.first
% Source
.size(), To
, Current
, U
.second
+ Cycles
);
523 ResourceMask
^= Current
;
526 const CriticalDependency
&RegDep
= IS
.getCriticalRegDep();
528 Cycles
= RegDep
.Cycles
+ 2 * Tracker
.getRegisterPressureCycles(IID
);
529 unsigned From
= RegDep
.IID
% Source
.size();
530 addRegisterDep(From
, To
, RegDep
.RegID
, Cycles
);
533 const CriticalDependency
&MemDep
= IS
.getCriticalMemDep();
535 Cycles
= MemDep
.Cycles
+ 2 * Tracker
.getMemoryPressureCycles(IID
);
536 unsigned From
= MemDep
.IID
% Source
.size();
537 addMemoryDep(From
, To
, Cycles
);
540 Tracker
.handleInstructionIssuedEvent(
541 static_cast<const HWInstructionIssuedEvent
&>(Event
));
543 // Check if this is the last simulated instruction.
544 if (IID
== ((Iterations
* Source
.size()) - 1))
545 DG
.finalizeGraph(Iterations
);
548 void BottleneckAnalysis::onEvent(const HWPressureEvent
&Event
) {
549 assert(Event
.Reason
!= HWPressureEvent::INVALID
&&
550 "Unexpected invalid event!");
552 Tracker
.handlePressureEvent(Event
);
554 switch (Event
.Reason
) {
558 case HWPressureEvent::RESOURCES
:
559 PressureIncreasedBecauseOfResources
= true;
561 case HWPressureEvent::REGISTER_DEPS
:
562 PressureIncreasedBecauseOfRegisterDependencies
= true;
564 case HWPressureEvent::MEMORY_DEPS
:
565 PressureIncreasedBecauseOfMemoryDependencies
= true;
570 void BottleneckAnalysis::onCycleEnd() {
573 bool PressureIncreasedBecauseOfDataDependencies
=
574 PressureIncreasedBecauseOfRegisterDependencies
||
575 PressureIncreasedBecauseOfMemoryDependencies
;
576 if (!PressureIncreasedBecauseOfResources
&&
577 !PressureIncreasedBecauseOfDataDependencies
)
580 ++BPI
.PressureIncreaseCycles
;
581 if (PressureIncreasedBecauseOfRegisterDependencies
)
582 ++BPI
.RegisterDependencyCycles
;
583 if (PressureIncreasedBecauseOfMemoryDependencies
)
584 ++BPI
.MemoryDependencyCycles
;
585 if (PressureIncreasedBecauseOfDataDependencies
)
586 ++BPI
.DataDependencyCycles
;
587 if (PressureIncreasedBecauseOfResources
)
588 ++BPI
.ResourcePressureCycles
;
589 PressureIncreasedBecauseOfResources
= false;
590 PressureIncreasedBecauseOfRegisterDependencies
= false;
591 PressureIncreasedBecauseOfMemoryDependencies
= false;
594 void BottleneckAnalysis::printBottleneckHints(raw_ostream
&OS
) const {
595 if (!SeenStallCycles
|| !BPI
.PressureIncreaseCycles
) {
596 OS
<< "\n\nNo resource or data dependency bottlenecks discovered.\n";
600 double PressurePerCycle
=
601 (double)BPI
.PressureIncreaseCycles
* 100 / TotalCycles
;
602 double ResourcePressurePerCycle
=
603 (double)BPI
.ResourcePressureCycles
* 100 / TotalCycles
;
604 double DDPerCycle
= (double)BPI
.DataDependencyCycles
* 100 / TotalCycles
;
605 double RegDepPressurePerCycle
=
606 (double)BPI
.RegisterDependencyCycles
* 100 / TotalCycles
;
607 double MemDepPressurePerCycle
=
608 (double)BPI
.MemoryDependencyCycles
* 100 / TotalCycles
;
610 OS
<< "\n\nCycles with backend pressure increase [ "
611 << format("%.2f", floor((PressurePerCycle
* 100) + 0.5) / 100) << "% ]";
613 OS
<< "\nThroughput Bottlenecks: "
614 << "\n Resource Pressure [ "
615 << format("%.2f", floor((ResourcePressurePerCycle
* 100) + 0.5) / 100)
618 if (BPI
.PressureIncreaseCycles
) {
619 ArrayRef
<unsigned> Distribution
= Tracker
.getResourcePressureDistribution();
620 const MCSchedModel
&SM
= STI
.getSchedModel();
621 for (unsigned I
= 0, E
= Distribution
.size(); I
< E
; ++I
) {
622 unsigned ResourceCycles
= Distribution
[I
];
623 if (ResourceCycles
) {
624 double Frequency
= (double)ResourceCycles
* 100 / TotalCycles
;
625 const MCProcResourceDesc
&PRDesc
= *SM
.getProcResource(I
);
626 OS
<< "\n - " << PRDesc
.Name
<< " [ "
627 << format("%.2f", floor((Frequency
* 100) + 0.5) / 100) << "% ]";
632 OS
<< "\n Data Dependencies: [ "
633 << format("%.2f", floor((DDPerCycle
* 100) + 0.5) / 100) << "% ]";
634 OS
<< "\n - Register Dependencies [ "
635 << format("%.2f", floor((RegDepPressurePerCycle
* 100) + 0.5) / 100)
637 OS
<< "\n - Memory Dependencies [ "
638 << format("%.2f", floor((MemDepPressurePerCycle
* 100) + 0.5) / 100)
642 void BottleneckAnalysis::printView(raw_ostream
&OS
) const {
644 raw_string_ostream
TempStream(Buffer
);
645 printBottleneckHints(TempStream
);
648 printCriticalSequence(OS
);