1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements the functionalities used by the BottleneckAnalysis
11 /// to report bottleneck info.
13 //===----------------------------------------------------------------------===//
15 #include "Views/BottleneckAnalysis.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MCA/Support.h"
18 #include "llvm/Support/Format.h"
23 #define DEBUG_TYPE "llvm-mca"
25 PressureTracker::PressureTracker(const MCSchedModel
&Model
)
27 ResourcePressureDistribution(Model
.getNumProcResourceKinds(), 0),
28 ProcResID2Mask(Model
.getNumProcResourceKinds(), 0),
29 ResIdx2ProcResID(Model
.getNumProcResourceKinds(), 0),
30 ProcResID2ResourceUsersIndex(Model
.getNumProcResourceKinds(), 0) {
31 computeProcResourceMasks(SM
, ProcResID2Mask
);
33 // Ignore the invalid resource at index zero.
34 unsigned NextResourceUsersIdx
= 0;
35 for (unsigned I
= 1, E
= Model
.getNumProcResourceKinds(); I
< E
; ++I
) {
36 const MCProcResourceDesc
&ProcResource
= *SM
.getProcResource(I
);
37 ProcResID2ResourceUsersIndex
[I
] = NextResourceUsersIdx
;
38 NextResourceUsersIdx
+= ProcResource
.NumUnits
;
39 uint64_t ResourceMask
= ProcResID2Mask
[I
];
40 ResIdx2ProcResID
[getResourceStateIndex(ResourceMask
)] = I
;
43 ResourceUsers
.resize(NextResourceUsersIdx
);
44 std::fill(ResourceUsers
.begin(), ResourceUsers
.end(),
45 std::make_pair
<unsigned, unsigned>(~0U, 0U));
48 void PressureTracker::getResourceUsers(uint64_t ResourceMask
,
49 SmallVectorImpl
<User
> &Users
) const {
50 unsigned Index
= getResourceStateIndex(ResourceMask
);
51 unsigned ProcResID
= ResIdx2ProcResID
[Index
];
52 const MCProcResourceDesc
&PRDesc
= *SM
.getProcResource(ProcResID
);
53 for (unsigned I
= 0, E
= PRDesc
.NumUnits
; I
< E
; ++I
) {
54 const User U
= getResourceUser(ProcResID
, I
);
55 if (U
.second
&& IPI
.contains(U
.first
))
56 Users
.emplace_back(U
);
60 void PressureTracker::onInstructionDispatched(unsigned IID
) {
61 IPI
.insert(std::make_pair(IID
, InstructionPressureInfo()));
64 void PressureTracker::onInstructionExecuted(unsigned IID
) { IPI
.erase(IID
); }
66 void PressureTracker::handleInstructionIssuedEvent(
67 const HWInstructionIssuedEvent
&Event
) {
68 unsigned IID
= Event
.IR
.getSourceIndex();
69 for (const ResourceUse
&Use
: Event
.UsedResources
) {
70 const ResourceRef
&RR
= Use
.first
;
71 unsigned Index
= ProcResID2ResourceUsersIndex
[RR
.first
];
72 Index
+= llvm::countr_zero(RR
.second
);
73 ResourceUsers
[Index
] = std::make_pair(IID
, Use
.second
.getNumerator());
77 void PressureTracker::updateResourcePressureDistribution(
78 uint64_t CumulativeMask
) {
79 while (CumulativeMask
) {
80 uint64_t Current
= CumulativeMask
& (-CumulativeMask
);
81 unsigned ResIdx
= getResourceStateIndex(Current
);
82 unsigned ProcResID
= ResIdx2ProcResID
[ResIdx
];
83 uint64_t Mask
= ProcResID2Mask
[ProcResID
];
85 if (Mask
== Current
) {
86 ResourcePressureDistribution
[ProcResID
]++;
87 CumulativeMask
^= Current
;
93 uint64_t SubUnit
= Mask
& (-Mask
);
94 ResIdx
= getResourceStateIndex(SubUnit
);
95 ProcResID
= ResIdx2ProcResID
[ResIdx
];
96 ResourcePressureDistribution
[ProcResID
]++;
100 CumulativeMask
^= Current
;
104 void PressureTracker::handlePressureEvent(const HWPressureEvent
&Event
) {
105 assert(Event
.Reason
!= HWPressureEvent::INVALID
&&
106 "Unexpected invalid event!");
108 switch (Event
.Reason
) {
112 case HWPressureEvent::RESOURCES
: {
113 const uint64_t ResourceMask
= Event
.ResourceMask
;
114 updateResourcePressureDistribution(Event
.ResourceMask
);
116 for (const InstRef
&IR
: Event
.AffectedInstructions
) {
117 const Instruction
&IS
= *IR
.getInstruction();
118 unsigned BusyResources
= IS
.getCriticalResourceMask() & ResourceMask
;
122 unsigned IID
= IR
.getSourceIndex();
123 IPI
[IID
].ResourcePressureCycles
++;
128 case HWPressureEvent::REGISTER_DEPS
:
129 for (const InstRef
&IR
: Event
.AffectedInstructions
) {
130 unsigned IID
= IR
.getSourceIndex();
131 IPI
[IID
].RegisterPressureCycles
++;
135 case HWPressureEvent::MEMORY_DEPS
:
136 for (const InstRef
&IR
: Event
.AffectedInstructions
) {
137 unsigned IID
= IR
.getSourceIndex();
138 IPI
[IID
].MemoryPressureCycles
++;
144 void DependencyGraph::dumpDependencyEdge(raw_ostream
&OS
,
145 const DependencyEdge
&DepEdge
,
146 MCInstPrinter
&MCIP
) const {
147 unsigned FromIID
= DepEdge
.FromIID
;
148 unsigned ToIID
= DepEdge
.ToIID
;
149 assert(FromIID
< ToIID
&& "Graph should be acyclic!");
151 const DependencyEdge::Dependency
&DE
= DepEdge
.Dep
;
152 assert(DE
.Type
!= DependencyEdge::DT_INVALID
&& "Unexpected invalid edge!");
154 OS
<< " FROM: " << FromIID
<< " TO: " << ToIID
<< " ";
155 if (DE
.Type
== DependencyEdge::DT_REGISTER
) {
156 OS
<< " - REGISTER: ";
157 MCIP
.printRegName(OS
, DE
.ResourceOrRegID
);
158 } else if (DE
.Type
== DependencyEdge::DT_MEMORY
) {
161 assert(DE
.Type
== DependencyEdge::DT_RESOURCE
&&
162 "Unsupported dependency type!");
163 OS
<< " - RESOURCE MASK: " << DE
.ResourceOrRegID
;
165 OS
<< " - COST: " << DE
.Cost
<< '\n';
169 void DependencyGraph::pruneEdges(unsigned Iterations
) {
170 for (DGNode
&N
: Nodes
) {
171 unsigned NumPruned
= 0;
172 const unsigned Size
= N
.OutgoingEdges
.size();
173 // Use a cut-off threshold to prune edges with a low frequency.
174 for (unsigned I
= 0, E
= Size
; I
< E
; ++I
) {
175 DependencyEdge
&Edge
= N
.OutgoingEdges
[I
];
176 if (Edge
.Frequency
== Iterations
)
178 double Factor
= (double)Edge
.Frequency
/ Iterations
;
181 Nodes
[Edge
.ToIID
].NumPredecessors
--;
182 std::swap(Edge
, N
.OutgoingEdges
[E
- 1]);
188 N
.OutgoingEdges
.resize(Size
- NumPruned
);
192 void DependencyGraph::initializeRootSet(
193 SmallVectorImpl
<unsigned> &RootSet
) const {
194 for (unsigned I
= 0, E
= Nodes
.size(); I
< E
; ++I
) {
195 const DGNode
&N
= Nodes
[I
];
196 if (N
.NumPredecessors
== 0 && !N
.OutgoingEdges
.empty())
197 RootSet
.emplace_back(I
);
201 void DependencyGraph::propagateThroughEdges(SmallVectorImpl
<unsigned> &RootSet
,
202 unsigned Iterations
) {
203 SmallVector
<unsigned, 8> ToVisit
;
205 // A critical sequence is computed as the longest path from a node of the
206 // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
207 // composed of nodes with at least one successor, and no predecessors.
209 // Each node of the graph starts with an initial default cost of zero. The
210 // cost of a node is a measure of criticality: the higher the cost, the bigger
211 // is the performance impact.
212 // For register and memory dependencies, the cost is a function of the write
213 // latency as well as the actual delay (in cycles) caused to users.
214 // For processor resource dependencies, the cost is a function of the resource
215 // pressure. Resource interferences with low frequency values are ignored.
217 // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
218 // the inner loop selects (i.e. visits) a node N from a set of `unvisited
219 // nodes`, and then propagates the cost of N to all its neighbors.
221 // The `unvisited nodes` set initially contains all the nodes from the
222 // RootSet. A node N is added to the `unvisited nodes` if all its
223 // predecessors have been visited already.
225 // For simplicity, every node tracks the number of unvisited incoming edges in
226 // field `NumVisitedPredecessors`. When the value of that field drops to
227 // zero, then the corresponding node is added to a `ToVisit` set.
229 // At the end of every iteration of the outer loop, set `ToVisit` becomes our
230 // new `unvisited nodes` set.
232 // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
233 // is empty. This algorithm works under the assumption that the graph is
236 for (unsigned IID
: RootSet
) {
237 const DGNode
&N
= Nodes
[IID
];
238 for (const DependencyEdge
&DepEdge
: N
.OutgoingEdges
) {
239 unsigned ToIID
= DepEdge
.ToIID
;
240 DGNode
&To
= Nodes
[ToIID
];
241 uint64_t Cost
= N
.Cost
+ DepEdge
.Dep
.Cost
;
242 // Check if this is the most expensive incoming edge seen so far. In
243 // case, update the total cost of the destination node (ToIID), as well
244 // its field `CriticalPredecessor`.
245 if (Cost
> To
.Cost
) {
246 To
.CriticalPredecessor
= DepEdge
;
248 To
.Depth
= N
.Depth
+ 1;
250 To
.NumVisitedPredecessors
++;
251 if (To
.NumVisitedPredecessors
== To
.NumPredecessors
)
252 ToVisit
.emplace_back(ToIID
);
256 std::swap(RootSet
, ToVisit
);
258 } while (!RootSet
.empty());
261 void DependencyGraph::getCriticalSequence(
262 SmallVectorImpl
<const DependencyEdge
*> &Seq
) const {
263 // At this stage, nodes of the graph have been already visited, and costs have
264 // been propagated through the edges (see method `propagateThroughEdges()`).
266 // Identify the node N with the highest cost in the graph. By construction,
267 // that node is the last instruction of our critical sequence.
268 // Field N.Depth would tell us the total length of the sequence.
270 // To obtain the sequence of critical edges, we simply follow the chain of
271 // critical predecessors starting from node N (field
272 // DGNode::CriticalPredecessor).
274 llvm::max_element(Nodes
, [](const DGNode
&Lhs
, const DGNode
&Rhs
) {
275 return Lhs
.Cost
< Rhs
.Cost
;
277 unsigned IID
= std::distance(Nodes
.begin(), It
);
278 Seq
.resize(Nodes
[IID
].Depth
);
279 for (const DependencyEdge
*&DE
: llvm::reverse(Seq
)) {
280 const DGNode
&N
= Nodes
[IID
];
281 DE
= &N
.CriticalPredecessor
;
282 IID
= N
.CriticalPredecessor
.FromIID
;
286 void BottleneckAnalysis::printInstruction(formatted_raw_ostream
&FOS
,
288 bool UseDifferentColor
) const {
290 if (UseDifferentColor
)
291 FOS
.changeColor(raw_ostream::CYAN
, true, false);
292 FOS
<< printInstructionString(MCI
);
293 if (UseDifferentColor
)
297 void BottleneckAnalysis::printCriticalSequence(raw_ostream
&OS
) const {
298 // Early exit if no bottlenecks were found during the simulation.
299 if (!SeenStallCycles
|| !BPI
.PressureIncreaseCycles
)
302 SmallVector
<const DependencyEdge
*, 16> Seq
;
303 DG
.getCriticalSequence(Seq
);
307 OS
<< "\nCritical sequence based on the simulation:\n\n";
309 const DependencyEdge
&FirstEdge
= *Seq
[0];
310 ArrayRef
<llvm::MCInst
> Source
= getSource();
311 unsigned FromIID
= FirstEdge
.FromIID
% Source
.size();
312 unsigned ToIID
= FirstEdge
.ToIID
% Source
.size();
313 bool IsLoopCarried
= FromIID
>= ToIID
;
315 formatted_raw_ostream
FOS(OS
);
317 FOS
<< "Instruction";
319 FOS
<< "Dependency Information";
321 bool HasColors
= FOS
.has_colors();
323 unsigned CurrentIID
= 0;
325 FOS
<< "\n +----< " << FromIID
<< ".";
326 printInstruction(FOS
, Source
[FromIID
], HasColors
);
327 FOS
<< "\n |\n | < loop carried > \n |";
329 while (CurrentIID
< FromIID
) {
330 FOS
<< "\n " << CurrentIID
<< ".";
331 printInstruction(FOS
, Source
[CurrentIID
]);
335 FOS
<< "\n +----< " << CurrentIID
<< ".";
336 printInstruction(FOS
, Source
[CurrentIID
], HasColors
);
340 for (const DependencyEdge
*&DE
: Seq
) {
341 ToIID
= DE
->ToIID
% Source
.size();
342 unsigned LastIID
= CurrentIID
> ToIID
? Source
.size() : ToIID
;
344 while (CurrentIID
< LastIID
) {
345 FOS
<< "\n | " << CurrentIID
<< ".";
346 printInstruction(FOS
, Source
[CurrentIID
]);
350 if (CurrentIID
== ToIID
) {
351 FOS
<< "\n +----> " << ToIID
<< ".";
352 printInstruction(FOS
, Source
[CurrentIID
], HasColors
);
354 FOS
<< "\n |\n | < loop carried > \n |"
355 << "\n +----> " << ToIID
<< ".";
356 printInstruction(FOS
, Source
[ToIID
], HasColors
);
360 const DependencyEdge::Dependency
&Dep
= DE
->Dep
;
362 FOS
.changeColor(raw_ostream::SAVEDCOLOR
, true, false);
364 if (Dep
.Type
== DependencyEdge::DT_REGISTER
) {
365 FOS
<< "## REGISTER dependency: ";
367 FOS
.changeColor(raw_ostream::MAGENTA
, true, false);
368 getInstPrinter().printRegName(FOS
, Dep
.ResourceOrRegID
);
369 } else if (Dep
.Type
== DependencyEdge::DT_MEMORY
) {
370 FOS
<< "## MEMORY dependency.";
372 assert(Dep
.Type
== DependencyEdge::DT_RESOURCE
&&
373 "Unsupported dependency type!");
374 FOS
<< "## RESOURCE interference: ";
376 FOS
.changeColor(raw_ostream::MAGENTA
, true, false);
377 FOS
<< Tracker
.resolveResourceName(Dep
.ResourceOrRegID
);
380 FOS
.changeColor(raw_ostream::SAVEDCOLOR
, true, false);
382 FOS
<< " [ probability: " << ((DE
->Frequency
* 100) / Iterations
)
390 while (CurrentIID
< Source
.size()) {
391 FOS
<< "\n " << CurrentIID
<< ".";
392 printInstruction(FOS
, Source
[CurrentIID
]);
401 void DependencyGraph::dump(raw_ostream
&OS
, MCInstPrinter
&MCIP
) const {
402 OS
<< "\nREG DEPS\n";
403 for (const DGNode
&Node
: Nodes
)
404 for (const DependencyEdge
&DE
: Node
.OutgoingEdges
)
405 if (DE
.Dep
.Type
== DependencyEdge::DT_REGISTER
)
406 dumpDependencyEdge(OS
, DE
, MCIP
);
408 OS
<< "\nMEM DEPS\n";
409 for (const DGNode
&Node
: Nodes
)
410 for (const DependencyEdge
&DE
: Node
.OutgoingEdges
)
411 if (DE
.Dep
.Type
== DependencyEdge::DT_MEMORY
)
412 dumpDependencyEdge(OS
, DE
, MCIP
);
414 OS
<< "\nRESOURCE DEPS\n";
415 for (const DGNode
&Node
: Nodes
)
416 for (const DependencyEdge
&DE
: Node
.OutgoingEdges
)
417 if (DE
.Dep
.Type
== DependencyEdge::DT_RESOURCE
)
418 dumpDependencyEdge(OS
, DE
, MCIP
);
422 void DependencyGraph::addDependency(unsigned From
, unsigned To
,
423 DependencyEdge::Dependency
&&Dep
) {
424 DGNode
&NodeFrom
= Nodes
[From
];
425 DGNode
&NodeTo
= Nodes
[To
];
426 SmallVectorImpl
<DependencyEdge
> &Vec
= NodeFrom
.OutgoingEdges
;
428 auto It
= find_if(Vec
, [To
, Dep
](DependencyEdge
&DE
) {
429 return DE
.ToIID
== To
&& DE
.Dep
.ResourceOrRegID
== Dep
.ResourceOrRegID
;
432 if (It
!= Vec
.end()) {
433 It
->Dep
.Cost
+= Dep
.Cost
;
438 DependencyEdge DE
= {Dep
, From
, To
, 1};
439 Vec
.emplace_back(DE
);
440 NodeTo
.NumPredecessors
++;
443 BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo
&sti
,
444 MCInstPrinter
&Printer
,
445 ArrayRef
<MCInst
> S
, unsigned NumIter
)
446 : InstructionView(sti
, Printer
, S
), Tracker(sti
.getSchedModel()),
447 DG(S
.size() * 3), Iterations(NumIter
), TotalCycles(0),
448 PressureIncreasedBecauseOfResources(false),
449 PressureIncreasedBecauseOfRegisterDependencies(false),
450 PressureIncreasedBecauseOfMemoryDependencies(false),
451 SeenStallCycles(false), BPI() {}
453 void BottleneckAnalysis::addRegisterDep(unsigned From
, unsigned To
,
454 unsigned RegID
, unsigned Cost
) {
455 bool IsLoopCarried
= From
>= To
;
456 unsigned SourceSize
= getSource().size();
458 DG
.addRegisterDep(From
, To
+ SourceSize
, RegID
, Cost
);
459 DG
.addRegisterDep(From
+ SourceSize
, To
+ (SourceSize
* 2), RegID
, Cost
);
462 DG
.addRegisterDep(From
+ SourceSize
, To
+ SourceSize
, RegID
, Cost
);
465 void BottleneckAnalysis::addMemoryDep(unsigned From
, unsigned To
,
467 bool IsLoopCarried
= From
>= To
;
468 unsigned SourceSize
= getSource().size();
470 DG
.addMemoryDep(From
, To
+ SourceSize
, Cost
);
471 DG
.addMemoryDep(From
+ SourceSize
, To
+ (SourceSize
* 2), Cost
);
474 DG
.addMemoryDep(From
+ SourceSize
, To
+ SourceSize
, Cost
);
477 void BottleneckAnalysis::addResourceDep(unsigned From
, unsigned To
,
478 uint64_t Mask
, unsigned Cost
) {
479 bool IsLoopCarried
= From
>= To
;
480 unsigned SourceSize
= getSource().size();
482 DG
.addResourceDep(From
, To
+ SourceSize
, Mask
, Cost
);
483 DG
.addResourceDep(From
+ SourceSize
, To
+ (SourceSize
* 2), Mask
, Cost
);
486 DG
.addResourceDep(From
+ SourceSize
, To
+ SourceSize
, Mask
, Cost
);
489 void BottleneckAnalysis::onEvent(const HWInstructionEvent
&Event
) {
490 const unsigned IID
= Event
.IR
.getSourceIndex();
491 if (Event
.Type
== HWInstructionEvent::Dispatched
) {
492 Tracker
.onInstructionDispatched(IID
);
495 if (Event
.Type
== HWInstructionEvent::Executed
) {
496 Tracker
.onInstructionExecuted(IID
);
500 if (Event
.Type
!= HWInstructionEvent::Issued
)
503 ArrayRef
<llvm::MCInst
> Source
= getSource();
504 const Instruction
&IS
= *Event
.IR
.getInstruction();
505 unsigned To
= IID
% Source
.size();
507 unsigned Cycles
= 2 * Tracker
.getResourcePressureCycles(IID
);
508 uint64_t ResourceMask
= IS
.getCriticalResourceMask();
509 SmallVector
<std::pair
<unsigned, unsigned>, 4> Users
;
510 while (ResourceMask
) {
511 uint64_t Current
= ResourceMask
& (-ResourceMask
);
512 Tracker
.getResourceUsers(Current
, Users
);
513 for (const std::pair
<unsigned, unsigned> &U
: Users
)
514 addResourceDep(U
.first
% Source
.size(), To
, Current
, U
.second
+ Cycles
);
516 ResourceMask
^= Current
;
519 const CriticalDependency
&RegDep
= IS
.getCriticalRegDep();
521 Cycles
= RegDep
.Cycles
+ 2 * Tracker
.getRegisterPressureCycles(IID
);
522 unsigned From
= RegDep
.IID
% Source
.size();
523 addRegisterDep(From
, To
, RegDep
.RegID
, Cycles
);
526 const CriticalDependency
&MemDep
= IS
.getCriticalMemDep();
528 Cycles
= MemDep
.Cycles
+ 2 * Tracker
.getMemoryPressureCycles(IID
);
529 unsigned From
= MemDep
.IID
% Source
.size();
530 addMemoryDep(From
, To
, Cycles
);
533 Tracker
.handleInstructionIssuedEvent(
534 static_cast<const HWInstructionIssuedEvent
&>(Event
));
536 // Check if this is the last simulated instruction.
537 if (IID
== ((Iterations
* Source
.size()) - 1))
538 DG
.finalizeGraph(Iterations
);
541 void BottleneckAnalysis::onEvent(const HWPressureEvent
&Event
) {
542 assert(Event
.Reason
!= HWPressureEvent::INVALID
&&
543 "Unexpected invalid event!");
545 Tracker
.handlePressureEvent(Event
);
547 switch (Event
.Reason
) {
551 case HWPressureEvent::RESOURCES
:
552 PressureIncreasedBecauseOfResources
= true;
554 case HWPressureEvent::REGISTER_DEPS
:
555 PressureIncreasedBecauseOfRegisterDependencies
= true;
557 case HWPressureEvent::MEMORY_DEPS
:
558 PressureIncreasedBecauseOfMemoryDependencies
= true;
563 void BottleneckAnalysis::onCycleEnd() {
566 bool PressureIncreasedBecauseOfDataDependencies
=
567 PressureIncreasedBecauseOfRegisterDependencies
||
568 PressureIncreasedBecauseOfMemoryDependencies
;
569 if (!PressureIncreasedBecauseOfResources
&&
570 !PressureIncreasedBecauseOfDataDependencies
)
573 ++BPI
.PressureIncreaseCycles
;
574 if (PressureIncreasedBecauseOfRegisterDependencies
)
575 ++BPI
.RegisterDependencyCycles
;
576 if (PressureIncreasedBecauseOfMemoryDependencies
)
577 ++BPI
.MemoryDependencyCycles
;
578 if (PressureIncreasedBecauseOfDataDependencies
)
579 ++BPI
.DataDependencyCycles
;
580 if (PressureIncreasedBecauseOfResources
)
581 ++BPI
.ResourcePressureCycles
;
582 PressureIncreasedBecauseOfResources
= false;
583 PressureIncreasedBecauseOfRegisterDependencies
= false;
584 PressureIncreasedBecauseOfMemoryDependencies
= false;
587 void BottleneckAnalysis::printBottleneckHints(raw_ostream
&OS
) const {
588 if (!SeenStallCycles
|| !BPI
.PressureIncreaseCycles
) {
589 OS
<< "\n\nNo resource or data dependency bottlenecks discovered.\n";
593 double PressurePerCycle
=
594 (double)BPI
.PressureIncreaseCycles
* 100 / TotalCycles
;
595 double ResourcePressurePerCycle
=
596 (double)BPI
.ResourcePressureCycles
* 100 / TotalCycles
;
597 double DDPerCycle
= (double)BPI
.DataDependencyCycles
* 100 / TotalCycles
;
598 double RegDepPressurePerCycle
=
599 (double)BPI
.RegisterDependencyCycles
* 100 / TotalCycles
;
600 double MemDepPressurePerCycle
=
601 (double)BPI
.MemoryDependencyCycles
* 100 / TotalCycles
;
603 OS
<< "\n\nCycles with backend pressure increase [ "
604 << format("%.2f", floor((PressurePerCycle
* 100) + 0.5) / 100) << "% ]";
606 OS
<< "\nThroughput Bottlenecks: "
607 << "\n Resource Pressure [ "
608 << format("%.2f", floor((ResourcePressurePerCycle
* 100) + 0.5) / 100)
611 if (BPI
.PressureIncreaseCycles
) {
612 ArrayRef
<unsigned> Distribution
= Tracker
.getResourcePressureDistribution();
613 const MCSchedModel
&SM
= getSubTargetInfo().getSchedModel();
614 for (unsigned I
= 0, E
= Distribution
.size(); I
< E
; ++I
) {
615 unsigned ReleaseAtCycles
= Distribution
[I
];
616 if (ReleaseAtCycles
) {
617 double Frequency
= (double)ReleaseAtCycles
* 100 / TotalCycles
;
618 const MCProcResourceDesc
&PRDesc
= *SM
.getProcResource(I
);
619 OS
<< "\n - " << PRDesc
.Name
<< " [ "
620 << format("%.2f", floor((Frequency
* 100) + 0.5) / 100) << "% ]";
625 OS
<< "\n Data Dependencies: [ "
626 << format("%.2f", floor((DDPerCycle
* 100) + 0.5) / 100) << "% ]";
627 OS
<< "\n - Register Dependencies [ "
628 << format("%.2f", floor((RegDepPressurePerCycle
* 100) + 0.5) / 100)
630 OS
<< "\n - Memory Dependencies [ "
631 << format("%.2f", floor((MemDepPressurePerCycle
* 100) + 0.5) / 100)
635 void BottleneckAnalysis::printView(raw_ostream
&OS
) const {
637 raw_string_ostream
TempStream(Buffer
);
638 printBottleneckHints(TempStream
);
641 printCriticalSequence(OS
);
644 json::Value
BottleneckAnalysis::toJSON() const {
645 if (!SeenStallCycles
|| !BPI
.PressureIncreaseCycles
) {
646 json::Object
JO({{"PressureIncreaseCycles", 0}});
650 json::Array CriticalSequence
;
651 // get critical sequence
652 SmallVector
<const DependencyEdge
*, 16> Seq
;
653 DG
.getCriticalSequence(Seq
);
655 for (const DependencyEdge
*&DE
: Seq
) {
656 json::Object
DEJO({{"FromID", DE
->FromIID
},
658 {"Type", static_cast<unsigned>(DE
->Dep
.Type
)},
659 {"ResourceOrRegID", DE
->Dep
.ResourceOrRegID
}});
660 CriticalSequence
.push_back(std::move(DEJO
));
664 json::Array ResourcePressure
;
665 if (BPI
.PressureIncreaseCycles
) {
666 ArrayRef
<unsigned> Distribution
= Tracker
.getResourcePressureDistribution();
667 const MCSchedModel
&SM
= getSubTargetInfo().getSchedModel();
668 for (unsigned I
= 0, E
= Distribution
.size(); I
< E
; ++I
) {
669 unsigned ReleaseAtCycles
= Distribution
[I
];
670 if (ReleaseAtCycles
) {
671 const MCProcResourceDesc
&PRDesc
= *SM
.getProcResource(I
);
672 json::Object
RPJO({{PRDesc
.Name
, ReleaseAtCycles
}});
673 ResourcePressure
.push_back(std::move(RPJO
));
678 json::Object
JO({{"PressureIncreaseCycles", BPI
.PressureIncreaseCycles
},
679 {"ResourcePressureCycles", BPI
.ResourcePressureCycles
},
680 {"DataDependencyCycles", BPI
.DataDependencyCycles
},
681 {"RegisterDependencyCycles", BPI
.RegisterDependencyCycles
},
682 {"MemoryDependencyCycles", BPI
.MemoryDependencyCycles
},
683 {"TotalCycles", TotalCycles
},
684 {"DependencyEdge", std::move(CriticalSequence
)},
685 {"ResourcePressure", std::move(ResourcePressure
)}});