[yaml2obj/obj2yaml] - Add support for .stack_sizes sections.
[llvm-complete.git] / tools / llvm-mca / Views / BottleneckAnalysis.cpp
blobfeff0cd6d524015d741d8b8069b94e96bb2f9062
1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the functionalities used by the BottleneckAnalysis
11 /// to report bottleneck info.
12 ///
13 //===----------------------------------------------------------------------===//
15 #include "Views/BottleneckAnalysis.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MCA/Support.h"
18 #include "llvm/Support/Format.h"
19 #include "llvm/Support/FormattedStream.h"
21 namespace llvm {
22 namespace mca {
24 #define DEBUG_TYPE "llvm-mca"
26 PressureTracker::PressureTracker(const MCSchedModel &Model)
27 : SM(Model),
28 ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
29 ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
30 ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
31 ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
32 computeProcResourceMasks(SM, ProcResID2Mask);
34 // Ignore the invalid resource at index zero.
35 unsigned NextResourceUsersIdx = 0;
36 for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
37 const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
38 ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
39 NextResourceUsersIdx += ProcResource.NumUnits;
40 uint64_t ResourceMask = ProcResID2Mask[I];
41 ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
44 ResourceUsers.resize(NextResourceUsersIdx);
45 std::fill(ResourceUsers.begin(), ResourceUsers.end(),
46 std::make_pair<unsigned, unsigned>(~0U, 0U));
49 void PressureTracker::getResourceUsers(uint64_t ResourceMask,
50 SmallVectorImpl<User> &Users) const {
51 unsigned Index = getResourceStateIndex(ResourceMask);
52 unsigned ProcResID = ResIdx2ProcResID[Index];
53 const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
54 for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
55 const User U = getResourceUser(ProcResID, I);
56 if (U.second && IPI.find(U.first) != IPI.end())
57 Users.emplace_back(U);
61 void PressureTracker::onInstructionDispatched(unsigned IID) {
62 IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
65 void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
67 void PressureTracker::handleInstructionIssuedEvent(
68 const HWInstructionIssuedEvent &Event) {
69 unsigned IID = Event.IR.getSourceIndex();
70 using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
71 using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
72 for (const ResourceUse &Use : Event.UsedResources) {
73 const ResourceRef &RR = Use.first;
74 unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
75 Index += countTrailingZeros(RR.second);
76 ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
80 void PressureTracker::updateResourcePressureDistribution(
81 uint64_t CumulativeMask) {
82 while (CumulativeMask) {
83 uint64_t Current = CumulativeMask & (-CumulativeMask);
84 unsigned ResIdx = getResourceStateIndex(Current);
85 unsigned ProcResID = ResIdx2ProcResID[ResIdx];
86 uint64_t Mask = ProcResID2Mask[ProcResID];
88 if (Mask == Current) {
89 ResourcePressureDistribution[ProcResID]++;
90 CumulativeMask ^= Current;
91 continue;
94 Mask ^= Current;
95 while (Mask) {
96 uint64_t SubUnit = Mask & (-Mask);
97 ResIdx = getResourceStateIndex(SubUnit);
98 ProcResID = ResIdx2ProcResID[ResIdx];
99 ResourcePressureDistribution[ProcResID]++;
100 Mask ^= SubUnit;
103 CumulativeMask ^= Current;
107 void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
108 assert(Event.Reason != HWPressureEvent::INVALID &&
109 "Unexpected invalid event!");
111 switch (Event.Reason) {
112 default:
113 break;
115 case HWPressureEvent::RESOURCES: {
116 const uint64_t ResourceMask = Event.ResourceMask;
117 updateResourcePressureDistribution(Event.ResourceMask);
119 for (const InstRef &IR : Event.AffectedInstructions) {
120 const Instruction &IS = *IR.getInstruction();
121 unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
122 if (!BusyResources)
123 continue;
125 unsigned IID = IR.getSourceIndex();
126 IPI[IID].ResourcePressureCycles++;
128 break;
131 case HWPressureEvent::REGISTER_DEPS:
132 for (const InstRef &IR : Event.AffectedInstructions) {
133 unsigned IID = IR.getSourceIndex();
134 IPI[IID].RegisterPressureCycles++;
136 break;
138 case HWPressureEvent::MEMORY_DEPS:
139 for (const InstRef &IR : Event.AffectedInstructions) {
140 unsigned IID = IR.getSourceIndex();
141 IPI[IID].MemoryPressureCycles++;
146 #ifndef NDEBUG
147 void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
148 const DependencyEdge &DepEdge,
149 MCInstPrinter &MCIP) const {
150 unsigned FromIID = DepEdge.FromIID;
151 unsigned ToIID = DepEdge.ToIID;
152 assert(FromIID < ToIID && "Graph should be acyclic!");
154 const DependencyEdge::Dependency &DE = DepEdge.Dep;
155 assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
157 OS << " FROM: " << FromIID << " TO: " << ToIID << " ";
158 if (DE.Type == DependencyEdge::DT_REGISTER) {
159 OS << " - REGISTER: ";
160 MCIP.printRegName(OS, DE.ResourceOrRegID);
161 } else if (DE.Type == DependencyEdge::DT_MEMORY) {
162 OS << " - MEMORY";
163 } else {
164 assert(DE.Type == DependencyEdge::DT_RESOURCE &&
165 "Unsupported dependency type!");
166 OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
168 OS << " - COST: " << DE.Cost << '\n';
170 #endif // NDEBUG
172 void DependencyGraph::pruneEdges(unsigned Iterations) {
173 for (DGNode &N : Nodes) {
174 unsigned NumPruned = 0;
175 const unsigned Size = N.OutgoingEdges.size();
176 // Use a cut-off threshold to prune edges with a low frequency.
177 for (unsigned I = 0, E = Size; I < E; ++I) {
178 DependencyEdge &Edge = N.OutgoingEdges[I];
179 if (Edge.Frequency == Iterations)
180 continue;
181 double Factor = (double)Edge.Frequency / Iterations;
182 if (0.10 < Factor)
183 continue;
184 Nodes[Edge.ToIID].NumPredecessors--;
185 std::swap(Edge, N.OutgoingEdges[E - 1]);
186 --E;
187 ++NumPruned;
190 if (NumPruned)
191 N.OutgoingEdges.resize(Size - NumPruned);
195 void DependencyGraph::initializeRootSet(
196 SmallVectorImpl<unsigned> &RootSet) const {
197 for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
198 const DGNode &N = Nodes[I];
199 if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
200 RootSet.emplace_back(I);
204 void DependencyGraph::propagateThroughEdges(
205 SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) {
206 SmallVector<unsigned, 8> ToVisit;
208 // A critical sequence is computed as the longest path from a node of the
209 // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
210 // composed of nodes with at least one successor, and no predecessors.
212 // Each node of the graph starts with an initial default cost of zero. The
213 // cost of a node is a measure of criticality: the higher the cost, the bigger
214 // is the performance impact.
215 // For register and memory dependencies, the cost is a function of the write
216 // latency as well as the actual delay (in cycles) caused to users.
217 // For processor resource dependencies, the cost is a function of the resource
218 // pressure. Resource interferences with low frequency values are ignored.
220 // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
221 // the inner loop selects (i.e. visits) a node N from a set of `unvisited
222 // nodes`, and then propagates the cost of N to all its neighbors.
224 // The `unvisited nodes` set initially contains all the nodes from the
225 // RootSet. A node N is added to the `unvisited nodes` if all its
226 // predecessors have been visited already.
228 // For simplicity, every node tracks the number of unvisited incoming edges in
229 // field `NumVisitedPredecessors`. When the value of that field drops to
230 // zero, then the corresponding node is added to a `ToVisit` set.
232 // At the end of every iteration of the outer loop, set `ToVisit` becomes our
233 // new `unvisited nodes` set.
235 // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
236 // is empty. This algorithm works under the assumption that the graph is
237 // acyclic.
238 do {
239 for (unsigned IID : RootSet) {
240 const DGNode &N = Nodes[IID];
241 for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
242 unsigned ToIID = DepEdge.ToIID;
243 DGNode &To = Nodes[ToIID];
244 uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
245 // Check if this is the most expensive incoming edge seen so far. In
246 // case, update the total cost of the destination node (ToIID), as well
247 // its field `CriticalPredecessor`.
248 if (Cost > To.Cost) {
249 To.CriticalPredecessor = DepEdge;
250 To.Cost = Cost;
251 To.Depth = N.Depth + 1;
253 To.NumVisitedPredecessors++;
254 if (To.NumVisitedPredecessors == To.NumPredecessors)
255 ToVisit.emplace_back(ToIID);
259 std::swap(RootSet, ToVisit);
260 ToVisit.clear();
261 } while (!RootSet.empty());
264 void DependencyGraph::getCriticalSequence(
265 SmallVectorImpl<const DependencyEdge *> &Seq) const {
266 // At this stage, nodes of the graph have been already visited, and costs have
267 // been propagated through the edges (see method `propagateThroughEdges()`).
269 // Identify the node N with the highest cost in the graph. By construction,
270 // that node is the last instruction of our critical sequence.
271 // Field N.Depth would tell us the total length of the sequence.
273 // To obtain the sequence of critical edges, we simply follow the chain of critical
274 // predecessors starting from node N (field DGNode::CriticalPredecessor).
275 const auto It = std::max_element(
276 Nodes.begin(), Nodes.end(),
277 [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
278 unsigned IID = std::distance(Nodes.begin(), It);
279 Seq.resize(Nodes[IID].Depth);
280 for (unsigned I = Seq.size(), E = 0; I > E; --I) {
281 const DGNode &N = Nodes[IID];
282 Seq[I - 1] = &N.CriticalPredecessor;
283 IID = N.CriticalPredecessor.FromIID;
287 static void printInstruction(formatted_raw_ostream &FOS,
288 const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
289 const MCInst &MCI,
290 bool UseDifferentColor = false) {
291 std::string Instruction;
292 raw_string_ostream InstrStream(Instruction);
294 FOS.PadToColumn(14);
296 MCIP.printInst(&MCI, InstrStream, "", STI);
297 InstrStream.flush();
299 if (UseDifferentColor)
300 FOS.changeColor(raw_ostream::CYAN, true, false);
301 FOS << StringRef(Instruction).ltrim();
302 if (UseDifferentColor)
303 FOS.resetColor();
306 void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
307 // Early exit if no bottlenecks were found during the simulation.
308 if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
309 return;
311 SmallVector<const DependencyEdge *, 16> Seq;
312 DG.getCriticalSequence(Seq);
313 if (Seq.empty())
314 return;
316 OS << "\nCritical sequence based on the simulation:\n\n";
318 const DependencyEdge &FirstEdge = *Seq[0];
319 unsigned FromIID = FirstEdge.FromIID % Source.size();
320 unsigned ToIID = FirstEdge.ToIID % Source.size();
321 bool IsLoopCarried = FromIID >= ToIID;
323 formatted_raw_ostream FOS(OS);
324 FOS.PadToColumn(14);
325 FOS << "Instruction";
326 FOS.PadToColumn(58);
327 FOS << "Dependency Information";
329 bool HasColors = FOS.has_colors();
331 unsigned CurrentIID = 0;
332 if (IsLoopCarried) {
333 FOS << "\n +----< " << FromIID << ".";
334 printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors);
335 FOS << "\n |\n | < loop carried > \n |";
336 } else {
337 while (CurrentIID < FromIID) {
338 FOS << "\n " << CurrentIID << ".";
339 printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
340 CurrentIID++;
343 FOS << "\n +----< " << CurrentIID << ".";
344 printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
345 CurrentIID++;
348 for (const DependencyEdge *&DE : Seq) {
349 ToIID = DE->ToIID % Source.size();
350 unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
352 while (CurrentIID < LastIID) {
353 FOS << "\n | " << CurrentIID << ".";
354 printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
355 CurrentIID++;
358 if (CurrentIID == ToIID) {
359 FOS << "\n +----> " << ToIID << ".";
360 printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
361 } else {
362 FOS << "\n |\n | < loop carried > \n |"
363 << "\n +----> " << ToIID << ".";
364 printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors);
366 FOS.PadToColumn(58);
368 const DependencyEdge::Dependency &Dep = DE->Dep;
369 if (HasColors)
370 FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
372 if (Dep.Type == DependencyEdge::DT_REGISTER) {
373 FOS << "## REGISTER dependency: ";
374 if (HasColors)
375 FOS.changeColor(raw_ostream::MAGENTA, true, false);
376 MCIP.printRegName(FOS, Dep.ResourceOrRegID);
377 } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
378 FOS << "## MEMORY dependency.";
379 } else {
380 assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
381 "Unsupported dependency type!");
382 FOS << "## RESOURCE interference: ";
383 if (HasColors)
384 FOS.changeColor(raw_ostream::MAGENTA, true, false);
385 FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
386 if (HasColors) {
387 FOS.resetColor();
388 FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
390 FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
391 << "% ]";
393 if (HasColors)
394 FOS.resetColor();
395 ++CurrentIID;
398 while (CurrentIID < Source.size()) {
399 FOS << "\n " << CurrentIID << ".";
400 printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
401 CurrentIID++;
404 FOS << '\n';
405 FOS.flush();
408 #ifndef NDEBUG
409 void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
410 OS << "\nREG DEPS\n";
411 for (const DGNode &Node : Nodes)
412 for (const DependencyEdge &DE : Node.OutgoingEdges)
413 if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
414 dumpDependencyEdge(OS, DE, MCIP);
416 OS << "\nMEM DEPS\n";
417 for (const DGNode &Node : Nodes)
418 for (const DependencyEdge &DE : Node.OutgoingEdges)
419 if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
420 dumpDependencyEdge(OS, DE, MCIP);
422 OS << "\nRESOURCE DEPS\n";
423 for (const DGNode &Node : Nodes)
424 for (const DependencyEdge &DE : Node.OutgoingEdges)
425 if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
426 dumpDependencyEdge(OS, DE, MCIP);
428 #endif // NDEBUG
430 void DependencyGraph::addDependency(unsigned From, unsigned To,
431 DependencyEdge::Dependency &&Dep) {
432 DGNode &NodeFrom = Nodes[From];
433 DGNode &NodeTo = Nodes[To];
434 SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
436 auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
437 return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
440 if (It != Vec.end()) {
441 It->Dep.Cost += Dep.Cost;
442 It->Frequency++;
443 return;
446 DependencyEdge DE = {Dep, From, To, 1};
447 Vec.emplace_back(DE);
448 NodeTo.NumPredecessors++;
451 BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
452 MCInstPrinter &Printer,
453 ArrayRef<MCInst> S, unsigned NumIter)
454 : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
455 Source(S), Iterations(NumIter), TotalCycles(0),
456 PressureIncreasedBecauseOfResources(false),
457 PressureIncreasedBecauseOfRegisterDependencies(false),
458 PressureIncreasedBecauseOfMemoryDependencies(false),
459 SeenStallCycles(false), BPI() {}
461 void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
462 unsigned RegID, unsigned Cost) {
463 bool IsLoopCarried = From >= To;
464 unsigned SourceSize = Source.size();
465 if (IsLoopCarried) {
466 DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
467 DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
468 return;
470 DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
473 void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
474 unsigned Cost) {
475 bool IsLoopCarried = From >= To;
476 unsigned SourceSize = Source.size();
477 if (IsLoopCarried) {
478 DG.addMemoryDep(From, To + SourceSize, Cost);
479 DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
480 return;
482 DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
485 void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
486 uint64_t Mask, unsigned Cost) {
487 bool IsLoopCarried = From >= To;
488 unsigned SourceSize = Source.size();
489 if (IsLoopCarried) {
490 DG.addResourceDep(From, To + SourceSize, Mask, Cost);
491 DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
492 return;
494 DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
497 void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
498 const unsigned IID = Event.IR.getSourceIndex();
499 if (Event.Type == HWInstructionEvent::Dispatched) {
500 Tracker.onInstructionDispatched(IID);
501 return;
503 if (Event.Type == HWInstructionEvent::Executed) {
504 Tracker.onInstructionExecuted(IID);
505 return;
508 if (Event.Type != HWInstructionEvent::Issued)
509 return;
511 const Instruction &IS = *Event.IR.getInstruction();
512 unsigned To = IID % Source.size();
514 unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
515 uint64_t ResourceMask = IS.getCriticalResourceMask();
516 SmallVector<std::pair<unsigned, unsigned>, 4> Users;
517 while (ResourceMask) {
518 uint64_t Current = ResourceMask & (-ResourceMask);
519 Tracker.getResourceUsers(Current, Users);
520 for (const std::pair<unsigned, unsigned> &U : Users)
521 addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
522 Users.clear();
523 ResourceMask ^= Current;
526 const CriticalDependency &RegDep = IS.getCriticalRegDep();
527 if (RegDep.Cycles) {
528 Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
529 unsigned From = RegDep.IID % Source.size();
530 addRegisterDep(From, To, RegDep.RegID, Cycles);
533 const CriticalDependency &MemDep = IS.getCriticalMemDep();
534 if (MemDep.Cycles) {
535 Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
536 unsigned From = MemDep.IID % Source.size();
537 addMemoryDep(From, To, Cycles);
540 Tracker.handleInstructionIssuedEvent(
541 static_cast<const HWInstructionIssuedEvent &>(Event));
543 // Check if this is the last simulated instruction.
544 if (IID == ((Iterations * Source.size()) - 1))
545 DG.finalizeGraph(Iterations);
548 void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
549 assert(Event.Reason != HWPressureEvent::INVALID &&
550 "Unexpected invalid event!");
552 Tracker.handlePressureEvent(Event);
554 switch (Event.Reason) {
555 default:
556 break;
558 case HWPressureEvent::RESOURCES:
559 PressureIncreasedBecauseOfResources = true;
560 break;
561 case HWPressureEvent::REGISTER_DEPS:
562 PressureIncreasedBecauseOfRegisterDependencies = true;
563 break;
564 case HWPressureEvent::MEMORY_DEPS:
565 PressureIncreasedBecauseOfMemoryDependencies = true;
566 break;
570 void BottleneckAnalysis::onCycleEnd() {
571 ++TotalCycles;
573 bool PressureIncreasedBecauseOfDataDependencies =
574 PressureIncreasedBecauseOfRegisterDependencies ||
575 PressureIncreasedBecauseOfMemoryDependencies;
576 if (!PressureIncreasedBecauseOfResources &&
577 !PressureIncreasedBecauseOfDataDependencies)
578 return;
580 ++BPI.PressureIncreaseCycles;
581 if (PressureIncreasedBecauseOfRegisterDependencies)
582 ++BPI.RegisterDependencyCycles;
583 if (PressureIncreasedBecauseOfMemoryDependencies)
584 ++BPI.MemoryDependencyCycles;
585 if (PressureIncreasedBecauseOfDataDependencies)
586 ++BPI.DataDependencyCycles;
587 if (PressureIncreasedBecauseOfResources)
588 ++BPI.ResourcePressureCycles;
589 PressureIncreasedBecauseOfResources = false;
590 PressureIncreasedBecauseOfRegisterDependencies = false;
591 PressureIncreasedBecauseOfMemoryDependencies = false;
594 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
595 if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
596 OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
597 return;
600 double PressurePerCycle =
601 (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
602 double ResourcePressurePerCycle =
603 (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
604 double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
605 double RegDepPressurePerCycle =
606 (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
607 double MemDepPressurePerCycle =
608 (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
610 OS << "\n\nCycles with backend pressure increase [ "
611 << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
613 OS << "\nThroughput Bottlenecks: "
614 << "\n Resource Pressure [ "
615 << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
616 << "% ]";
618 if (BPI.PressureIncreaseCycles) {
619 ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
620 const MCSchedModel &SM = STI.getSchedModel();
621 for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
622 unsigned ResourceCycles = Distribution[I];
623 if (ResourceCycles) {
624 double Frequency = (double)ResourceCycles * 100 / TotalCycles;
625 const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
626 OS << "\n - " << PRDesc.Name << " [ "
627 << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
632 OS << "\n Data Dependencies: [ "
633 << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
634 OS << "\n - Register Dependencies [ "
635 << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
636 << "% ]";
637 OS << "\n - Memory Dependencies [ "
638 << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
639 << "% ]\n";
642 void BottleneckAnalysis::printView(raw_ostream &OS) const {
643 std::string Buffer;
644 raw_string_ostream TempStream(Buffer);
645 printBottleneckHints(TempStream);
646 TempStream.flush();
647 OS << Buffer;
648 printCriticalSequence(OS);
651 } // namespace mca.
652 } // namespace llvm