[llvm-exegesis] Implements a cache of Instruction objects.
[llvm-core.git] / tools / llvm-exegesis / lib / Analysis.cpp
blob0dd6bcbd46619c73cad0947d72c6e04e5ceae953
1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
10 #include "Analysis.h"
11 #include "BenchmarkResult.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include <unordered_set>
16 #include <vector>
18 namespace llvm {
19 namespace exegesis {
// Separator written between the fields of every CSV row produced in this file.
static constexpr char kCsvSep = ',';
23 static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
24 unsigned SchedClassId,
25 const llvm::MCInst &MCI) {
26 const auto &SM = STI.getSchedModel();
27 while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
28 SchedClassId =
29 STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
30 return SchedClassId;
33 namespace {
35 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
37 template <EscapeTag Tag>
38 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
40 template <>
41 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
42 if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
43 OS << S;
44 } else {
45 // Needs escaping.
46 OS << '"';
47 for (const char C : S) {
48 if (C == '"')
49 OS << "\"\"";
50 else
51 OS << C;
53 OS << '"';
57 template <>
58 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
59 for (const char C : S) {
60 if (C == '<')
61 OS << "&lt;";
62 else if (C == '>')
63 OS << "&gt;";
64 else if (C == '&')
65 OS << "&amp;";
66 else
67 OS << C;
71 template <>
72 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
73 const llvm::StringRef S) {
74 for (const char C : S) {
75 if (C == '"')
76 OS << "\\\"";
77 else
78 OS << C;
82 } // namespace
84 template <EscapeTag Tag>
85 static void
86 writeClusterId(llvm::raw_ostream &OS,
87 const InstructionBenchmarkClustering::ClusterId &CID) {
88 if (CID.isNoise())
89 writeEscaped<Tag>(OS, "[noise]");
90 else if (CID.isError())
91 writeEscaped<Tag>(OS, "[error]");
92 else
93 OS << CID.getId();
96 template <EscapeTag Tag>
97 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
98 writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
101 template <typename EscapeTag, EscapeTag Tag>
102 void Analysis::writeSnippet(llvm::raw_ostream &OS,
103 llvm::ArrayRef<uint8_t> Bytes,
104 const char *Separator) const {
105 llvm::SmallVector<std::string, 3> Lines;
106 // Parse the asm snippet and print it.
107 while (!Bytes.empty()) {
108 llvm::MCInst MI;
109 uint64_t MISize = 0;
110 if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
111 llvm::nulls())) {
112 writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
113 writeEscaped<Tag>(OS, Separator);
114 writeEscaped<Tag>(OS, "[error decoding asm snippet]");
115 return;
117 Lines.emplace_back();
118 std::string &Line = Lines.back();
119 llvm::raw_string_ostream OSS(Line);
120 InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
121 Bytes = Bytes.drop_front(MISize);
122 OSS.flush();
123 Line = llvm::StringRef(Line).trim().str();
125 writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
128 // Prints a row representing an instruction, along with scheduling info and
129 // point coordinates (measurements).
130 void Analysis::printInstructionRowCsv(const size_t PointId,
131 llvm::raw_ostream &OS) const {
132 const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
133 writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
134 OS << kCsvSep;
135 writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
136 OS << kCsvSep;
137 writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
138 OS << kCsvSep;
139 assert(!Point.Key.Instructions.empty());
140 const llvm::MCInst &MCI = Point.Key.Instructions[0];
141 const unsigned SchedClassId = resolveSchedClassId(
142 *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);
144 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
145 const llvm::MCSchedClassDesc *const SCDesc =
146 SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId);
147 writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
148 #else
149 OS << SchedClassId;
150 #endif
151 for (const auto &Measurement : Point.Measurements) {
152 OS << kCsvSep;
153 writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
155 OS << "\n";
158 Analysis::Analysis(const llvm::Target &Target,
159 const InstructionBenchmarkClustering &Clustering)
160 : Clustering_(Clustering) {
161 if (Clustering.getPoints().empty())
162 return;
164 const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
165 InstrInfo_.reset(Target.createMCInstrInfo());
166 RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
167 AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
168 SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
169 FirstPoint.CpuName, ""));
170 InstPrinter_.reset(Target.createMCInstPrinter(
171 llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
172 *InstrInfo_, *RegInfo_));
174 Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
175 &ObjectFileInfo_);
176 Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
177 assert(Disasm_ && "cannot create MCDisassembler. missing call to "
178 "InitializeXXXTargetDisassembler ?");
181 template <>
182 llvm::Error
183 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
184 if (Clustering_.getPoints().empty())
185 return llvm::Error::success();
187 // Write the header.
188 OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
189 << kCsvSep << "sched_class";
190 for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
191 OS << kCsvSep;
192 writeEscaped<kEscapeCsv>(OS, Measurement.Key);
194 OS << "\n";
196 // Write the points.
197 const auto &Clusters = Clustering_.getValidClusters();
198 for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
199 for (const size_t PointId : Clusters[I].PointIndices) {
200 printInstructionRowCsv(PointId, OS);
202 OS << "\n\n";
204 return llvm::Error::success();
207 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
208 ResolvedSchedClass &&RSC)
209 : RSC(std::move(RSC)) {}
211 std::vector<Analysis::ResolvedSchedClassAndPoints>
212 Analysis::makePointsPerSchedClass() const {
213 std::vector<ResolvedSchedClassAndPoints> Entries;
214 // Maps SchedClassIds to index in result.
215 std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
216 const auto &Points = Clustering_.getPoints();
217 for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
218 const InstructionBenchmark &Point = Points[PointId];
219 if (!Point.Error.empty())
220 continue;
221 assert(!Point.Key.Instructions.empty());
222 // FIXME: we should be using the tuple of classes for instructions in the
223 // snippet as key.
224 const llvm::MCInst &MCI = Point.Key.Instructions[0];
225 unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
226 const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
227 .getSchedClassDesc(SchedClassId)
228 ->isVariant();
229 SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
230 const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
231 if (IndexIt == SchedClassIdToIndex.end()) {
232 // Create a new entry.
233 SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
234 ResolvedSchedClassAndPoints Entry(
235 ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
236 Entry.PointIds.push_back(PointId);
237 Entries.push_back(std::move(Entry));
238 } else {
239 // Append to the existing entry.
240 Entries[IndexIt->second].PointIds.push_back(PointId);
243 return Entries;
246 // Uops repeat the same opcode over again. Just show this opcode and show the
247 // whole snippet only on hover.
248 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
249 const std::vector<llvm::MCInst> &Instructions,
250 const llvm::MCInstrInfo &InstrInfo) {
251 if (Instructions.empty())
252 return;
253 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
254 if (Instructions.size() > 1)
255 OS << " (x" << Instructions.size() << ")";
258 // Latency tries to find a serial path. Just show the opcode path and show the
259 // whole snippet only on hover.
260 static void
261 writeLatencySnippetHtml(llvm::raw_ostream &OS,
262 const std::vector<llvm::MCInst> &Instructions,
263 const llvm::MCInstrInfo &InstrInfo) {
264 bool First = true;
265 for (const llvm::MCInst &Instr : Instructions) {
266 if (First)
267 First = false;
268 else
269 OS << " &rarr; ";
270 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
274 void Analysis::printSchedClassClustersHtml(
275 const std::vector<SchedClassCluster> &Clusters,
276 const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
277 const auto &Points = Clustering_.getPoints();
278 OS << "<table class=\"sched-class-clusters\">";
279 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
280 assert(!Clusters.empty());
281 for (const auto &Measurement :
282 Points[Clusters[0].getPointIds()[0]].Measurements) {
283 OS << "<th>";
284 writeEscaped<kEscapeHtml>(OS, Measurement.Key);
285 OS << "</th>";
287 OS << "</tr>";
288 for (const SchedClassCluster &Cluster : Clusters) {
289 OS << "<tr class=\""
290 << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
291 ? "good-cluster"
292 : "bad-cluster")
293 << "\"><td>";
294 writeClusterId<kEscapeHtml>(OS, Cluster.id());
295 OS << "</td><td><ul>";
296 for (const size_t PointId : Cluster.getPointIds()) {
297 const auto &Point = Points[PointId];
298 OS << "<li><span class=\"mono\" title=\"";
299 writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
300 "\n");
301 OS << "\">";
302 switch (Point.Mode) {
303 case InstructionBenchmark::Latency:
304 writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
305 break;
306 case InstructionBenchmark::Uops:
307 writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
308 break;
309 default:
310 llvm_unreachable("invalid mode");
312 OS << "</span> <span class=\"mono\">";
313 writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
314 OS << "</span></li>";
316 OS << "</ul></td>";
317 for (const auto &Stats : Cluster.getRepresentative()) {
318 OS << "<td class=\"measurement\">";
319 writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
320 OS << "<br><span class=\"minmax\">[";
321 writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
322 OS << ";";
323 writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
324 OS << "]</span></td>";
326 OS << "</tr>";
328 OS << "</table>";
331 // Return the non-redundant list of WriteProcRes used by the given sched class.
332 // The scheduling model for LLVM is such that each instruction has a certain
333 // number of uops which consume resources which are described by WriteProcRes
334 // entries. Each entry describe how many cycles are spent on a specific ProcRes
335 // kind.
336 // For example, an instruction might have 3 uOps, one dispatching on P0
337 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
338 // Note that LLVM additionally denormalizes resource consumption to include
339 // usage of super resources by subresources. So in practice if there exists a
340 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
341 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
342 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
343 // denote implied usage of superresources by subresources:
344 // P0 P06 P016
345 // uOp1 1 (1) (1)
346 // uOp2 1 (1)
347 // uOp3 1 (1)
348 // =============================
349 // 1 3 3
350 // Eventually we end up with three entries for the WriteProcRes of the
351 // instruction:
352 // {ProcResIdx=1, Cycles=1} // P0
353 // {ProcResIdx=7, Cycles=3} // P06
354 // {ProcResIdx=10, Cycles=3} // P016
356 // Note that in this case, P016 does not contribute any cycles, so it would
357 // be removed by this function.
358 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
359 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
360 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
361 const llvm::MCSubtargetInfo &STI) {
362 llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
363 const auto &SM = STI.getSchedModel();
364 const unsigned NumProcRes = SM.getNumProcResourceKinds();
366 // This assumes that the ProcResDescs are sorted in topological order, which
367 // is guaranteed by the tablegen backend.
368 llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
369 for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
370 *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
371 WPR != WPREnd; ++WPR) {
372 const llvm::MCProcResourceDesc *const ProcResDesc =
373 SM.getProcResource(WPR->ProcResourceIdx);
374 if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
375 // This is a ProcResUnit.
376 Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
377 ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
378 } else {
379 // This is a ProcResGroup. First see if it contributes any cycles or if
380 // it has cycles just from subunits.
381 float RemainingCycles = WPR->Cycles;
382 for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
383 SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
384 ++SubResIdx) {
385 RemainingCycles -= ProcResUnitUsage[*SubResIdx];
387 if (RemainingCycles < 0.01f) {
388 // The ProcResGroup contributes no cycles of its own.
389 continue;
391 // The ProcResGroup contributes `RemainingCycles` cycles of its own.
392 Result.push_back({WPR->ProcResourceIdx,
393 static_cast<uint16_t>(std::round(RemainingCycles))});
394 // Spread the remaining cycles over all subunits.
395 for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
396 SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
397 ++SubResIdx) {
398 ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
402 return Result;
405 Analysis::ResolvedSchedClass::ResolvedSchedClass(
406 const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
407 bool WasVariant)
408 : SchedClassId(ResolvedSchedClassId), SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
409 WasVariant(WasVariant),
410 NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
411 IdealizedProcResPressure(computeIdealizedProcResPressure(
412 STI.getSchedModel(), NonRedundantWriteProcRes)) {
413 assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
414 "ResolvedSchedClass should never be variant");
417 void Analysis::SchedClassCluster::addPoint(
418 size_t PointId, const InstructionBenchmarkClustering &Clustering) {
419 PointIds.push_back(PointId);
420 const auto &Point = Clustering.getPoints()[PointId];
421 if (ClusterId.isUndef()) {
422 ClusterId = Clustering.getClusterIdForPoint(PointId);
423 Representative.resize(Point.Measurements.size());
425 for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
426 Representative[I].push(Point.Measurements[I]);
428 assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
431 // Returns a ProxResIdx by id or name.
432 static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
433 const llvm::StringRef NameOrId) {
434 // Interpret the key as an ProcResIdx.
435 unsigned ProcResIdx = 0;
436 if (llvm::to_integer(NameOrId, ProcResIdx, 10))
437 return ProcResIdx;
438 // Interpret the key as a ProcRes name.
439 const auto &SchedModel = STI.getSchedModel();
440 for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
441 if (NameOrId == SchedModel.getProcResource(I)->Name)
442 return I;
444 return 0;
447 bool Analysis::SchedClassCluster::measurementsMatch(
448 const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
449 const InstructionBenchmarkClustering &Clustering) const {
450 const size_t NumMeasurements = Representative.size();
451 std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
452 std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
453 // Latency case.
454 assert(!Clustering.getPoints().empty());
455 const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
456 if (Mode == InstructionBenchmark::Latency) {
457 if (NumMeasurements != 1) {
458 llvm::errs()
459 << "invalid number of measurements in latency mode: expected 1, got "
460 << NumMeasurements << "\n";
461 return false;
463 // Find the latency.
464 SchedClassPoint[0].PerInstructionValue = 0.0;
465 for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
466 const llvm::MCWriteLatencyEntry *const WLE =
467 STI.getWriteLatencyEntry(RSC.SCDesc, I);
468 SchedClassPoint[0].PerInstructionValue =
469 std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
471 ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
472 } else if (Mode == InstructionBenchmark::Uops) {
473 for (int I = 0, E = Representative.size(); I < E; ++I) {
474 const auto Key = Representative[I].key();
475 uint16_t ProcResIdx = findProcResIdx(STI, Key);
476 if (ProcResIdx > 0) {
477 // Find the pressure on ProcResIdx `Key`.
478 const auto ProcResPressureIt =
479 std::find_if(RSC.IdealizedProcResPressure.begin(),
480 RSC.IdealizedProcResPressure.end(),
481 [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
482 return WPR.first == ProcResIdx;
484 SchedClassPoint[I].PerInstructionValue =
485 ProcResPressureIt == RSC.IdealizedProcResPressure.end()
486 ? 0.0
487 : ProcResPressureIt->second;
488 } else if (Key == "NumMicroOps") {
489 SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
490 } else {
491 llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
492 "name, got "
493 << Key << "\n";
494 return false;
496 ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
498 } else {
499 llvm::errs() << "unimplemented measurement matching for mode " << Mode
500 << "\n";
501 return false;
503 return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
506 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
507 llvm::raw_ostream &OS) const {
508 OS << "<table class=\"sched-class-desc\">";
509 OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
510 "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
511 "resource (port) pressure assuming ideal distribution\">Idealized "
512 "Resource Pressure</th></tr>";
513 if (RSC.SCDesc->isValid()) {
514 const auto &SM = SubtargetInfo_->getSchedModel();
515 OS << "<tr><td>&#10004;</td>";
516 OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
517 OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
518 // Latencies.
519 OS << "<td><ul>";
520 for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
521 const auto *const Entry =
522 SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
523 OS << "<li>" << Entry->Cycles;
524 if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
525 // Dismabiguate if more than 1 latency.
526 OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
528 OS << "</li>";
530 OS << "</ul></td>";
531 // WriteProcRes.
532 OS << "<td><ul>";
533 for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
534 OS << "<li><span class=\"mono\">";
535 writeEscaped<kEscapeHtml>(OS,
536 SM.getProcResource(WPR.ProcResourceIdx)->Name);
537 OS << "</span>: " << WPR.Cycles << "</li>";
539 OS << "</ul></td>";
540 // Idealized port pressure.
541 OS << "<td><ul>";
542 for (const auto &Pressure : RSC.IdealizedProcResPressure) {
543 OS << "<li><span class=\"mono\">";
544 writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
545 .getProcResource(Pressure.first)
546 ->Name);
547 OS << "</span>: ";
548 writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
549 OS << "</li>";
551 OS << "</ul></td>";
552 OS << "</tr>";
553 } else {
554 OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
556 OS << "</table>";
// HTML <head> section (page title and CSS rules) that is written right after
// the opening <html> tag of the sched-class inconsistency report.
559 static constexpr const char kHtmlHead[] = R"(
560 <head>
561 <title>llvm-exegesis Analysis Results</title>
562 <style>
563 body {
564 font-family: sans-serif
566 span.sched-class-name {
567 font-weight: bold;
568 font-family: monospace;
570 span.opcode {
571 font-family: monospace;
573 span.config {
574 font-family: monospace;
576 div.inconsistency {
577 margin-top: 50px;
579 table {
580 margin-left: 50px;
581 border-collapse: collapse;
583 table, table tr,td,th {
584 border: 1px solid #444;
586 table ul {
587 padding-left: 0px;
588 margin: 0px;
589 list-style-type: none;
591 table.sched-class-clusters td {
592 padding-left: 10px;
593 padding-right: 10px;
594 padding-top: 10px;
595 padding-bottom: 10px;
597 table.sched-class-desc td {
598 padding-left: 10px;
599 padding-right: 10px;
600 padding-top: 2px;
601 padding-bottom: 2px;
603 span.mono {
604 font-family: monospace;
606 td.measurement {
607 text-align: center;
609 tr.good-cluster td.measurement {
610 color: #292
612 tr.bad-cluster td.measurement {
613 color: #922
615 tr.good-cluster td.measurement span.minmax {
616 color: #888;
618 tr.bad-cluster td.measurement span.minmax {
619 color: #888;
621 </style>
622 </head>
625 template <>
626 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
627 llvm::raw_ostream &OS) const {
628 const auto &FirstPoint = Clustering_.getPoints()[0];
629 // Print the header.
630 OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
631 OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
632 OS << "<h3>Triple: <span class=\"mono\">";
633 writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
634 OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
635 writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
636 OS << "</span></h3>";
638 for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
639 if (!RSCAndPoints.RSC.SCDesc)
640 continue;
641 // Bucket sched class points into sched class clusters.
642 std::vector<SchedClassCluster> SchedClassClusters;
643 for (const size_t PointId : RSCAndPoints.PointIds) {
644 const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
645 if (!ClusterId.isValid())
646 continue; // Ignore noise and errors. FIXME: take noise into account ?
647 auto SchedClassClusterIt =
648 std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
649 [ClusterId](const SchedClassCluster &C) {
650 return C.id() == ClusterId;
652 if (SchedClassClusterIt == SchedClassClusters.end()) {
653 SchedClassClusters.emplace_back();
654 SchedClassClusterIt = std::prev(SchedClassClusters.end());
656 SchedClassClusterIt->addPoint(PointId, Clustering_);
659 // Print any scheduling class that has at least one cluster that does not
660 // match the checked-in data.
661 if (llvm::all_of(SchedClassClusters,
662 [this, &RSCAndPoints](const SchedClassCluster &C) {
663 return C.measurementsMatch(
664 *SubtargetInfo_, RSCAndPoints.RSC, Clustering_);
666 continue; // Nothing weird.
668 OS << "<div class=\"inconsistency\"><p>Sched Class <span "
669 "class=\"sched-class-name\">";
670 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
671 writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
672 #else
673 OS << RSCAndPoints.RSC.SchedClassId;
674 #endif
675 OS << "</span> contains instructions whose performance characteristics do"
676 " not match that of LLVM:</p>";
677 printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
678 OS << "<p>llvm SchedModel data:</p>";
679 printSchedClassDescHtml(RSCAndPoints.RSC, OS);
680 OS << "</div>";
683 OS << "</body></html>";
684 return llvm::Error::success();
687 // Distributes a pressure budget as evenly as possible on the provided subunits
688 // given the already existing port pressure distribution.
690 // The algorithm is as follows: while there is remaining pressure to
691 // distribute, find the subunits with minimal pressure, and distribute
692 // remaining pressure equally up to the pressure of the unit with
693 // second-to-minimal pressure.
694 // For example, let's assume we want to distribute 2*P1256
695 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
696 // DensePressure = P0 P1 P2 P3 P4 P5 P6 P7
697 // 0.1 0.3 0.2 0.0 0.0 0.5 0.5 0.5
698 // RemainingPressure = 2.0
699 // We sort the subunits by pressure:
700 // Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
701 // We'll first start by the subunits with minimal pressure, which are at
702 // the beginning of the sorted array. In this example there is one (P2).
703 // The subunit with second-to-minimal pressure is the next one in the
704 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
705 // from the budget.
706 // Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
707 // RemainingPressure = 1.9
708 // We repeat this process: distribute 0.2 pressure on each of the minimal
709 // P2 and P1, decrease budget by 2*0.2:
710 // Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
711 // RemainingPressure = 1.5
712 // There are no second-to-minimal subunits so we just share the remaining
713 // budget (1.5 cycles) equally:
714 // Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
715 // RemainingPressure = 0.0
716 // We stop as there is no remaining budget to distribute.
717 void distributePressure(float RemainingPressure,
718 llvm::SmallVector<uint16_t, 32> Subunits,
719 llvm::SmallVector<float, 32> &DensePressure) {
720 // Find the number of subunits with minimal pressure (they are at the
721 // front).
722 llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
723 return DensePressure[A] < DensePressure[B];
725 const auto getPressureForSubunit = [&DensePressure,
726 &Subunits](size_t I) -> float & {
727 return DensePressure[Subunits[I]];
729 size_t NumMinimalSU = 1;
730 while (NumMinimalSU < Subunits.size() &&
731 getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
732 ++NumMinimalSU;
734 while (RemainingPressure > 0.0f) {
735 if (NumMinimalSU == Subunits.size()) {
736 // All units are minimal, just distribute evenly and be done.
737 for (size_t I = 0; I < NumMinimalSU; ++I) {
738 getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
740 return;
742 // Distribute the remaining pressure equally.
743 const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
744 const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
745 assert(MinimalPressure < SecondToMinimalPressure);
746 const float Increment = SecondToMinimalPressure - MinimalPressure;
747 if (RemainingPressure <= NumMinimalSU * Increment) {
748 // There is not enough remaining pressure.
749 for (size_t I = 0; I < NumMinimalSU; ++I) {
750 getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
752 return;
754 // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
755 for (size_t I = 0; I < NumMinimalSU; ++I) {
756 getPressureForSubunit(I) = SecondToMinimalPressure;
757 RemainingPressure -= SecondToMinimalPressure;
759 while (NumMinimalSU < Subunits.size() &&
760 getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
761 ++NumMinimalSU;
766 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
767 const llvm::MCSchedModel &SM,
768 llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
769 // DensePressure[I] is the port pressure for Proc Resource I.
770 llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
771 llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
772 const llvm::MCWriteProcResEntry &B) {
773 return A.ProcResourceIdx < B.ProcResourceIdx;
775 for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
776 // Get units for the entry.
777 const llvm::MCProcResourceDesc *const ProcResDesc =
778 SM.getProcResource(WPR.ProcResourceIdx);
779 if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
780 // This is a ProcResUnit.
781 DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
782 } else {
783 // This is a ProcResGroup.
784 llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
785 ProcResDesc->SubUnitsIdxBegin +
786 ProcResDesc->NumUnits);
787 distributePressure(WPR.Cycles, Subunits, DensePressure);
790 // Turn dense pressure into sparse pressure by removing zero entries.
791 std::vector<std::pair<uint16_t, float>> Pressure;
792 for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
793 if (DensePressure[I] > 0.0f)
794 Pressure.emplace_back(I, DensePressure[I]);
796 return Pressure;
799 } // namespace exegesis
800 } // namespace llvm