Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / tools / llvm-exegesis / lib / Analysis.cpp
blobbd088b907aaf7d13c34419f4f7e10868e9de448b
1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "Analysis.h"
10 #include "BenchmarkResult.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/MC/MCAsmInfo.h"
13 #include "llvm/MC/MCTargetOptions.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include <limits>
16 #include <unordered_set>
17 #include <vector>
19 namespace llvm {
20 namespace exegesis {
// Field separator used for every CSV row written by this file.
static constexpr char kCsvSep = ',';
24 namespace {
/// Selects the escaping rules applied by writeEscaped().
enum EscapeTag {
  kEscapeCsv,       // A single field of a CSV row.
  kEscapeHtml,      // Text placed inside an HTML element.
  kEscapeHtmlString // Text placed inside a double-quoted HTML string.
};
28 template <EscapeTag Tag> void writeEscaped(raw_ostream &OS, const StringRef S);
30 template <> void writeEscaped<kEscapeCsv>(raw_ostream &OS, const StringRef S) {
31 if (!S.contains(kCsvSep)) {
32 OS << S;
33 } else {
34 // Needs escaping.
35 OS << '"';
36 for (const char C : S) {
37 if (C == '"')
38 OS << "\"\"";
39 else
40 OS << C;
42 OS << '"';
46 template <> void writeEscaped<kEscapeHtml>(raw_ostream &OS, const StringRef S) {
47 for (const char C : S) {
48 if (C == '<')
49 OS << "&lt;";
50 else if (C == '>')
51 OS << "&gt;";
52 else if (C == '&')
53 OS << "&amp;";
54 else
55 OS << C;
59 template <>
60 void writeEscaped<kEscapeHtmlString>(raw_ostream &OS, const StringRef S) {
61 for (const char C : S) {
62 if (C == '"')
63 OS << "\\\"";
64 else
65 OS << C;
69 } // namespace
71 template <EscapeTag Tag>
72 static void
73 writeClusterId(raw_ostream &OS,
74 const BenchmarkClustering::ClusterId &CID) {
75 if (CID.isNoise())
76 writeEscaped<Tag>(OS, "[noise]");
77 else if (CID.isError())
78 writeEscaped<Tag>(OS, "[error]");
79 else
80 OS << CID.getId();
83 template <EscapeTag Tag>
84 static void writeMeasurementValue(raw_ostream &OS, const double Value) {
85 // Given Value, if we wanted to serialize it to a string,
86 // how many base-10 digits will we need to store, max?
87 static constexpr auto MaxDigitCount =
88 std::numeric_limits<decltype(Value)>::max_digits10;
89 // Also, we will need a decimal separator.
90 static constexpr auto DecimalSeparatorLen = 1; // '.' e.g.
91 // So how long of a string will the serialization produce, max?
92 static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen;
94 // WARNING: when changing the format, also adjust the small-size estimate ^.
95 static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}");
97 writeEscaped<Tag>(
98 OS, formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>());
101 template <typename EscapeTag, EscapeTag Tag>
102 void Analysis::writeSnippet(raw_ostream &OS, ArrayRef<uint8_t> Bytes,
103 const char *Separator) const {
104 SmallVector<std::string, 3> Lines;
105 // Parse the asm snippet and print it.
106 while (!Bytes.empty()) {
107 MCInst MI;
108 uint64_t MISize = 0;
109 if (!DisasmHelper_->decodeInst(MI, MISize, Bytes)) {
110 writeEscaped<Tag>(OS, join(Lines, Separator));
111 writeEscaped<Tag>(OS, Separator);
112 writeEscaped<Tag>(OS, "[error decoding asm snippet]");
113 return;
115 SmallString<128> InstPrinterStr; // FIXME: magic number.
116 raw_svector_ostream OSS(InstPrinterStr);
117 DisasmHelper_->printInst(&MI, OSS);
118 Bytes = Bytes.drop_front(MISize);
119 Lines.emplace_back(InstPrinterStr.str().trim());
121 writeEscaped<Tag>(OS, join(Lines, Separator));
124 // Prints a row representing an instruction, along with scheduling info and
125 // point coordinates (measurements).
126 void Analysis::printInstructionRowCsv(const size_t PointId,
127 raw_ostream &OS) const {
128 const Benchmark &Point = Clustering_.getPoints()[PointId];
129 writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
130 OS << kCsvSep;
131 writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
132 OS << kCsvSep;
133 writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
134 OS << kCsvSep;
135 assert(!Point.Key.Instructions.empty());
136 const MCInst &MCI = Point.keyInstruction();
137 unsigned SchedClassId;
138 std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId(
139 State_.getSubtargetInfo(), State_.getInstrInfo(), MCI);
140 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
141 const MCSchedClassDesc *const SCDesc =
142 State_.getSubtargetInfo().getSchedModel().getSchedClassDesc(SchedClassId);
143 writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
144 #else
145 OS << SchedClassId;
146 #endif
147 for (const auto &Measurement : Point.Measurements) {
148 OS << kCsvSep;
149 writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
151 OS << "\n";
154 Analysis::Analysis(const LLVMState &State,
155 const BenchmarkClustering &Clustering,
156 double AnalysisInconsistencyEpsilon,
157 bool AnalysisDisplayUnstableOpcodes)
158 : Clustering_(Clustering), State_(State),
159 AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon *
160 AnalysisInconsistencyEpsilon),
161 AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {
162 if (Clustering.getPoints().empty())
163 return;
165 DisasmHelper_ = std::make_unique<DisassemblerHelper>(State);
168 template <>
169 Error Analysis::run<Analysis::PrintClusters>(raw_ostream &OS) const {
170 if (Clustering_.getPoints().empty())
171 return Error::success();
173 // Write the header.
174 OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
175 << kCsvSep << "sched_class";
176 for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
177 OS << kCsvSep;
178 writeEscaped<kEscapeCsv>(OS, Measurement.Key);
180 OS << "\n";
182 // Write the points.
183 for (const auto &ClusterIt : Clustering_.getValidClusters()) {
184 for (const size_t PointId : ClusterIt.PointIndices) {
185 printInstructionRowCsv(PointId, OS);
187 OS << "\n\n";
189 return Error::success();
192 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
193 ResolvedSchedClass &&RSC)
194 : RSC(std::move(RSC)) {}
196 std::vector<Analysis::ResolvedSchedClassAndPoints>
197 Analysis::makePointsPerSchedClass() const {
198 std::vector<ResolvedSchedClassAndPoints> Entries;
199 // Maps SchedClassIds to index in result.
200 std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
201 const auto &Points = Clustering_.getPoints();
202 for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
203 const Benchmark &Point = Points[PointId];
204 if (!Point.Error.empty())
205 continue;
206 assert(!Point.Key.Instructions.empty());
207 // FIXME: we should be using the tuple of classes for instructions in the
208 // snippet as key.
209 const MCInst &MCI = Point.keyInstruction();
210 unsigned SchedClassId;
211 bool WasVariant;
212 std::tie(SchedClassId, WasVariant) =
213 ResolvedSchedClass::resolveSchedClassId(State_.getSubtargetInfo(),
214 State_.getInstrInfo(), MCI);
215 const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
216 if (IndexIt == SchedClassIdToIndex.end()) {
217 // Create a new entry.
218 SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
219 ResolvedSchedClassAndPoints Entry(ResolvedSchedClass(
220 State_.getSubtargetInfo(), SchedClassId, WasVariant));
221 Entry.PointIds.push_back(PointId);
222 Entries.push_back(std::move(Entry));
223 } else {
224 // Append to the existing entry.
225 Entries[IndexIt->second].PointIds.push_back(PointId);
228 return Entries;
231 // Parallel benchmarks repeat the same opcode multiple times. Just show this
232 // opcode and show the whole snippet only on hover.
233 static void writeParallelSnippetHtml(raw_ostream &OS,
234 const std::vector<MCInst> &Instructions,
235 const MCInstrInfo &InstrInfo) {
236 if (Instructions.empty())
237 return;
238 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
239 if (Instructions.size() > 1)
240 OS << " (x" << Instructions.size() << ")";
243 // Latency tries to find a serial path. Just show the opcode path and show the
244 // whole snippet only on hover.
245 static void writeLatencySnippetHtml(raw_ostream &OS,
246 const std::vector<MCInst> &Instructions,
247 const MCInstrInfo &InstrInfo) {
248 bool First = true;
249 for (const MCInst &Instr : Instructions) {
250 if (First)
251 First = false;
252 else
253 OS << " &rarr; ";
254 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
258 void Analysis::printPointHtml(const Benchmark &Point,
259 llvm::raw_ostream &OS) const {
260 OS << "<li><span class=\"mono\" title=\"";
261 writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet, "\n");
262 OS << "\">";
263 switch (Point.Mode) {
264 case Benchmark::Latency:
265 writeLatencySnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo());
266 break;
267 case Benchmark::Uops:
268 case Benchmark::InverseThroughput:
269 writeParallelSnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo());
270 break;
271 default:
272 llvm_unreachable("invalid mode");
274 OS << "</span> <span class=\"mono\">";
275 writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
276 OS << "</span></li>";
279 void Analysis::printSchedClassClustersHtml(
280 const std::vector<SchedClassCluster> &Clusters,
281 const ResolvedSchedClass &RSC, raw_ostream &OS) const {
282 const auto &Points = Clustering_.getPoints();
283 OS << "<table class=\"sched-class-clusters\">";
284 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
285 assert(!Clusters.empty());
286 for (const auto &Measurement :
287 Points[Clusters[0].getPointIds()[0]].Measurements) {
288 OS << "<th>";
289 writeEscaped<kEscapeHtml>(OS, Measurement.Key);
290 OS << "</th>";
292 OS << "</tr>";
293 for (const SchedClassCluster &Cluster : Clusters) {
294 OS << "<tr class=\""
295 << (Cluster.measurementsMatch(State_.getSubtargetInfo(), RSC,
296 Clustering_,
297 AnalysisInconsistencyEpsilonSquared_)
298 ? "good-cluster"
299 : "bad-cluster")
300 << "\"><td>";
301 writeClusterId<kEscapeHtml>(OS, Cluster.id());
302 OS << "</td><td><ul>";
303 for (const size_t PointId : Cluster.getPointIds()) {
304 printPointHtml(Points[PointId], OS);
306 OS << "</ul></td>";
307 for (const auto &Stats : Cluster.getCentroid().getStats()) {
308 OS << "<td class=\"measurement\">";
309 writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
310 OS << "<br><span class=\"minmax\">[";
311 writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
312 OS << ";";
313 writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
314 OS << "]</span></td>";
316 OS << "</tr>";
318 OS << "</table>";
321 void Analysis::SchedClassCluster::addPoint(
322 size_t PointId, const BenchmarkClustering &Clustering) {
323 PointIds.push_back(PointId);
324 const auto &Point = Clustering.getPoints()[PointId];
325 if (ClusterId.isUndef())
326 ClusterId = Clustering.getClusterIdForPoint(PointId);
327 assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
329 Centroid.addPoint(Point.Measurements);
332 bool Analysis::SchedClassCluster::measurementsMatch(
333 const MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
334 const BenchmarkClustering &Clustering,
335 const double AnalysisInconsistencyEpsilonSquared_) const {
336 assert(!Clustering.getPoints().empty());
337 const Benchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
339 if (!Centroid.validate(Mode))
340 return false;
342 const std::vector<BenchmarkMeasure> ClusterCenterPoint =
343 Centroid.getAsPoint();
345 const std::vector<BenchmarkMeasure> SchedClassPoint =
346 RSC.getAsPoint(Mode, STI, Centroid.getStats());
347 if (SchedClassPoint.empty())
348 return false; // In Uops mode validate() may not be enough.
350 assert(ClusterCenterPoint.size() == SchedClassPoint.size() &&
351 "Expected measured/sched data dimensions to match.");
353 return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint,
354 AnalysisInconsistencyEpsilonSquared_);
357 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
358 raw_ostream &OS) const {
359 OS << "<table class=\"sched-class-desc\">";
360 OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
361 "th><th>RThroughput</th><th>WriteProcRes</th><th title=\"This is the "
362 "idealized unit resource (port) pressure assuming ideal "
363 "distribution\">Idealized Resource Pressure</th></tr>";
364 if (RSC.SCDesc->isValid()) {
365 const auto &SI = State_.getSubtargetInfo();
366 const auto &SM = SI.getSchedModel();
367 OS << "<tr><td>&#10004;</td>";
368 OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
369 OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
370 // Latencies.
371 OS << "<td><ul>";
372 for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
373 const auto *const Entry = SI.getWriteLatencyEntry(RSC.SCDesc, I);
374 OS << "<li>" << Entry->Cycles;
375 if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
376 // Dismabiguate if more than 1 latency.
377 OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
379 OS << "</li>";
381 OS << "</ul></td>";
382 // inverse throughput.
383 OS << "<td>";
384 writeMeasurementValue<kEscapeHtml>(
385 OS, MCSchedModel::getReciprocalThroughput(SI, *RSC.SCDesc));
386 OS << "</td>";
387 // WriteProcRes.
388 OS << "<td><ul>";
389 for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
390 OS << "<li><span class=\"mono\">";
391 writeEscaped<kEscapeHtml>(OS,
392 SM.getProcResource(WPR.ProcResourceIdx)->Name);
393 OS << "</span>: " << WPR.ReleaseAtCycle << "</li>";
395 OS << "</ul></td>";
396 // Idealized port pressure.
397 OS << "<td><ul>";
398 for (const auto &Pressure : RSC.IdealizedProcResPressure) {
399 OS << "<li><span class=\"mono\">";
400 writeEscaped<kEscapeHtml>(
401 OS, SI.getSchedModel().getProcResource(Pressure.first)->Name);
402 OS << "</span>: ";
403 writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
404 OS << "</li>";
406 OS << "</ul></td>";
407 OS << "</tr>";
408 } else {
409 OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
411 OS << "</table>";
414 void Analysis::printClusterRawHtml(
415 const BenchmarkClustering::ClusterId &Id, StringRef display_name,
416 llvm::raw_ostream &OS) const {
417 const auto &Points = Clustering_.getPoints();
418 const auto &Cluster = Clustering_.getCluster(Id);
419 if (Cluster.PointIndices.empty())
420 return;
422 OS << "<div class=\"inconsistency\"><p>" << display_name << " Cluster ("
423 << Cluster.PointIndices.size() << " points)</p>";
424 OS << "<table class=\"sched-class-clusters\">";
425 // Table Header.
426 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
427 for (const auto &Measurement : Points[Cluster.PointIndices[0]].Measurements) {
428 OS << "<th>";
429 writeEscaped<kEscapeHtml>(OS, Measurement.Key);
430 OS << "</th>";
432 OS << "</tr>";
434 // Point data.
435 for (const auto &PointId : Cluster.PointIndices) {
436 OS << "<tr class=\"bad-cluster\"><td>" << display_name << "</td><td><ul>";
437 printPointHtml(Points[PointId], OS);
438 OS << "</ul></td>";
439 for (const auto &Measurement : Points[PointId].Measurements) {
440 OS << "<td class=\"measurement\">";
441 writeMeasurementValue<kEscapeHtml>(OS, Measurement.PerInstructionValue);
443 OS << "</tr>";
445 OS << "</table>";
447 OS << "</div>";
449 } // namespace exegesis
// Static <head> section (page title and inline stylesheet) that is streamed
// into the HTML report right after the opening <html> tag.
static constexpr const char kHtmlHead[] = R"(
<head>
<title>llvm-exegesis Analysis Results</title>
<style>
body {
  font-family: sans-serif
}
span.sched-class-name {
  font-weight: bold;
  font-family: monospace;
}
span.opcode {
  font-family: monospace;
}
span.config {
  font-family: monospace;
}
div.inconsistency {
  margin-top: 50px;
}
table {
  margin-left: 50px;
  border-collapse: collapse;
}
table, table tr,td,th {
  border: 1px solid #444;
}
table ul {
  padding-left: 0px;
  margin: 0px;
  list-style-type: none;
}
table.sched-class-clusters td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 10px;
  padding-bottom: 10px;
}
table.sched-class-desc td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 2px;
  padding-bottom: 2px;
}
span.mono {
  font-family: monospace;
}
td.measurement {
  text-align: center;
}
tr.good-cluster td.measurement {
  color: #292
}
tr.bad-cluster td.measurement {
  color: #922
}
tr.good-cluster td.measurement span.minmax {
  color: #888;
}
tr.bad-cluster td.measurement span.minmax {
  color: #888;
}
</style>
</head>
)";
517 template <>
518 Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
519 raw_ostream &OS) const {
520 const auto &FirstPoint = Clustering_.getPoints()[0];
521 // Print the header.
522 OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
523 OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
524 OS << "<h3>Triple: <span class=\"mono\">";
525 writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
526 OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
527 writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
528 OS << "</span></h3>";
530 const auto &SI = State_.getSubtargetInfo();
531 for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
532 if (!RSCAndPoints.RSC.SCDesc)
533 continue;
534 // Bucket sched class points into sched class clusters.
535 std::vector<SchedClassCluster> SchedClassClusters;
536 for (const size_t PointId : RSCAndPoints.PointIds) {
537 const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
538 if (!ClusterId.isValid())
539 continue; // Ignore noise and errors. FIXME: take noise into account ?
540 if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_)
541 continue; // Either display stable or unstable clusters only.
542 auto SchedClassClusterIt = llvm::find_if(
543 SchedClassClusters, [ClusterId](const SchedClassCluster &C) {
544 return C.id() == ClusterId;
546 if (SchedClassClusterIt == SchedClassClusters.end()) {
547 SchedClassClusters.emplace_back();
548 SchedClassClusterIt = std::prev(SchedClassClusters.end());
550 SchedClassClusterIt->addPoint(PointId, Clustering_);
553 // Print any scheduling class that has at least one cluster that does not
554 // match the checked-in data.
555 if (all_of(SchedClassClusters, [this, &RSCAndPoints,
556 &SI](const SchedClassCluster &C) {
557 return C.measurementsMatch(SI, RSCAndPoints.RSC, Clustering_,
558 AnalysisInconsistencyEpsilonSquared_);
560 continue; // Nothing weird.
562 OS << "<div class=\"inconsistency\"><p>Sched Class <span "
563 "class=\"sched-class-name\">";
564 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
565 writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
566 #else
567 OS << RSCAndPoints.RSC.SchedClassId;
568 #endif
569 OS << "</span> contains instructions whose performance characteristics do"
570 " not match that of LLVM:</p>";
571 printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
572 OS << "<p>llvm SchedModel data:</p>";
573 printSchedClassDescHtml(RSCAndPoints.RSC, OS);
574 OS << "</div>";
577 printClusterRawHtml(BenchmarkClustering::ClusterId::noise(),
578 "[noise]", OS);
580 OS << "</body></html>";
581 return Error::success();
584 } // namespace exegesis
585 } // namespace llvm