tools/llvm-exegesis/lib/Analysis.cpp

   1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "Analysis.h"
  11 #include "BenchmarkResult.h"
  12 #include "llvm/ADT/STLExtras.h"
  13 #include "llvm/MC/MCAsmInfo.h"
  14 #include "llvm/Support/FormatVariadic.h"
  15 #include <unordered_set>
  16 #include <vector>
  17
  18 namespace exegesis {
  19
  20 static const char kCsvSep = ',';
  21
  22 namespace {
  23
  24 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
  25
  26 template <EscapeTag Tag>
  27 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
  28
  29 template <>
  30 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
  31   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
  32     OS << S;
  33   } else {
  34     // Needs escaping.
  35     OS << '"';
  36     for (const char C : S) {
  37       if (C == '"')
  38         OS << "\"\"";
  39       else
  40         OS << C;
  41     }
  42     OS << '"';
  43   }
  44 }
  45
  46 template <>
  47 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
  48   for (const char C : S) {
  49     if (C == '<')
  50       OS << "&lt;";
  51     else if (C == '>')
  52       OS << "&gt;";
  53     else if (C == '&')
  54       OS << "&amp;";
  55     else
  56       OS << C;
  57   }
  58 }
  59
  60 template <>
  61 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
  62                                      const llvm::StringRef S) {
  63   for (const char C : S) {
  64     if (C == '"')
  65       OS << "\\\"";
  66     else
  67       OS << C;
  68   }
  69 }
  70
  71 } // namespace
  72
  73 template <EscapeTag Tag>
  74 static void
  75 writeClusterId(llvm::raw_ostream &OS,
  76                const InstructionBenchmarkClustering::ClusterId &CID) {
  77   if (CID.isNoise())
  78     writeEscaped<Tag>(OS, "[noise]");
  79   else if (CID.isError())
  80     writeEscaped<Tag>(OS, "[error]");
  81   else
  82     OS << CID.getId();
  83 }
  84
  85 template <EscapeTag Tag>
  86 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
  87   writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
  88 }
  89
  90 template <typename EscapeTag, EscapeTag Tag>
  91 void Analysis::writeSnippet(llvm::raw_ostream &OS,
  92                             llvm::ArrayRef<uint8_t> Bytes,
  93                             const char *Separator) const {
  94   llvm::SmallVector<std::string, 3> Lines;
  95   // Parse the asm snippet and print it.
  96   while (!Bytes.empty()) {
  97     llvm::MCInst MI;
  98     uint64_t MISize = 0;
  99     if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
 100                                  llvm::nulls())) {
 101       writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
 102       writeEscaped<Tag>(OS, Separator);
 103       writeEscaped<Tag>(OS, "[error decoding asm snippet]");
 104       return;
 105     }
 106     Lines.emplace_back();
 107     std::string &Line = Lines.back();
 108     llvm::raw_string_ostream OSS(Line);
 109     InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
 110     Bytes = Bytes.drop_front(MISize);
 111     OSS.flush();
 112     Line = llvm::StringRef(Line).trim().str();
 113   }
 114   writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
 115 }
 116
 117 // Prints a row representing an instruction, along with scheduling info and
 118 // point coordinates (measurements).
 119 void Analysis::printInstructionRowCsv(const size_t PointId,
 120                                       llvm::raw_ostream &OS) const {
 121   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
 122   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
 123   OS << kCsvSep;
 124   writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
 125   OS << kCsvSep;
 126   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
 127   OS << kCsvSep;
 128   assert(!Point.Key.Instructions.empty());
 129   // FIXME: Resolve variant classes.
 130   const unsigned SchedClassId =
 131       InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass();
 132 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 133   const auto &SchedModel = SubtargetInfo_->getSchedModel();
 134   const llvm::MCSchedClassDesc *const SCDesc =
 135       SchedModel.getSchedClassDesc(SchedClassId);
 136   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
 137 #else
 138   OS << SchedClassId;
 139 #endif
 140   for (const auto &Measurement : Point.Measurements) {
 141     OS << kCsvSep;
 142     writeMeasurementValue<kEscapeCsv>(OS, Measurement.Value);
 143   }
 144   OS << "\n";
 145 }
 146
 147 Analysis::Analysis(const llvm::Target &Target,
 148                    const InstructionBenchmarkClustering &Clustering)
 149     : Clustering_(Clustering) {
 150   if (Clustering.getPoints().empty())
 151     return;
 152
 153   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
 154   InstrInfo_.reset(Target.createMCInstrInfo());
 155   RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
 156   AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
 157   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
 158                                                     FirstPoint.CpuName, ""));
 159   InstPrinter_.reset(Target.createMCInstPrinter(
 160       llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
 161       *InstrInfo_, *RegInfo_));
 162
 163   Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
 164                                                 &ObjectFileInfo_);
 165   Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
 166   assert(Disasm_ && "cannot create MCDisassembler. missing call to "
 167                     "InitializeXXXTargetDisassembler ?");
 168 }
 169
 170 template <>
 171 llvm::Error
 172 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
 173   if (Clustering_.getPoints().empty())
 174     return llvm::Error::success();
 175
 176   // Write the header.
 177   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
 178      << kCsvSep << "sched_class";
 179   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
 180     OS << kCsvSep;
 181     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
 182   }
 183   OS << "\n";
 184
 185   // Write the points.
 186   const auto &Clusters = Clustering_.getValidClusters();
 187   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
 188     for (const size_t PointId : Clusters[I].PointIndices) {
 189       printInstructionRowCsv(PointId, OS);
 190     }
 191     OS << "\n\n";
 192   }
 193   return llvm::Error::success();
 194 }
 195
 196 std::unordered_map<unsigned, std::vector<size_t>>
 197 Analysis::makePointsPerSchedClass() const {
 198   std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
 199   const auto &Points = Clustering_.getPoints();
 200   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
 201     const InstructionBenchmark &Point = Points[PointId];
 202     if (!Point.Error.empty())
 203       continue;
 204     assert(!Point.Key.Instructions.empty());
 205     const auto Opcode = Point.Key.Instructions[0].getOpcode();
 206     // FIXME: Resolve variant classes.
 207     PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back(
 208         PointId);
 209   }
 210   return PointsPerSchedClass;
 211 }
 212
 213 // Uops repeat the same opcode over again. Just show this opcode and show the
 214 // whole snippet only on hover.
 215 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
 216                                  const std::vector<llvm::MCInst> &Instructions,
 217                                  const llvm::MCInstrInfo &InstrInfo) {
 218   if (Instructions.empty())
 219     return;
 220   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
 221   if (Instructions.size() > 1)
 222     OS << " (x" << Instructions.size() << ")";
 223 }
 224
 225 // Latency tries to find a serial path. Just show the opcode path and show the
 226 // whole snippet only on hover.
 227 static void
 228 writeLatencySnippetHtml(llvm::raw_ostream &OS,
 229                         const std::vector<llvm::MCInst> &Instructions,
 230                         const llvm::MCInstrInfo &InstrInfo) {
 231   bool First = true;
 232   for (const llvm::MCInst &Instr : Instructions) {
 233     if (First)
 234       First = false;
 235     else
 236       OS << " &rarr; ";
 237     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
 238   }
 239 }
 240
 241 void Analysis::printSchedClassClustersHtml(
 242     const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
 243     llvm::raw_ostream &OS) const {
 244   const auto &Points = Clustering_.getPoints();
 245   OS << "<table class=\"sched-class-clusters\">";
 246   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
 247   assert(!Clusters.empty());
 248   for (const auto &Measurement :
 249        Points[Clusters[0].getPointIds()[0]].Measurements) {
 250     OS << "<th>";
 251     if (Measurement.DebugString.empty())
 252       writeEscaped<kEscapeHtml>(OS, Measurement.Key);
 253     else
 254       writeEscaped<kEscapeHtml>(OS, Measurement.DebugString);
 255     OS << "</th>";
 256   }
 257   OS << "</tr>";
 258   for (const SchedClassCluster &Cluster : Clusters) {
 259     OS << "<tr class=\""
 260        << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
 261                ? "good-cluster"
 262                : "bad-cluster")
 263        << "\"><td>";
 264     writeClusterId<kEscapeHtml>(OS, Cluster.id());
 265     OS << "</td><td><ul>";
 266     for (const size_t PointId : Cluster.getPointIds()) {
 267       const auto &Point = Points[PointId];
 268       OS << "<li><span class=\"mono\" title=\"";
 269       writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
 270                                                  "\n");
 271       OS << "\">";
 272       switch (Point.Mode) {
 273       case InstructionBenchmark::Latency:
 274         writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
 275         break;
 276       case InstructionBenchmark::Uops:
 277         writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
 278         break;
 279       default:
 280         llvm_unreachable("invalid mode");
 281       }
 282       OS << "</span> <span class=\"mono\">";
 283       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
 284       OS << "</span></li>";
 285     }
 286     OS << "</ul></td>";
 287     for (const auto &Stats : Cluster.getRepresentative()) {
 288       OS << "<td class=\"measurement\">";
 289       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
 290       OS << "<br><span class=\"minmax\">[";
 291       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
 292       OS << ";";
 293       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
 294       OS << "]</span></td>";
 295     }
 296     OS << "</tr>";
 297   }
 298   OS << "</table>";
 299 }
 300
 301 // Return the non-redundant list of WriteProcRes used by the given sched class.
 302 // The scheduling model for LLVM is such that each instruction has a certain
 303 // number of uops which consume resources which are described by WriteProcRes
 304 // entries. Each entry describe how many cycles are spent on a specific ProcRes
 305 // kind.
 306 // For example, an instruction might have 3 uOps, one dispatching on P0
 307 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
 308 // Note that LLVM additionally denormalizes resource consumption to include
 309 // usage of super resources by subresources. So in practice if there exists a
 310 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
 311 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
 312 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
 313 // denote implied usage of superresources by subresources:
 314 //            P0      P06    P016
 315 //     uOp1    1      (1)     (1)
 316 //     uOp2            1      (1)
 317 //     uOp3            1      (1)
 318 //     =============================
 319 //             1       3       3
 320 // Eventually we end up with three entries for the WriteProcRes of the
 321 // instruction:
 322 //    {ProcResIdx=1,  Cycles=1}  // P0
 323 //    {ProcResIdx=7,  Cycles=3}  // P06
 324 //    {ProcResIdx=10, Cycles=3}  // P016
 325 //
 326 // Note that in this case, P016 does not contribute any cycles, so it would
 327 // be removed by this function.
 328 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
 329 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
 330 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
 331                             const llvm::MCSubtargetInfo &STI) {
 332   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
 333   const auto &SM = STI.getSchedModel();
 334   const unsigned NumProcRes = SM.getNumProcResourceKinds();
 335
 336   // This assumes that the ProcResDescs are sorted in topological order, which
 337   // is guaranteed by the tablegen backend.
 338   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
 339   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
 340                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
 341        WPR != WPREnd; ++WPR) {
 342     const llvm::MCProcResourceDesc *const ProcResDesc =
 343         SM.getProcResource(WPR->ProcResourceIdx);
 344     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
 345       // This is a ProcResUnit.
 346       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
 347       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
 348     } else {
 349       // This is a ProcResGroup. First see if it contributes any cycles or if
 350       // it has cycles just from subunits.
 351       float RemainingCycles = WPR->Cycles;
 352       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
 353            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
 354            ++SubResIdx) {
 355         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
 356       }
 357       if (RemainingCycles < 0.01f) {
 358         // The ProcResGroup contributes no cycles of its own.
 359         continue;
 360       }
 361       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
 362       Result.push_back({WPR->ProcResourceIdx,
 363                         static_cast<uint16_t>(std::round(RemainingCycles))});
 364       // Spread the remaining cycles over all subunits.
 365       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
 366            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
 367            ++SubResIdx) {
 368         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
 369       }
 370     }
 371   }
 372   return Result;
 373 }
 374
 375 Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
 376                                  const llvm::MCSubtargetInfo &STI)
 377     : SCDesc(&SD),
 378       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
 379       IdealizedProcResPressure(computeIdealizedProcResPressure(
 380           STI.getSchedModel(), NonRedundantWriteProcRes)) {}
 381
 382 void Analysis::SchedClassCluster::addPoint(
 383     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
 384   PointIds.push_back(PointId);
 385   const auto &Point = Clustering.getPoints()[PointId];
 386   if (ClusterId.isUndef()) {
 387     ClusterId = Clustering.getClusterIdForPoint(PointId);
 388     Representative.resize(Point.Measurements.size());
 389   }
 390   for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
 391     Representative[I].push(Point.Measurements[I]);
 392   }
 393   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
 394 }
 395
 396 bool Analysis::SchedClassCluster::measurementsMatch(
 397     const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
 398     const InstructionBenchmarkClustering &Clustering) const {
 399   const size_t NumMeasurements = Representative.size();
 400   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
 401   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
 402   // Latency case.
 403   assert(!Clustering.getPoints().empty());
 404   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
 405   if (Mode == InstructionBenchmark::Latency) {
 406     if (NumMeasurements != 1) {
 407       llvm::errs()
 408           << "invalid number of measurements in latency mode: expected 1, got "
 409           << NumMeasurements << "\n";
 410       return false;
 411     }
 412     // Find the latency.
 413     SchedClassPoint[0].Value = 0.0;
 414     for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) {
 415       const llvm::MCWriteLatencyEntry *const WLE =
 416           STI.getWriteLatencyEntry(SC.SCDesc, I);
 417       SchedClassPoint[0].Value =
 418           std::max<double>(SchedClassPoint[0].Value, WLE->Cycles);
 419     }
 420     ClusterCenterPoint[0].Value = Representative[0].avg();
 421   } else if (Mode == InstructionBenchmark::Uops) {
 422     for (int I = 0, E = Representative.size(); I < E; ++I) {
 423       // Find the pressure on ProcResIdx `Key`.
 424       uint16_t ProcResIdx = 0;
 425       if (!llvm::to_integer(Representative[I].key(), ProcResIdx, 10)) {
 426         llvm::errs() << "expected ProcResIdx key, got "
 427                      << Representative[I].key() << "\n";
 428         return false;
 429       }
 430       const auto ProcResPressureIt =
 431           std::find_if(SC.IdealizedProcResPressure.begin(),
 432                        SC.IdealizedProcResPressure.end(),
 433                        [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
 434                          return WPR.first == ProcResIdx;
 435                        });
 436       SchedClassPoint[I].Value =
 437           ProcResPressureIt == SC.IdealizedProcResPressure.end()
 438               ? 0.0
 439               : ProcResPressureIt->second;
 440       ClusterCenterPoint[I].Value = Representative[I].avg();
 441     }
 442   } else {
 443     llvm::errs() << "unimplemented measurement matching for mode " << Mode
 444                  << "\n";
 445     return false;
 446   }
 447   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
 448 }
 449
 450 void Analysis::printSchedClassDescHtml(const SchedClass &SC,
 451                                        llvm::raw_ostream &OS) const {
 452   OS << "<table class=\"sched-class-desc\">";
 453   OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
 454         "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
 455         "resource (port) pressure assuming ideal distribution\">Idealized "
 456         "Resource Pressure</th></tr>";
 457   if (SC.SCDesc->isValid()) {
 458     const auto &SM = SubtargetInfo_->getSchedModel();
 459     OS << "<tr><td>&#10004;</td>";
 460     OS << "<td>" << (SC.SCDesc->isVariant() ? "&#10004;" : "&#10005;")
 461        << "</td>";
 462     OS << "<td>" << SC.SCDesc->NumMicroOps << "</td>";
 463     // Latencies.
 464     OS << "<td><ul>";
 465     for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
 466       const auto *const Entry =
 467           SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I);
 468       OS << "<li>" << Entry->Cycles;
 469       if (SC.SCDesc->NumWriteLatencyEntries > 1) {
 470         // Dismabiguate if more than 1 latency.
 471         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
 472       }
 473       OS << "</li>";
 474     }
 475     OS << "</ul></td>";
 476     // WriteProcRes.
 477     OS << "<td><ul>";
 478     for (const auto &WPR : SC.NonRedundantWriteProcRes) {
 479       OS << "<li><span class=\"mono\">";
 480       writeEscaped<kEscapeHtml>(OS,
 481                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
 482       OS << "</span>: " << WPR.Cycles << "</li>";
 483     }
 484     OS << "</ul></td>";
 485     // Idealized port pressure.
 486     OS << "<td><ul>";
 487     for (const auto &Pressure : SC.IdealizedProcResPressure) {
 488       OS << "<li><span class=\"mono\">";
 489       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
 490                                         .getProcResource(Pressure.first)
 491                                         ->Name);
 492       OS << "</span>: ";
 493       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
 494       OS << "</li>";
 495     }
 496     OS << "</ul></td>";
 497     OS << "</tr>";
 498   } else {
 499     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
 500   }
 501   OS << "</table>";
 502 }
 503
 504 static constexpr const char kHtmlHead[] = R"(
 505 <head>
 506 <title>llvm-exegesis Analysis Results</title>
 507 <style>
 508 body {
 509   font-family: sans-serif
 510 }
 511 span.sched-class-name {
 512   font-weight: bold;
 513   font-family: monospace;
 514 }
 515 span.opcode {
 516   font-family: monospace;
 517 }
 518 span.config {
 519   font-family: monospace;
 520 }
 521 div.inconsistency {
 522   margin-top: 50px;
 523 }
 524 table {
 525   margin-left: 50px;
 526   border-collapse: collapse;
 527 }
 528 table, table tr,td,th {
 529   border: 1px solid #444;
 530 }
 531 table ul {
 532   padding-left: 0px;
 533   margin: 0px;
 534   list-style-type: none;
 535 }
 536 table.sched-class-clusters td {
 537   padding-left: 10px;
 538   padding-right: 10px;
 539   padding-top: 10px;
 540   padding-bottom: 10px;
 541 }
 542 table.sched-class-desc td {
 543   padding-left: 10px;
 544   padding-right: 10px;
 545   padding-top: 2px;
 546   padding-bottom: 2px;
 547 }
 548 span.mono {
 549   font-family: monospace;
 550 }
 551 td.measurement {
 552   text-align: center;
 553 }
 554 tr.good-cluster td.measurement {
 555   color: #292
 556 }
 557 tr.bad-cluster td.measurement {
 558   color: #922
 559 }
 560 tr.good-cluster td.measurement span.minmax {
 561   color: #888;
 562 }
 563 tr.bad-cluster td.measurement span.minmax {
 564   color: #888;
 565 }
 566 </style>
 567 </head>
 568 )";
 569
 570 template <>
 571 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
 572     llvm::raw_ostream &OS) const {
 573   const auto &FirstPoint = Clustering_.getPoints()[0];
 574   // Print the header.
 575   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
 576   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
 577   OS << "<h3>Triple: <span class=\"mono\">";
 578   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
 579   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
 580   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
 581   OS << "</span></h3>";
 582
 583   for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
 584     const auto SchedClassId = SchedClassAndPoints.first;
 585     const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
 586     const auto &SchedModel = SubtargetInfo_->getSchedModel();
 587     const llvm::MCSchedClassDesc *const SCDesc =
 588         SchedModel.getSchedClassDesc(SchedClassId);
 589     if (!SCDesc)
 590       continue;
 591     const SchedClass SC(*SCDesc, *SubtargetInfo_);
 592
 593     // Bucket sched class points into sched class clusters.
 594     std::vector<SchedClassCluster> SchedClassClusters;
 595     for (const size_t PointId : SchedClassPoints) {
 596       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
 597       if (!ClusterId.isValid())
 598         continue; // Ignore noise and errors. FIXME: take noise into account ?
 599       auto SchedClassClusterIt =
 600           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
 601                        [ClusterId](const SchedClassCluster &C) {
 602                          return C.id() == ClusterId;
 603                        });
 604       if (SchedClassClusterIt == SchedClassClusters.end()) {
 605         SchedClassClusters.emplace_back();
 606         SchedClassClusterIt = std::prev(SchedClassClusters.end());
 607       }
 608       SchedClassClusterIt->addPoint(PointId, Clustering_);
 609     }
 610
 611     // Print any scheduling class that has at least one cluster that does not
 612     // match the checked-in data.
 613     if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
 614                     [this, &SC](const SchedClassCluster &C) {
 615                       return C.measurementsMatch(*SubtargetInfo_, SC,
 616                                                  Clustering_);
 617                     }))
 618       continue; // Nothing weird.
 619
 620     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
 621           "class=\"sched-class-name\">";
 622 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 623     writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
 624 #else
 625     OS << SchedClassId;
 626 #endif
 627     OS << "</span> contains instructions whose performance characteristics do"
 628           " not match that of LLVM:</p>";
 629     printSchedClassClustersHtml(SchedClassClusters, SC, OS);
 630     OS << "<p>llvm SchedModel data:</p>";
 631     printSchedClassDescHtml(SC, OS);
 632     OS << "</div>";
 633   }
 634
 635   OS << "</body></html>";
 636   return llvm::Error::success();
 637 }
 638
 639 // Distributes a pressure budget as evenly as possible on the provided subunits
 640 // given the already existing port pressure distribution.
 641 //
 642 // The algorithm is as follows: while there is remaining pressure to
 643 // distribute, find the subunits with minimal pressure, and distribute
 644 // remaining pressure equally up to the pressure of the unit with
 645 // second-to-minimal pressure.
 646 // For example, let's assume we want to distribute 2*P1256
 647 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
 648 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
 649 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
 650 //     RemainingPressure = 2.0
 651 // We sort the subunits by pressure:
 652 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
 653 // We'll first start by the subunits with minimal pressure, which are at
 654 // the beginning of the sorted array. In this example there is one (P2).
 655 // The subunit with second-to-minimal pressure is the next one in the
 656 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
 657 // from the budget.
 658 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
 659 //     RemainingPressure = 1.9
 660 // We repeat this process: distribute 0.2 pressure on each of the minimal
 661 // P2 and P1, decrease budget by 2*0.2:
 662 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
 663 //     RemainingPressure = 1.5
 664 // There are no second-to-minimal subunits so we just share the remaining
 665 // budget (1.5 cycles) equally:
 666 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
 667 //     RemainingPressure = 0.0
 668 // We stop as there is no remaining budget to distribute.
 669 void distributePressure(float RemainingPressure,
 670                         llvm::SmallVector<uint16_t, 32> Subunits,
 671                         llvm::SmallVector<float, 32> &DensePressure) {
 672   // Find the number of subunits with minimal pressure (they are at the
 673   // front).
 674   llvm::sort(Subunits.begin(), Subunits.end(),
 675              [&DensePressure](const uint16_t A, const uint16_t B) {
 676                return DensePressure[A] < DensePressure[B];
 677              });
 678   const auto getPressureForSubunit = [&DensePressure,
 679                                       &Subunits](size_t I) -> float & {
 680     return DensePressure[Subunits[I]];
 681   };
 682   size_t NumMinimalSU = 1;
 683   while (NumMinimalSU < Subunits.size() &&
 684          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
 685     ++NumMinimalSU;
 686   }
 687   while (RemainingPressure > 0.0f) {
 688     if (NumMinimalSU == Subunits.size()) {
 689       // All units are minimal, just distribute evenly and be done.
 690       for (size_t I = 0; I < NumMinimalSU; ++I) {
 691         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
 692       }
 693       return;
 694     }
 695     // Distribute the remaining pressure equally.
 696     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
 697     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
 698     assert(MinimalPressure < SecondToMinimalPressure);
 699     const float Increment = SecondToMinimalPressure - MinimalPressure;
 700     if (RemainingPressure <= NumMinimalSU * Increment) {
 701       // There is not enough remaining pressure.
 702       for (size_t I = 0; I < NumMinimalSU; ++I) {
 703         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
 704       }
 705       return;
 706     }
 707     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
 708     for (size_t I = 0; I < NumMinimalSU; ++I) {
 709       getPressureForSubunit(I) = SecondToMinimalPressure;
 710       RemainingPressure -= SecondToMinimalPressure;
 711     }
 712     while (NumMinimalSU < Subunits.size() &&
 713            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
 714       ++NumMinimalSU;
 715     }
 716   }
 717 }
 718
 719 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
 720     const llvm::MCSchedModel &SM,
 721     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
 722   // DensePressure[I] is the port pressure for Proc Resource I.
 723   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
 724   llvm::sort(WPRS.begin(), WPRS.end(),
 725              [](const llvm::MCWriteProcResEntry &A,
 726                 const llvm::MCWriteProcResEntry &B) {
 727                return A.ProcResourceIdx < B.ProcResourceIdx;
 728              });
 729   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
 730     // Get units for the entry.
 731     const llvm::MCProcResourceDesc *const ProcResDesc =
 732         SM.getProcResource(WPR.ProcResourceIdx);
 733     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
 734       // This is a ProcResUnit.
 735       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
 736     } else {
 737       // This is a ProcResGroup.
 738       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
 739                                                ProcResDesc->SubUnitsIdxBegin +
 740                                                    ProcResDesc->NumUnits);
 741       distributePressure(WPR.Cycles, Subunits, DensePressure);
 742     }
 743   }
 744   // Turn dense pressure into sparse pressure by removing zero entries.
 745   std::vector<std::pair<uint16_t, float>> Pressure;
 746   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
 747     if (DensePressure[I] > 0.0f)
 748       Pressure.emplace_back(I, DensePressure[I]);
 749   }
 750   return Pressure;
 751 }
 752
 753 } // namespace exegesis