tools/llvm-exegesis/lib/Analysis.cpp

   1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "Analysis.h"
  11 #include "BenchmarkResult.h"
  12 #include "llvm/ADT/STLExtras.h"
  13 #include "llvm/MC/MCAsmInfo.h"
  14 #include "llvm/Support/FormatVariadic.h"
  15 #include <unordered_set>
  16 #include <vector>
  17
  18 namespace exegesis {
  19
  20 static const char kCsvSep = ',';
  21
  22 static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
  23                                     unsigned SchedClassId,
  24                                     const llvm::MCInst &MCI) {
  25   const auto &SM = STI.getSchedModel();
  26   while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
  27     SchedClassId =
  28         STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
  29   return SchedClassId;
  30 }
  31
  32 namespace {
  33
  34 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
  35
  36 template <EscapeTag Tag>
  37 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
  38
  39 template <>
  40 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
  41   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
  42     OS << S;
  43   } else {
  44     // Needs escaping.
  45     OS << '"';
  46     for (const char C : S) {
  47       if (C == '"')
  48         OS << "\"\"";
  49       else
  50         OS << C;
  51     }
  52     OS << '"';
  53   }
  54 }
  55
  56 template <>
  57 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
  58   for (const char C : S) {
  59     if (C == '<')
  60       OS << "&lt;";
  61     else if (C == '>')
  62       OS << "&gt;";
  63     else if (C == '&')
  64       OS << "&amp;";
  65     else
  66       OS << C;
  67   }
  68 }
  69
  70 template <>
  71 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
  72                                      const llvm::StringRef S) {
  73   for (const char C : S) {
  74     if (C == '"')
  75       OS << "\\\"";
  76     else
  77       OS << C;
  78   }
  79 }
  80
  81 } // namespace
  82
  83 template <EscapeTag Tag>
  84 static void
  85 writeClusterId(llvm::raw_ostream &OS,
  86                const InstructionBenchmarkClustering::ClusterId &CID) {
  87   if (CID.isNoise())
  88     writeEscaped<Tag>(OS, "[noise]");
  89   else if (CID.isError())
  90     writeEscaped<Tag>(OS, "[error]");
  91   else
  92     OS << CID.getId();
  93 }
  94
  95 template <EscapeTag Tag>
  96 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
  97   writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
  98 }
  99
 100 template <typename EscapeTag, EscapeTag Tag>
 101 void Analysis::writeSnippet(llvm::raw_ostream &OS,
 102                             llvm::ArrayRef<uint8_t> Bytes,
 103                             const char *Separator) const {
 104   llvm::SmallVector<std::string, 3> Lines;
 105   // Parse the asm snippet and print it.
 106   while (!Bytes.empty()) {
 107     llvm::MCInst MI;
 108     uint64_t MISize = 0;
 109     if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
 110                                  llvm::nulls())) {
 111       writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
 112       writeEscaped<Tag>(OS, Separator);
 113       writeEscaped<Tag>(OS, "[error decoding asm snippet]");
 114       return;
 115     }
 116     Lines.emplace_back();
 117     std::string &Line = Lines.back();
 118     llvm::raw_string_ostream OSS(Line);
 119     InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
 120     Bytes = Bytes.drop_front(MISize);
 121     OSS.flush();
 122     Line = llvm::StringRef(Line).trim().str();
 123   }
 124   writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
 125 }
 126
 127 // Prints a row representing an instruction, along with scheduling info and
 128 // point coordinates (measurements).
 129 void Analysis::printInstructionRowCsv(const size_t PointId,
 130                                       llvm::raw_ostream &OS) const {
 131   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
 132   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
 133   OS << kCsvSep;
 134   writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
 135   OS << kCsvSep;
 136   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
 137   OS << kCsvSep;
 138   assert(!Point.Key.Instructions.empty());
 139   const llvm::MCInst &MCI = Point.Key.Instructions[0];
 140   const unsigned SchedClassId = resolveSchedClassId(
 141       *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);
 142
 143 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 144   const llvm::MCSchedClassDesc *const SCDesc =
 145       SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId);
 146   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
 147 #else
 148   OS << SchedClassId;
 149 #endif
 150   for (const auto &Measurement : Point.Measurements) {
 151     OS << kCsvSep;
 152     writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
 153   }
 154   OS << "\n";
 155 }
 156
 157 Analysis::Analysis(const llvm::Target &Target,
 158                    const InstructionBenchmarkClustering &Clustering)
 159     : Clustering_(Clustering) {
 160   if (Clustering.getPoints().empty())
 161     return;
 162
 163   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
 164   InstrInfo_.reset(Target.createMCInstrInfo());
 165   RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
 166   AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
 167   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
 168                                                     FirstPoint.CpuName, ""));
 169   InstPrinter_.reset(Target.createMCInstPrinter(
 170       llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
 171       *InstrInfo_, *RegInfo_));
 172
 173   Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
 174                                                 &ObjectFileInfo_);
 175   Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
 176   assert(Disasm_ && "cannot create MCDisassembler. missing call to "
 177                     "InitializeXXXTargetDisassembler ?");
 178 }
 179
 180 template <>
 181 llvm::Error
 182 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
 183   if (Clustering_.getPoints().empty())
 184     return llvm::Error::success();
 185
 186   // Write the header.
 187   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
 188      << kCsvSep << "sched_class";
 189   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
 190     OS << kCsvSep;
 191     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
 192   }
 193   OS << "\n";
 194
 195   // Write the points.
 196   const auto &Clusters = Clustering_.getValidClusters();
 197   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
 198     for (const size_t PointId : Clusters[I].PointIndices) {
 199       printInstructionRowCsv(PointId, OS);
 200     }
 201     OS << "\n\n";
 202   }
 203   return llvm::Error::success();
 204 }
 205
 206 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
 207     ResolvedSchedClass &&RSC)
 208     : RSC(std::move(RSC)) {}
 209
 210 std::vector<Analysis::ResolvedSchedClassAndPoints>
 211 Analysis::makePointsPerSchedClass() const {
 212   std::vector<ResolvedSchedClassAndPoints> Entries;
 213   // Maps SchedClassIds to index in result.
 214   std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
 215   const auto &Points = Clustering_.getPoints();
 216   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
 217     const InstructionBenchmark &Point = Points[PointId];
 218     if (!Point.Error.empty())
 219       continue;
 220     assert(!Point.Key.Instructions.empty());
 221     // FIXME: we should be using the tuple of classes for instructions in the
 222     // snippet as key.
 223     const llvm::MCInst &MCI = Point.Key.Instructions[0];
 224     unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
 225     const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
 226                                                 .getSchedClassDesc(SchedClassId)
 227                                                 ->isVariant();
 228     SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
 229     const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
 230     if (IndexIt == SchedClassIdToIndex.end()) {
 231       // Create a new entry.
 232       SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
 233       ResolvedSchedClassAndPoints Entry(
 234           ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
 235       Entry.PointIds.push_back(PointId);
 236       Entries.push_back(std::move(Entry));
 237     } else {
 238       // Append to the existing entry.
 239       Entries[IndexIt->second].PointIds.push_back(PointId);
 240     }
 241   }
 242   return Entries;
 243 }
 244
 245 // Uops repeat the same opcode over again. Just show this opcode and show the
 246 // whole snippet only on hover.
 247 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
 248                                  const std::vector<llvm::MCInst> &Instructions,
 249                                  const llvm::MCInstrInfo &InstrInfo) {
 250   if (Instructions.empty())
 251     return;
 252   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
 253   if (Instructions.size() > 1)
 254     OS << " (x" << Instructions.size() << ")";
 255 }
 256
 257 // Latency tries to find a serial path. Just show the opcode path and show the
 258 // whole snippet only on hover.
 259 static void
 260 writeLatencySnippetHtml(llvm::raw_ostream &OS,
 261                         const std::vector<llvm::MCInst> &Instructions,
 262                         const llvm::MCInstrInfo &InstrInfo) {
 263   bool First = true;
 264   for (const llvm::MCInst &Instr : Instructions) {
 265     if (First)
 266       First = false;
 267     else
 268       OS << " &rarr; ";
 269     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
 270   }
 271 }
 272
 273 void Analysis::printSchedClassClustersHtml(
 274     const std::vector<SchedClassCluster> &Clusters,
 275     const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
 276   const auto &Points = Clustering_.getPoints();
 277   OS << "<table class=\"sched-class-clusters\">";
 278   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
 279   assert(!Clusters.empty());
 280   for (const auto &Measurement :
 281        Points[Clusters[0].getPointIds()[0]].Measurements) {
 282     OS << "<th>";
 283     writeEscaped<kEscapeHtml>(OS, Measurement.Key);
 284     OS << "</th>";
 285   }
 286   OS << "</tr>";
 287   for (const SchedClassCluster &Cluster : Clusters) {
 288     OS << "<tr class=\""
 289        << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
 290                ? "good-cluster"
 291                : "bad-cluster")
 292        << "\"><td>";
 293     writeClusterId<kEscapeHtml>(OS, Cluster.id());
 294     OS << "</td><td><ul>";
 295     for (const size_t PointId : Cluster.getPointIds()) {
 296       const auto &Point = Points[PointId];
 297       OS << "<li><span class=\"mono\" title=\"";
 298       writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
 299                                                  "\n");
 300       OS << "\">";
 301       switch (Point.Mode) {
 302       case InstructionBenchmark::Latency:
 303         writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
 304         break;
 305       case InstructionBenchmark::Uops:
 306         writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
 307         break;
 308       default:
 309         llvm_unreachable("invalid mode");
 310       }
 311       OS << "</span> <span class=\"mono\">";
 312       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
 313       OS << "</span></li>";
 314     }
 315     OS << "</ul></td>";
 316     for (const auto &Stats : Cluster.getRepresentative()) {
 317       OS << "<td class=\"measurement\">";
 318       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
 319       OS << "<br><span class=\"minmax\">[";
 320       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
 321       OS << ";";
 322       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
 323       OS << "]</span></td>";
 324     }
 325     OS << "</tr>";
 326   }
 327   OS << "</table>";
 328 }
 329
 330 // Return the non-redundant list of WriteProcRes used by the given sched class.
 331 // The scheduling model for LLVM is such that each instruction has a certain
 332 // number of uops which consume resources which are described by WriteProcRes
 333 // entries. Each entry describe how many cycles are spent on a specific ProcRes
 334 // kind.
 335 // For example, an instruction might have 3 uOps, one dispatching on P0
 336 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
 337 // Note that LLVM additionally denormalizes resource consumption to include
 338 // usage of super resources by subresources. So in practice if there exists a
 339 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
 340 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
 341 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
 342 // denote implied usage of superresources by subresources:
 343 //            P0      P06    P016
 344 //     uOp1    1      (1)     (1)
 345 //     uOp2            1      (1)
 346 //     uOp3            1      (1)
 347 //     =============================
 348 //             1       3       3
 349 // Eventually we end up with three entries for the WriteProcRes of the
 350 // instruction:
 351 //    {ProcResIdx=1,  Cycles=1}  // P0
 352 //    {ProcResIdx=7,  Cycles=3}  // P06
 353 //    {ProcResIdx=10, Cycles=3}  // P016
 354 //
 355 // Note that in this case, P016 does not contribute any cycles, so it would
 356 // be removed by this function.
 357 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
 358 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
 359 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
 360                             const llvm::MCSubtargetInfo &STI) {
 361   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
 362   const auto &SM = STI.getSchedModel();
 363   const unsigned NumProcRes = SM.getNumProcResourceKinds();
 364
 365   // This assumes that the ProcResDescs are sorted in topological order, which
 366   // is guaranteed by the tablegen backend.
 367   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
 368   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
 369                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
 370        WPR != WPREnd; ++WPR) {
 371     const llvm::MCProcResourceDesc *const ProcResDesc =
 372         SM.getProcResource(WPR->ProcResourceIdx);
 373     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
 374       // This is a ProcResUnit.
 375       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
 376       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
 377     } else {
 378       // This is a ProcResGroup. First see if it contributes any cycles or if
 379       // it has cycles just from subunits.
 380       float RemainingCycles = WPR->Cycles;
 381       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
 382            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
 383            ++SubResIdx) {
 384         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
 385       }
 386       if (RemainingCycles < 0.01f) {
 387         // The ProcResGroup contributes no cycles of its own.
 388         continue;
 389       }
 390       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
 391       Result.push_back({WPR->ProcResourceIdx,
 392                         static_cast<uint16_t>(std::round(RemainingCycles))});
 393       // Spread the remaining cycles over all subunits.
 394       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
 395            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
 396            ++SubResIdx) {
 397         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
 398       }
 399     }
 400   }
 401   return Result;
 402 }
 403
 404 Analysis::ResolvedSchedClass::ResolvedSchedClass(
 405     const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
 406     bool WasVariant)
 407     : SchedClassId(ResolvedSchedClassId), SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
 408       WasVariant(WasVariant),
 409       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
 410       IdealizedProcResPressure(computeIdealizedProcResPressure(
 411           STI.getSchedModel(), NonRedundantWriteProcRes)) {
 412   assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
 413          "ResolvedSchedClass should never be variant");
 414 }
 415
 416 void Analysis::SchedClassCluster::addPoint(
 417     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
 418   PointIds.push_back(PointId);
 419   const auto &Point = Clustering.getPoints()[PointId];
 420   if (ClusterId.isUndef()) {
 421     ClusterId = Clustering.getClusterIdForPoint(PointId);
 422     Representative.resize(Point.Measurements.size());
 423   }
 424   for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
 425     Representative[I].push(Point.Measurements[I]);
 426   }
 427   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
 428 }
 429
 430 // Returns a ProxResIdx by id or name.
 431 static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
 432                                const llvm::StringRef NameOrId) {
 433   // Interpret the key as an ProcResIdx.
 434   unsigned ProcResIdx = 0;
 435   if (llvm::to_integer(NameOrId, ProcResIdx, 10))
 436     return ProcResIdx;
 437   // Interpret the key as a ProcRes name.
 438   const auto &SchedModel = STI.getSchedModel();
 439   for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
 440     if (NameOrId == SchedModel.getProcResource(I)->Name)
 441       return I;
 442   }
 443   return 0;
 444 }
 445
 446 bool Analysis::SchedClassCluster::measurementsMatch(
 447     const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
 448     const InstructionBenchmarkClustering &Clustering) const {
 449   const size_t NumMeasurements = Representative.size();
 450   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
 451   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
 452   // Latency case.
 453   assert(!Clustering.getPoints().empty());
 454   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
 455   if (Mode == InstructionBenchmark::Latency) {
 456     if (NumMeasurements != 1) {
 457       llvm::errs()
 458           << "invalid number of measurements in latency mode: expected 1, got "
 459           << NumMeasurements << "\n";
 460       return false;
 461     }
 462     // Find the latency.
 463     SchedClassPoint[0].PerInstructionValue = 0.0;
 464     for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
 465       const llvm::MCWriteLatencyEntry *const WLE =
 466           STI.getWriteLatencyEntry(RSC.SCDesc, I);
 467       SchedClassPoint[0].PerInstructionValue =
 468           std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
 469     }
 470     ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
 471   } else if (Mode == InstructionBenchmark::Uops) {
 472     for (int I = 0, E = Representative.size(); I < E; ++I) {
 473       const auto Key = Representative[I].key();
 474       uint16_t ProcResIdx = findProcResIdx(STI, Key);
 475       if (ProcResIdx > 0) {
 476         // Find the pressure on ProcResIdx `Key`.
 477         const auto ProcResPressureIt =
 478             std::find_if(RSC.IdealizedProcResPressure.begin(),
 479                          RSC.IdealizedProcResPressure.end(),
 480                          [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
 481                            return WPR.first == ProcResIdx;
 482                          });
 483         SchedClassPoint[I].PerInstructionValue =
 484             ProcResPressureIt == RSC.IdealizedProcResPressure.end()
 485                 ? 0.0
 486                 : ProcResPressureIt->second;
 487       } else if (Key == "NumMicroOps") {
 488         SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
 489       } else {
 490         llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
 491                         "name, got "
 492                      << Key << "\n";
 493         return false;
 494       }
 495       ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
 496     }
 497   } else {
 498     llvm::errs() << "unimplemented measurement matching for mode " << Mode
 499                  << "\n";
 500     return false;
 501   }
 502   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
 503 }
 504
 505 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
 506                                        llvm::raw_ostream &OS) const {
 507   OS << "<table class=\"sched-class-desc\">";
 508   OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
 509         "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
 510         "resource (port) pressure assuming ideal distribution\">Idealized "
 511         "Resource Pressure</th></tr>";
 512   if (RSC.SCDesc->isValid()) {
 513     const auto &SM = SubtargetInfo_->getSchedModel();
 514     OS << "<tr><td>&#10004;</td>";
 515     OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
 516     OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
 517     // Latencies.
 518     OS << "<td><ul>";
 519     for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
 520       const auto *const Entry =
 521           SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
 522       OS << "<li>" << Entry->Cycles;
 523       if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
 524         // Dismabiguate if more than 1 latency.
 525         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
 526       }
 527       OS << "</li>";
 528     }
 529     OS << "</ul></td>";
 530     // WriteProcRes.
 531     OS << "<td><ul>";
 532     for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
 533       OS << "<li><span class=\"mono\">";
 534       writeEscaped<kEscapeHtml>(OS,
 535                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
 536       OS << "</span>: " << WPR.Cycles << "</li>";
 537     }
 538     OS << "</ul></td>";
 539     // Idealized port pressure.
 540     OS << "<td><ul>";
 541     for (const auto &Pressure : RSC.IdealizedProcResPressure) {
 542       OS << "<li><span class=\"mono\">";
 543       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
 544                                         .getProcResource(Pressure.first)
 545                                         ->Name);
 546       OS << "</span>: ";
 547       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
 548       OS << "</li>";
 549     }
 550     OS << "</ul></td>";
 551     OS << "</tr>";
 552   } else {
 553     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
 554   }
 555   OS << "</table>";
 556 }
 557
 558 static constexpr const char kHtmlHead[] = R"(
 559 <head>
 560 <title>llvm-exegesis Analysis Results</title>
 561 <style>
 562 body {
 563   font-family: sans-serif
 564 }
 565 span.sched-class-name {
 566   font-weight: bold;
 567   font-family: monospace;
 568 }
 569 span.opcode {
 570   font-family: monospace;
 571 }
 572 span.config {
 573   font-family: monospace;
 574 }
 575 div.inconsistency {
 576   margin-top: 50px;
 577 }
 578 table {
 579   margin-left: 50px;
 580   border-collapse: collapse;
 581 }
 582 table, table tr,td,th {
 583   border: 1px solid #444;
 584 }
 585 table ul {
 586   padding-left: 0px;
 587   margin: 0px;
 588   list-style-type: none;
 589 }
 590 table.sched-class-clusters td {
 591   padding-left: 10px;
 592   padding-right: 10px;
 593   padding-top: 10px;
 594   padding-bottom: 10px;
 595 }
 596 table.sched-class-desc td {
 597   padding-left: 10px;
 598   padding-right: 10px;
 599   padding-top: 2px;
 600   padding-bottom: 2px;
 601 }
 602 span.mono {
 603   font-family: monospace;
 604 }
 605 td.measurement {
 606   text-align: center;
 607 }
 608 tr.good-cluster td.measurement {
 609   color: #292
 610 }
 611 tr.bad-cluster td.measurement {
 612   color: #922
 613 }
 614 tr.good-cluster td.measurement span.minmax {
 615   color: #888;
 616 }
 617 tr.bad-cluster td.measurement span.minmax {
 618   color: #888;
 619 }
 620 </style>
 621 </head>
 622 )";
 623
 624 template <>
 625 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
 626     llvm::raw_ostream &OS) const {
 627   const auto &FirstPoint = Clustering_.getPoints()[0];
 628   // Print the header.
 629   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
 630   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
 631   OS << "<h3>Triple: <span class=\"mono\">";
 632   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
 633   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
 634   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
 635   OS << "</span></h3>";
 636
 637   for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
 638     if (!RSCAndPoints.RSC.SCDesc)
 639       continue;
 640     // Bucket sched class points into sched class clusters.
 641     std::vector<SchedClassCluster> SchedClassClusters;
 642     for (const size_t PointId : RSCAndPoints.PointIds) {
 643       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
 644       if (!ClusterId.isValid())
 645         continue; // Ignore noise and errors. FIXME: take noise into account ?
 646       auto SchedClassClusterIt =
 647           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
 648                        [ClusterId](const SchedClassCluster &C) {
 649                          return C.id() == ClusterId;
 650                        });
 651       if (SchedClassClusterIt == SchedClassClusters.end()) {
 652         SchedClassClusters.emplace_back();
 653         SchedClassClusterIt = std::prev(SchedClassClusters.end());
 654       }
 655       SchedClassClusterIt->addPoint(PointId, Clustering_);
 656     }
 657
 658     // Print any scheduling class that has at least one cluster that does not
 659     // match the checked-in data.
 660     if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
 661                     [this, &RSCAndPoints](const SchedClassCluster &C) {
 662                       return C.measurementsMatch(*SubtargetInfo_,
 663                                                  RSCAndPoints.RSC, Clustering_);
 664                     }))
 665       continue; // Nothing weird.
 666
 667     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
 668           "class=\"sched-class-name\">";
 669 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 670     writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
 671 #else
 672     OS << RSCAndPoints.RSC.SchedClassId;
 673 #endif
 674     OS << "</span> contains instructions whose performance characteristics do"
 675           " not match that of LLVM:</p>";
 676     printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
 677     OS << "<p>llvm SchedModel data:</p>";
 678     printSchedClassDescHtml(RSCAndPoints.RSC, OS);
 679     OS << "</div>";
 680   }
 681
 682   OS << "</body></html>";
 683   return llvm::Error::success();
 684 }
 685
 686 // Distributes a pressure budget as evenly as possible on the provided subunits
 687 // given the already existing port pressure distribution.
 688 //
 689 // The algorithm is as follows: while there is remaining pressure to
 690 // distribute, find the subunits with minimal pressure, and distribute
 691 // remaining pressure equally up to the pressure of the unit with
 692 // second-to-minimal pressure.
 693 // For example, let's assume we want to distribute 2*P1256
 694 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
 695 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
 696 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
 697 //     RemainingPressure = 2.0
 698 // We sort the subunits by pressure:
 699 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
 700 // We'll first start by the subunits with minimal pressure, which are at
 701 // the beginning of the sorted array. In this example there is one (P2).
 702 // The subunit with second-to-minimal pressure is the next one in the
 703 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
 704 // from the budget.
 705 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
 706 //     RemainingPressure = 1.9
 707 // We repeat this process: distribute 0.2 pressure on each of the minimal
 708 // P2 and P1, decrease budget by 2*0.2:
 709 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
 710 //     RemainingPressure = 1.5
 711 // There are no second-to-minimal subunits so we just share the remaining
 712 // budget (1.5 cycles) equally:
 713 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
 714 //     RemainingPressure = 0.0
 715 // We stop as there is no remaining budget to distribute.
 716 void distributePressure(float RemainingPressure,
 717                         llvm::SmallVector<uint16_t, 32> Subunits,
 718                         llvm::SmallVector<float, 32> &DensePressure) {
 719   // Find the number of subunits with minimal pressure (they are at the
 720   // front).
 721   llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
 722     return DensePressure[A] < DensePressure[B];
 723   });
 724   const auto getPressureForSubunit = [&DensePressure,
 725                                       &Subunits](size_t I) -> float & {
 726     return DensePressure[Subunits[I]];
 727   };
 728   size_t NumMinimalSU = 1;
 729   while (NumMinimalSU < Subunits.size() &&
 730          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
 731     ++NumMinimalSU;
 732   }
 733   while (RemainingPressure > 0.0f) {
 734     if (NumMinimalSU == Subunits.size()) {
 735       // All units are minimal, just distribute evenly and be done.
 736       for (size_t I = 0; I < NumMinimalSU; ++I) {
 737         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
 738       }
 739       return;
 740     }
 741     // Distribute the remaining pressure equally.
 742     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
 743     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
 744     assert(MinimalPressure < SecondToMinimalPressure);
 745     const float Increment = SecondToMinimalPressure - MinimalPressure;
 746     if (RemainingPressure <= NumMinimalSU * Increment) {
 747       // There is not enough remaining pressure.
 748       for (size_t I = 0; I < NumMinimalSU; ++I) {
 749         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
 750       }
 751       return;
 752     }
 753     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
 754     for (size_t I = 0; I < NumMinimalSU; ++I) {
 755       getPressureForSubunit(I) = SecondToMinimalPressure;
 756       RemainingPressure -= SecondToMinimalPressure;
 757     }
 758     while (NumMinimalSU < Subunits.size() &&
 759            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
 760       ++NumMinimalSU;
 761     }
 762   }
 763 }
 764
 765 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
 766     const llvm::MCSchedModel &SM,
 767     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
 768   // DensePressure[I] is the port pressure for Proc Resource I.
 769   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
 770   llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
 771                       const llvm::MCWriteProcResEntry &B) {
 772     return A.ProcResourceIdx < B.ProcResourceIdx;
 773   });
 774   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
 775     // Get units for the entry.
 776     const llvm::MCProcResourceDesc *const ProcResDesc =
 777         SM.getProcResource(WPR.ProcResourceIdx);
 778     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
 779       // This is a ProcResUnit.
 780       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
 781     } else {
 782       // This is a ProcResGroup.
 783       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
 784                                                ProcResDesc->SubUnitsIdxBegin +
 785                                                    ProcResDesc->NumUnits);
 786       distributePressure(WPR.Cycles, Subunits, DensePressure);
 787     }
 788   }
 789   // Turn dense pressure into sparse pressure by removing zero entries.
 790   std::vector<std::pair<uint16_t, float>> Pressure;
 791   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
 792     if (DensePressure[I] > 0.0f)
 793       Pressure.emplace_back(I, DensePressure[I]);
 794   }
 795   return Pressure;
 796 }
 797
 798 } // namespace exegesis