tools/llvm-exegesis/lib/Analysis.cpp

   1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "Analysis.h"
  11 #include "BenchmarkResult.h"
  12 #include "llvm/ADT/STLExtras.h"
  13 #include "llvm/MC/MCAsmInfo.h"
  14 #include "llvm/Support/FormatVariadic.h"
  15 #include <unordered_set>
  16 #include <vector>
  17
  18 namespace llvm {
  19 namespace exegesis {
  20
  21 static const char kCsvSep = ',';
  22
  23 static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
  24                                     unsigned SchedClassId,
  25                                     const llvm::MCInst &MCI) {
  26   const auto &SM = STI.getSchedModel();
  27   while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
  28     SchedClassId =
  29         STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
  30   return SchedClassId;
  31 }
  32
  33 namespace {
  34
  35 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
  36
  37 template <EscapeTag Tag>
  38 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
  39
  40 template <>
  41 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
  42   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
  43     OS << S;
  44   } else {
  45     // Needs escaping.
  46     OS << '"';
  47     for (const char C : S) {
  48       if (C == '"')
  49         OS << "\"\"";
  50       else
  51         OS << C;
  52     }
  53     OS << '"';
  54   }
  55 }
  56
  57 template <>
  58 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
  59   for (const char C : S) {
  60     if (C == '<')
  61       OS << "&lt;";
  62     else if (C == '>')
  63       OS << "&gt;";
  64     else if (C == '&')
  65       OS << "&amp;";
  66     else
  67       OS << C;
  68   }
  69 }
  70
  71 template <>
  72 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
  73                                      const llvm::StringRef S) {
  74   for (const char C : S) {
  75     if (C == '"')
  76       OS << "\\\"";
  77     else
  78       OS << C;
  79   }
  80 }
  81
  82 } // namespace
  83
  84 template <EscapeTag Tag>
  85 static void
  86 writeClusterId(llvm::raw_ostream &OS,
  87                const InstructionBenchmarkClustering::ClusterId &CID) {
  88   if (CID.isNoise())
  89     writeEscaped<Tag>(OS, "[noise]");
  90   else if (CID.isError())
  91     writeEscaped<Tag>(OS, "[error]");
  92   else
  93     OS << CID.getId();
  94 }
  95
  96 template <EscapeTag Tag>
  97 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
  98   writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
  99 }
 100
 101 template <typename EscapeTag, EscapeTag Tag>
 102 void Analysis::writeSnippet(llvm::raw_ostream &OS,
 103                             llvm::ArrayRef<uint8_t> Bytes,
 104                             const char *Separator) const {
 105   llvm::SmallVector<std::string, 3> Lines;
 106   // Parse the asm snippet and print it.
 107   while (!Bytes.empty()) {
 108     llvm::MCInst MI;
 109     uint64_t MISize = 0;
 110     if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
 111                                  llvm::nulls())) {
 112       writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
 113       writeEscaped<Tag>(OS, Separator);
 114       writeEscaped<Tag>(OS, "[error decoding asm snippet]");
 115       return;
 116     }
 117     Lines.emplace_back();
 118     std::string &Line = Lines.back();
 119     llvm::raw_string_ostream OSS(Line);
 120     InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
 121     Bytes = Bytes.drop_front(MISize);
 122     OSS.flush();
 123     Line = llvm::StringRef(Line).trim().str();
 124   }
 125   writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
 126 }
 127
 128 // Prints a row representing an instruction, along with scheduling info and
 129 // point coordinates (measurements).
 130 void Analysis::printInstructionRowCsv(const size_t PointId,
 131                                       llvm::raw_ostream &OS) const {
 132   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
 133   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
 134   OS << kCsvSep;
 135   writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
 136   OS << kCsvSep;
 137   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
 138   OS << kCsvSep;
 139   assert(!Point.Key.Instructions.empty());
 140   const llvm::MCInst &MCI = Point.Key.Instructions[0];
 141   const unsigned SchedClassId = resolveSchedClassId(
 142       *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);
 143
 144 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 145   const llvm::MCSchedClassDesc *const SCDesc =
 146       SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId);
 147   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
 148 #else
 149   OS << SchedClassId;
 150 #endif
 151   for (const auto &Measurement : Point.Measurements) {
 152     OS << kCsvSep;
 153     writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
 154   }
 155   OS << "\n";
 156 }
 157
 158 Analysis::Analysis(const llvm::Target &Target,
 159                    const InstructionBenchmarkClustering &Clustering)
 160     : Clustering_(Clustering) {
 161   if (Clustering.getPoints().empty())
 162     return;
 163
 164   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
 165   InstrInfo_.reset(Target.createMCInstrInfo());
 166   RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
 167   AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
 168   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
 169                                                     FirstPoint.CpuName, ""));
 170   InstPrinter_.reset(Target.createMCInstPrinter(
 171       llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
 172       *InstrInfo_, *RegInfo_));
 173
 174   Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
 175                                                 &ObjectFileInfo_);
 176   Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
 177   assert(Disasm_ && "cannot create MCDisassembler. missing call to "
 178                     "InitializeXXXTargetDisassembler ?");
 179 }
 180
 181 template <>
 182 llvm::Error
 183 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
 184   if (Clustering_.getPoints().empty())
 185     return llvm::Error::success();
 186
 187   // Write the header.
 188   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
 189      << kCsvSep << "sched_class";
 190   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
 191     OS << kCsvSep;
 192     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
 193   }
 194   OS << "\n";
 195
 196   // Write the points.
 197   const auto &Clusters = Clustering_.getValidClusters();
 198   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
 199     for (const size_t PointId : Clusters[I].PointIndices) {
 200       printInstructionRowCsv(PointId, OS);
 201     }
 202     OS << "\n\n";
 203   }
 204   return llvm::Error::success();
 205 }
 206
 207 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
 208     ResolvedSchedClass &&RSC)
 209     : RSC(std::move(RSC)) {}
 210
 211 std::vector<Analysis::ResolvedSchedClassAndPoints>
 212 Analysis::makePointsPerSchedClass() const {
 213   std::vector<ResolvedSchedClassAndPoints> Entries;
 214   // Maps SchedClassIds to index in result.
 215   std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
 216   const auto &Points = Clustering_.getPoints();
 217   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
 218     const InstructionBenchmark &Point = Points[PointId];
 219     if (!Point.Error.empty())
 220       continue;
 221     assert(!Point.Key.Instructions.empty());
 222     // FIXME: we should be using the tuple of classes for instructions in the
 223     // snippet as key.
 224     const llvm::MCInst &MCI = Point.Key.Instructions[0];
 225     unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
 226     const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
 227                                                 .getSchedClassDesc(SchedClassId)
 228                                                 ->isVariant();
 229     SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
 230     const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
 231     if (IndexIt == SchedClassIdToIndex.end()) {
 232       // Create a new entry.
 233       SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
 234       ResolvedSchedClassAndPoints Entry(
 235           ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
 236       Entry.PointIds.push_back(PointId);
 237       Entries.push_back(std::move(Entry));
 238     } else {
 239       // Append to the existing entry.
 240       Entries[IndexIt->second].PointIds.push_back(PointId);
 241     }
 242   }
 243   return Entries;
 244 }
 245
 246 // Uops repeat the same opcode over again. Just show this opcode and show the
 247 // whole snippet only on hover.
 248 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
 249                                  const std::vector<llvm::MCInst> &Instructions,
 250                                  const llvm::MCInstrInfo &InstrInfo) {
 251   if (Instructions.empty())
 252     return;
 253   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
 254   if (Instructions.size() > 1)
 255     OS << " (x" << Instructions.size() << ")";
 256 }
 257
 258 // Latency tries to find a serial path. Just show the opcode path and show the
 259 // whole snippet only on hover.
 260 static void
 261 writeLatencySnippetHtml(llvm::raw_ostream &OS,
 262                         const std::vector<llvm::MCInst> &Instructions,
 263                         const llvm::MCInstrInfo &InstrInfo) {
 264   bool First = true;
 265   for (const llvm::MCInst &Instr : Instructions) {
 266     if (First)
 267       First = false;
 268     else
 269       OS << " &rarr; ";
 270     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
 271   }
 272 }
 273
 274 void Analysis::printSchedClassClustersHtml(
 275     const std::vector<SchedClassCluster> &Clusters,
 276     const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
 277   const auto &Points = Clustering_.getPoints();
 278   OS << "<table class=\"sched-class-clusters\">";
 279   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
 280   assert(!Clusters.empty());
 281   for (const auto &Measurement :
 282        Points[Clusters[0].getPointIds()[0]].Measurements) {
 283     OS << "<th>";
 284     writeEscaped<kEscapeHtml>(OS, Measurement.Key);
 285     OS << "</th>";
 286   }
 287   OS << "</tr>";
 288   for (const SchedClassCluster &Cluster : Clusters) {
 289     OS << "<tr class=\""
 290        << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
 291                ? "good-cluster"
 292                : "bad-cluster")
 293        << "\"><td>";
 294     writeClusterId<kEscapeHtml>(OS, Cluster.id());
 295     OS << "</td><td><ul>";
 296     for (const size_t PointId : Cluster.getPointIds()) {
 297       const auto &Point = Points[PointId];
 298       OS << "<li><span class=\"mono\" title=\"";
 299       writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
 300                                                  "\n");
 301       OS << "\">";
 302       switch (Point.Mode) {
 303       case InstructionBenchmark::Latency:
 304         writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
 305         break;
 306       case InstructionBenchmark::Uops:
 307         writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
 308         break;
 309       default:
 310         llvm_unreachable("invalid mode");
 311       }
 312       OS << "</span> <span class=\"mono\">";
 313       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
 314       OS << "</span></li>";
 315     }
 316     OS << "</ul></td>";
 317     for (const auto &Stats : Cluster.getRepresentative()) {
 318       OS << "<td class=\"measurement\">";
 319       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
 320       OS << "<br><span class=\"minmax\">[";
 321       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
 322       OS << ";";
 323       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
 324       OS << "]</span></td>";
 325     }
 326     OS << "</tr>";
 327   }
 328   OS << "</table>";
 329 }
 330
 331 // Return the non-redundant list of WriteProcRes used by the given sched class.
 332 // The scheduling model for LLVM is such that each instruction has a certain
 333 // number of uops which consume resources which are described by WriteProcRes
 334 // entries. Each entry describe how many cycles are spent on a specific ProcRes
 335 // kind.
 336 // For example, an instruction might have 3 uOps, one dispatching on P0
 337 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
 338 // Note that LLVM additionally denormalizes resource consumption to include
 339 // usage of super resources by subresources. So in practice if there exists a
 340 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
 341 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
 342 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
 343 // denote implied usage of superresources by subresources:
 344 //            P0      P06    P016
 345 //     uOp1    1      (1)     (1)
 346 //     uOp2            1      (1)
 347 //     uOp3            1      (1)
 348 //     =============================
 349 //             1       3       3
 350 // Eventually we end up with three entries for the WriteProcRes of the
 351 // instruction:
 352 //    {ProcResIdx=1,  Cycles=1}  // P0
 353 //    {ProcResIdx=7,  Cycles=3}  // P06
 354 //    {ProcResIdx=10, Cycles=3}  // P016
 355 //
 356 // Note that in this case, P016 does not contribute any cycles, so it would
 357 // be removed by this function.
 358 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
 359 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
 360 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
 361                             const llvm::MCSubtargetInfo &STI) {
 362   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
 363   const auto &SM = STI.getSchedModel();
 364   const unsigned NumProcRes = SM.getNumProcResourceKinds();
 365
 366   // This assumes that the ProcResDescs are sorted in topological order, which
 367   // is guaranteed by the tablegen backend.
 368   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
 369   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
 370                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
 371        WPR != WPREnd; ++WPR) {
 372     const llvm::MCProcResourceDesc *const ProcResDesc =
 373         SM.getProcResource(WPR->ProcResourceIdx);
 374     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
 375       // This is a ProcResUnit.
 376       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
 377       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
 378     } else {
 379       // This is a ProcResGroup. First see if it contributes any cycles or if
 380       // it has cycles just from subunits.
 381       float RemainingCycles = WPR->Cycles;
 382       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
 383            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
 384            ++SubResIdx) {
 385         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
 386       }
 387       if (RemainingCycles < 0.01f) {
 388         // The ProcResGroup contributes no cycles of its own.
 389         continue;
 390       }
 391       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
 392       Result.push_back({WPR->ProcResourceIdx,
 393                         static_cast<uint16_t>(std::round(RemainingCycles))});
 394       // Spread the remaining cycles over all subunits.
 395       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
 396            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
 397            ++SubResIdx) {
 398         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
 399       }
 400     }
 401   }
 402   return Result;
 403 }
 404
 405 Analysis::ResolvedSchedClass::ResolvedSchedClass(
 406     const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
 407     bool WasVariant)
 408     : SchedClassId(ResolvedSchedClassId), SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
 409       WasVariant(WasVariant),
 410       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
 411       IdealizedProcResPressure(computeIdealizedProcResPressure(
 412           STI.getSchedModel(), NonRedundantWriteProcRes)) {
 413   assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
 414          "ResolvedSchedClass should never be variant");
 415 }
 416
 417 void Analysis::SchedClassCluster::addPoint(
 418     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
 419   PointIds.push_back(PointId);
 420   const auto &Point = Clustering.getPoints()[PointId];
 421   if (ClusterId.isUndef()) {
 422     ClusterId = Clustering.getClusterIdForPoint(PointId);
 423     Representative.resize(Point.Measurements.size());
 424   }
 425   for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
 426     Representative[I].push(Point.Measurements[I]);
 427   }
 428   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
 429 }
 430
 431 // Returns a ProxResIdx by id or name.
 432 static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
 433                                const llvm::StringRef NameOrId) {
 434   // Interpret the key as an ProcResIdx.
 435   unsigned ProcResIdx = 0;
 436   if (llvm::to_integer(NameOrId, ProcResIdx, 10))
 437     return ProcResIdx;
 438   // Interpret the key as a ProcRes name.
 439   const auto &SchedModel = STI.getSchedModel();
 440   for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
 441     if (NameOrId == SchedModel.getProcResource(I)->Name)
 442       return I;
 443   }
 444   return 0;
 445 }
 446
 447 bool Analysis::SchedClassCluster::measurementsMatch(
 448     const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
 449     const InstructionBenchmarkClustering &Clustering) const {
 450   const size_t NumMeasurements = Representative.size();
 451   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
 452   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
 453   // Latency case.
 454   assert(!Clustering.getPoints().empty());
 455   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
 456   if (Mode == InstructionBenchmark::Latency) {
 457     if (NumMeasurements != 1) {
 458       llvm::errs()
 459           << "invalid number of measurements in latency mode: expected 1, got "
 460           << NumMeasurements << "\n";
 461       return false;
 462     }
 463     // Find the latency.
 464     SchedClassPoint[0].PerInstructionValue = 0.0;
 465     for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
 466       const llvm::MCWriteLatencyEntry *const WLE =
 467           STI.getWriteLatencyEntry(RSC.SCDesc, I);
 468       SchedClassPoint[0].PerInstructionValue =
 469           std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
 470     }
 471     ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
 472   } else if (Mode == InstructionBenchmark::Uops) {
 473     for (int I = 0, E = Representative.size(); I < E; ++I) {
 474       const auto Key = Representative[I].key();
 475       uint16_t ProcResIdx = findProcResIdx(STI, Key);
 476       if (ProcResIdx > 0) {
 477         // Find the pressure on ProcResIdx `Key`.
 478         const auto ProcResPressureIt =
 479             std::find_if(RSC.IdealizedProcResPressure.begin(),
 480                          RSC.IdealizedProcResPressure.end(),
 481                          [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
 482                            return WPR.first == ProcResIdx;
 483                          });
 484         SchedClassPoint[I].PerInstructionValue =
 485             ProcResPressureIt == RSC.IdealizedProcResPressure.end()
 486                 ? 0.0
 487                 : ProcResPressureIt->second;
 488       } else if (Key == "NumMicroOps") {
 489         SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
 490       } else {
 491         llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
 492                         "name, got "
 493                      << Key << "\n";
 494         return false;
 495       }
 496       ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
 497     }
 498   } else {
 499     llvm::errs() << "unimplemented measurement matching for mode " << Mode
 500                  << "\n";
 501     return false;
 502   }
 503   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
 504 }
 505
 506 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
 507                                        llvm::raw_ostream &OS) const {
 508   OS << "<table class=\"sched-class-desc\">";
 509   OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
 510         "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
 511         "resource (port) pressure assuming ideal distribution\">Idealized "
 512         "Resource Pressure</th></tr>";
 513   if (RSC.SCDesc->isValid()) {
 514     const auto &SM = SubtargetInfo_->getSchedModel();
 515     OS << "<tr><td>&#10004;</td>";
 516     OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
 517     OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
 518     // Latencies.
 519     OS << "<td><ul>";
 520     for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
 521       const auto *const Entry =
 522           SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
 523       OS << "<li>" << Entry->Cycles;
 524       if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
 525         // Dismabiguate if more than 1 latency.
 526         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
 527       }
 528       OS << "</li>";
 529     }
 530     OS << "</ul></td>";
 531     // WriteProcRes.
 532     OS << "<td><ul>";
 533     for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
 534       OS << "<li><span class=\"mono\">";
 535       writeEscaped<kEscapeHtml>(OS,
 536                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
 537       OS << "</span>: " << WPR.Cycles << "</li>";
 538     }
 539     OS << "</ul></td>";
 540     // Idealized port pressure.
 541     OS << "<td><ul>";
 542     for (const auto &Pressure : RSC.IdealizedProcResPressure) {
 543       OS << "<li><span class=\"mono\">";
 544       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
 545                                         .getProcResource(Pressure.first)
 546                                         ->Name);
 547       OS << "</span>: ";
 548       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
 549       OS << "</li>";
 550     }
 551     OS << "</ul></td>";
 552     OS << "</tr>";
 553   } else {
 554     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
 555   }
 556   OS << "</table>";
 557 }
 558
 559 static constexpr const char kHtmlHead[] = R"(
 560 <head>
 561 <title>llvm-exegesis Analysis Results</title>
 562 <style>
 563 body {
 564   font-family: sans-serif
 565 }
 566 span.sched-class-name {
 567   font-weight: bold;
 568   font-family: monospace;
 569 }
 570 span.opcode {
 571   font-family: monospace;
 572 }
 573 span.config {
 574   font-family: monospace;
 575 }
 576 div.inconsistency {
 577   margin-top: 50px;
 578 }
 579 table {
 580   margin-left: 50px;
 581   border-collapse: collapse;
 582 }
 583 table, table tr,td,th {
 584   border: 1px solid #444;
 585 }
 586 table ul {
 587   padding-left: 0px;
 588   margin: 0px;
 589   list-style-type: none;
 590 }
 591 table.sched-class-clusters td {
 592   padding-left: 10px;
 593   padding-right: 10px;
 594   padding-top: 10px;
 595   padding-bottom: 10px;
 596 }
 597 table.sched-class-desc td {
 598   padding-left: 10px;
 599   padding-right: 10px;
 600   padding-top: 2px;
 601   padding-bottom: 2px;
 602 }
 603 span.mono {
 604   font-family: monospace;
 605 }
 606 td.measurement {
 607   text-align: center;
 608 }
 609 tr.good-cluster td.measurement {
 610   color: #292
 611 }
 612 tr.bad-cluster td.measurement {
 613   color: #922
 614 }
 615 tr.good-cluster td.measurement span.minmax {
 616   color: #888;
 617 }
 618 tr.bad-cluster td.measurement span.minmax {
 619   color: #888;
 620 }
 621 </style>
 622 </head>
 623 )";
 624
 625 template <>
 626 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
 627     llvm::raw_ostream &OS) const {
 628   const auto &FirstPoint = Clustering_.getPoints()[0];
 629   // Print the header.
 630   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
 631   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
 632   OS << "<h3>Triple: <span class=\"mono\">";
 633   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
 634   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
 635   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
 636   OS << "</span></h3>";
 637
 638   for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
 639     if (!RSCAndPoints.RSC.SCDesc)
 640       continue;
 641     // Bucket sched class points into sched class clusters.
 642     std::vector<SchedClassCluster> SchedClassClusters;
 643     for (const size_t PointId : RSCAndPoints.PointIds) {
 644       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
 645       if (!ClusterId.isValid())
 646         continue; // Ignore noise and errors. FIXME: take noise into account ?
 647       auto SchedClassClusterIt =
 648           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
 649                        [ClusterId](const SchedClassCluster &C) {
 650                          return C.id() == ClusterId;
 651                        });
 652       if (SchedClassClusterIt == SchedClassClusters.end()) {
 653         SchedClassClusters.emplace_back();
 654         SchedClassClusterIt = std::prev(SchedClassClusters.end());
 655       }
 656       SchedClassClusterIt->addPoint(PointId, Clustering_);
 657     }
 658
 659     // Print any scheduling class that has at least one cluster that does not
 660     // match the checked-in data.
 661     if (llvm::all_of(SchedClassClusters,
 662                      [this, &RSCAndPoints](const SchedClassCluster &C) {
 663                        return C.measurementsMatch(
 664                            *SubtargetInfo_, RSCAndPoints.RSC, Clustering_);
 665                      }))
 666       continue; // Nothing weird.
 667
 668     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
 669           "class=\"sched-class-name\">";
 670 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 671     writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
 672 #else
 673     OS << RSCAndPoints.RSC.SchedClassId;
 674 #endif
 675     OS << "</span> contains instructions whose performance characteristics do"
 676           " not match that of LLVM:</p>";
 677     printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
 678     OS << "<p>llvm SchedModel data:</p>";
 679     printSchedClassDescHtml(RSCAndPoints.RSC, OS);
 680     OS << "</div>";
 681   }
 682
 683   OS << "</body></html>";
 684   return llvm::Error::success();
 685 }
 686
 687 // Distributes a pressure budget as evenly as possible on the provided subunits
 688 // given the already existing port pressure distribution.
 689 //
 690 // The algorithm is as follows: while there is remaining pressure to
 691 // distribute, find the subunits with minimal pressure, and distribute
 692 // remaining pressure equally up to the pressure of the unit with
 693 // second-to-minimal pressure.
 694 // For example, let's assume we want to distribute 2*P1256
 695 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
 696 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
 697 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
 698 //     RemainingPressure = 2.0
 699 // We sort the subunits by pressure:
 700 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
 701 // We'll first start by the subunits with minimal pressure, which are at
 702 // the beginning of the sorted array. In this example there is one (P2).
 703 // The subunit with second-to-minimal pressure is the next one in the
 704 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
 705 // from the budget.
 706 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
 707 //     RemainingPressure = 1.9
 708 // We repeat this process: distribute 0.2 pressure on each of the minimal
 709 // P2 and P1, decrease budget by 2*0.2:
 710 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
 711 //     RemainingPressure = 1.5
 712 // There are no second-to-minimal subunits so we just share the remaining
 713 // budget (1.5 cycles) equally:
 714 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
 715 //     RemainingPressure = 0.0
 716 // We stop as there is no remaining budget to distribute.
 717 void distributePressure(float RemainingPressure,
 718                         llvm::SmallVector<uint16_t, 32> Subunits,
 719                         llvm::SmallVector<float, 32> &DensePressure) {
 720   // Find the number of subunits with minimal pressure (they are at the
 721   // front).
 722   llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
 723     return DensePressure[A] < DensePressure[B];
 724   });
 725   const auto getPressureForSubunit = [&DensePressure,
 726                                       &Subunits](size_t I) -> float & {
 727     return DensePressure[Subunits[I]];
 728   };
 729   size_t NumMinimalSU = 1;
 730   while (NumMinimalSU < Subunits.size() &&
 731          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
 732     ++NumMinimalSU;
 733   }
 734   while (RemainingPressure > 0.0f) {
 735     if (NumMinimalSU == Subunits.size()) {
 736       // All units are minimal, just distribute evenly and be done.
 737       for (size_t I = 0; I < NumMinimalSU; ++I) {
 738         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
 739       }
 740       return;
 741     }
 742     // Distribute the remaining pressure equally.
 743     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
 744     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
 745     assert(MinimalPressure < SecondToMinimalPressure);
 746     const float Increment = SecondToMinimalPressure - MinimalPressure;
 747     if (RemainingPressure <= NumMinimalSU * Increment) {
 748       // There is not enough remaining pressure.
 749       for (size_t I = 0; I < NumMinimalSU; ++I) {
 750         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
 751       }
 752       return;
 753     }
 754     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
 755     for (size_t I = 0; I < NumMinimalSU; ++I) {
 756       getPressureForSubunit(I) = SecondToMinimalPressure;
 757       RemainingPressure -= SecondToMinimalPressure;
 758     }
 759     while (NumMinimalSU < Subunits.size() &&
 760            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
 761       ++NumMinimalSU;
 762     }
 763   }
 764 }
 765
 766 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
 767     const llvm::MCSchedModel &SM,
 768     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
 769   // DensePressure[I] is the port pressure for Proc Resource I.
 770   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
 771   llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
 772                       const llvm::MCWriteProcResEntry &B) {
 773     return A.ProcResourceIdx < B.ProcResourceIdx;
 774   });
 775   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
 776     // Get units for the entry.
 777     const llvm::MCProcResourceDesc *const ProcResDesc =
 778         SM.getProcResource(WPR.ProcResourceIdx);
 779     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
 780       // This is a ProcResUnit.
 781       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
 782     } else {
 783       // This is a ProcResGroup.
 784       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
 785                                                ProcResDesc->SubUnitsIdxBegin +
 786                                                    ProcResDesc->NumUnits);
 787       distributePressure(WPR.Cycles, Subunits, DensePressure);
 788     }
 789   }
 790   // Turn dense pressure into sparse pressure by removing zero entries.
 791   std::vector<std::pair<uint16_t, float>> Pressure;
 792   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
 793     if (DensePressure[I] > 0.0f)
 794       Pressure.emplace_back(I, DensePressure[I]);
 795   }
 796   return Pressure;
 797 }
 798
 799 } // namespace exegesis
 800 } // namespace llvm