llvm/tools/llvm-exegesis/llvm-exegesis.cpp

   1 //===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// Measures execution properties (latencies/uops) of an instruction.
  11 ///
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "lib/Analysis.h"
  15 #include "lib/BenchmarkResult.h"
  16 #include "lib/BenchmarkRunner.h"
  17 #include "lib/Clustering.h"
  18 #include "lib/Error.h"
  19 #include "lib/LlvmState.h"
  20 #include "lib/PerfHelper.h"
  21 #include "lib/SnippetFile.h"
  22 #include "lib/SnippetRepetitor.h"
  23 #include "lib/Target.h"
  24 #include "lib/TargetSelect.h"
  25 #include "llvm/ADT/StringExtras.h"
  26 #include "llvm/ADT/Twine.h"
  27 #include "llvm/MC/MCInstBuilder.h"
  28 #include "llvm/MC/MCObjectFileInfo.h"
  29 #include "llvm/MC/MCParser/MCAsmParser.h"
  30 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
  31 #include "llvm/MC/MCRegisterInfo.h"
  32 #include "llvm/MC/MCSubtargetInfo.h"
  33 #include "llvm/MC/TargetRegistry.h"
  34 #include "llvm/Object/ObjectFile.h"
  35 #include "llvm/Support/CommandLine.h"
  36 #include "llvm/Support/FileSystem.h"
  37 #include "llvm/Support/Format.h"
  38 #include "llvm/Support/Path.h"
  39 #include "llvm/Support/SourceMgr.h"
  40 #include "llvm/Support/TargetSelect.h"
  41 #include <algorithm>
  42 #include <string>
  43
  44 namespace llvm {
  45 namespace exegesis {
  46
  47 static cl::OptionCategory Options("llvm-exegesis options");
  48 static cl::OptionCategory BenchmarkOptions("llvm-exegesis benchmark options");
  49 static cl::OptionCategory AnalysisOptions("llvm-exegesis analysis options");
  50
  51 static cl::opt<int> OpcodeIndex(
  52     "opcode-index",
  53     cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
  54     cl::cat(BenchmarkOptions), cl::init(0));
  55
  56 static cl::opt<std::string>
  57     OpcodeNames("opcode-name",
  58                 cl::desc("comma-separated list of opcodes to measure, by name"),
  59                 cl::cat(BenchmarkOptions), cl::init(""));
  60
  61 static cl::opt<std::string> SnippetsFile("snippets-file",
  62                                          cl::desc("code snippets to measure"),
  63                                          cl::cat(BenchmarkOptions),
  64                                          cl::init(""));
  65
  66 static cl::opt<std::string>
  67     BenchmarkFile("benchmarks-file",
  68                   cl::desc("File to read (analysis mode) or write "
  69                            "(latency/uops/inverse_throughput modes) benchmark "
  70                            "results. “-” uses stdin/stdout."),
  71                   cl::cat(Options), cl::init(""));
  72
// Selects what the tool does. main() runs analysisMain() when the value is
// InstructionBenchmark::Unknown ("analysis") and benchmarkMain() for every
// other value.
static cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
    "mode", cl::desc("the mode to run"), cl::cat(Options),
    cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency, "latency",
                          "Instruction Latency"),
               clEnumValN(exegesis::InstructionBenchmark::InverseThroughput,
                          "inverse_throughput",
                          "Instruction Inverse Throughput"),
               clEnumValN(exegesis::InstructionBenchmark::Uops, "uops",
                          "Uop Decomposition"),
               // When not asking for a specific benchmark mode,
               // we'll analyse the results.
               clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis",
                          "Analysis")));

// How several readings of one measurement are combined; handed to
// createBenchmarkRunner() in benchmarkMain(). Defaults to keeping the minimum.
static cl::opt<exegesis::InstructionBenchmark::ResultAggregationModeE>
    ResultAggMode(
        "result-aggregation-mode",
        cl::desc("How to aggregate multi-values result"), cl::cat(Options),
        cl::values(clEnumValN(exegesis::InstructionBenchmark::Min, "min",
                              "Keep min reading"),
                   clEnumValN(exegesis::InstructionBenchmark::Max, "max",
                              "Keep max reading"),
                   clEnumValN(exegesis::InstructionBenchmark::Mean, "mean",
                              "Compute mean of all readings"),
                   clEnumValN(exegesis::InstructionBenchmark::MinVariance,
                              "min-variance",
                              "Keep readings set with min-variance")),
        cl::init(exegesis::InstructionBenchmark::Min));

// How the snippet is repeated. With "min" (AggregateMin) benchmarkMain()
// creates both a Duplicate and a Loop repetitor instead of a single one.
static cl::opt<exegesis::InstructionBenchmark::RepetitionModeE> RepetitionMode(
    "repetition-mode", cl::desc("how to repeat the instruction snippet"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(exegesis::InstructionBenchmark::Duplicate, "duplicate",
                   "Duplicate the snippet"),
        clEnumValN(exegesis::InstructionBenchmark::Loop, "loop",
                   "Loop over the snippet"),
        clEnumValN(exegesis::InstructionBenchmark::AggregateMin, "min",
                   "All of the above and take the minimum of measurements")),
    cl::init(exegesis::InstructionBenchmark::Duplicate));
 113
 114 static cl::opt<unsigned>
 115     NumRepetitions("num-repetitions",
 116                    cl::desc("number of time to repeat the asm snippet"),
 117                    cl::cat(BenchmarkOptions), cl::init(10000));
 118
 119 static cl::opt<unsigned>
 120     LoopBodySize("loop-body-size",
 121                  cl::desc("when repeating the instruction snippet by looping "
 122                           "over it, duplicate the snippet until the loop body "
 123                           "contains at least this many instruction"),
 124                  cl::cat(BenchmarkOptions), cl::init(0));
 125
 126 static cl::opt<unsigned> MaxConfigsPerOpcode(
 127     "max-configs-per-opcode",
 128     cl::desc(
 129         "allow to snippet generator to generate at most that many configs"),
 130     cl::cat(BenchmarkOptions), cl::init(1));
 131
 132 static cl::opt<bool> IgnoreInvalidSchedClass(
 133     "ignore-invalid-sched-class",
 134     cl::desc("ignore instructions that do not define a sched class"),
 135     cl::cat(BenchmarkOptions), cl::init(false));
 136
 137 static cl::opt<exegesis::InstructionBenchmarkClustering::ModeE>
 138     AnalysisClusteringAlgorithm(
 139         "analysis-clustering", cl::desc("the clustering algorithm to use"),
 140         cl::cat(AnalysisOptions),
 141         cl::values(clEnumValN(exegesis::InstructionBenchmarkClustering::Dbscan,
 142                               "dbscan", "use DBSCAN/OPTICS algorithm"),
 143                    clEnumValN(exegesis::InstructionBenchmarkClustering::Naive,
 144                               "naive", "one cluster per opcode")),
 145         cl::init(exegesis::InstructionBenchmarkClustering::Dbscan));
 146
 147 static cl::opt<unsigned> AnalysisDbscanNumPoints(
 148     "analysis-numpoints",
 149     cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
 150     cl::cat(AnalysisOptions), cl::init(3));
 151
 152 static cl::opt<float> AnalysisClusteringEpsilon(
 153     "analysis-clustering-epsilon",
 154     cl::desc("epsilon for benchmark point clustering"),
 155     cl::cat(AnalysisOptions), cl::init(0.1));
 156
 157 static cl::opt<float> AnalysisInconsistencyEpsilon(
 158     "analysis-inconsistency-epsilon",
 159     cl::desc("epsilon for detection of when the cluster is different from the "
 160              "LLVM schedule profile values"),
 161     cl::cat(AnalysisOptions), cl::init(0.1));
 162
 163 static cl::opt<std::string>
 164     AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
 165                                cl::cat(AnalysisOptions), cl::init(""));
 166 static cl::opt<std::string>
 167     AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
 168                                       cl::desc(""), cl::cat(AnalysisOptions),
 169                                       cl::init(""));
 170
 171 static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
 172     "analysis-display-unstable-clusters",
 173     cl::desc("if there is more than one benchmark for an opcode, said "
 174              "benchmarks may end up not being clustered into the same cluster "
 175              "if the measured performance characteristics are different. by "
 176              "default all such opcodes are filtered out. this flag will "
 177              "instead show only such unstable opcodes"),
 178     cl::cat(AnalysisOptions), cl::init(false));
 179
// CPU name. Used to construct the LLVMState in benchmarkMain(), and passed to
// the subtarget info and the Analysis object in analysisMain().
static cl::opt<std::string> CpuName(
    "mcpu",
    cl::desc("cpu name to use for pfm counters, leave empty to autodetect"),
    cl::cat(Options), cl::init(""));

// Forwarded to the benchmark runner's runConfiguration() for every
// configuration; enabled by default.
static cl::opt<bool>
    DumpObjectToDisk("dump-object-to-disk",
                     cl::desc("dumps the generated benchmark object to disk "
                              "and prints a message to access it"),
                     cl::cat(BenchmarkOptions), cl::init(true));

// Error reporter shared by the whole tool. Some call sites replace the banner
// with "llvm-exegesis: " before reporting, and main() installs the exit code
// mapper.
static ExitOnError ExitOnErr("llvm-exegesis error: ");
 192
 193 // Helper function that logs the error(s) and exits.
 194 template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) {
 195   ExitOnErr(make_error<Failure>(std::forward<ArgTs>(Args)...));
 196 }
 197
 198 // Check Err. If it's in a failure state log the file error(s) and exit.
 199 static void ExitOnFileError(const Twine &FileName, Error Err) {
 200   if (Err) {
 201     ExitOnErr(createFileError(FileName, std::move(Err)));
 202   }
 203 }
 204
 205 // Check E. If it's in a success state then return the contained value.
 206 // If it's in a failure state log the file error(s) and exit.
 207 template <typename T>
 208 T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
 209   ExitOnFileError(FileName, E.takeError());
 210   return std::move(*E);
 211 }
 212
 213 // Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
 214 // and returns the opcode indices or {} if snippets should be read from
 215 // `SnippetsFile`.
 216 static std::vector<unsigned> getOpcodesOrDie(const MCInstrInfo &MCInstrInfo) {
 217   const size_t NumSetFlags = (OpcodeNames.empty() ? 0 : 1) +
 218                              (OpcodeIndex == 0 ? 0 : 1) +
 219                              (SnippetsFile.empty() ? 0 : 1);
 220   if (NumSetFlags != 1) {
 221     ExitOnErr.setBanner("llvm-exegesis: ");
 222     ExitWithError("please provide one and only one of 'opcode-index', "
 223                   "'opcode-name' or 'snippets-file'");
 224   }
 225   if (!SnippetsFile.empty())
 226     return {};
 227   if (OpcodeIndex > 0)
 228     return {static_cast<unsigned>(OpcodeIndex)};
 229   if (OpcodeIndex < 0) {
 230     std::vector<unsigned> Result;
 231     for (unsigned I = 1, E = MCInstrInfo.getNumOpcodes(); I < E; ++I)
 232       Result.push_back(I);
 233     return Result;
 234   }
 235   // Resolve opcode name -> opcode.
 236   const auto ResolveName = [&MCInstrInfo](StringRef OpcodeName) -> unsigned {
 237     for (unsigned I = 1, E = MCInstrInfo.getNumOpcodes(); I < E; ++I)
 238       if (MCInstrInfo.getName(I) == OpcodeName)
 239         return I;
 240     return 0u;
 241   };
 242   SmallVector<StringRef, 2> Pieces;
 243   StringRef(OpcodeNames.getValue())
 244       .split(Pieces, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
 245   std::vector<unsigned> Result;
 246   for (const StringRef &OpcodeName : Pieces) {
 247     if (unsigned Opcode = ResolveName(OpcodeName))
 248       Result.push_back(Opcode);
 249     else
 250       ExitWithError(Twine("unknown opcode ").concat(OpcodeName));
 251   }
 252   return Result;
 253 }
 254
 255 // Generates code snippets for opcode `Opcode`.
 256 static Expected<std::vector<BenchmarkCode>>
 257 generateSnippets(const LLVMState &State, unsigned Opcode,
 258                  const BitVector &ForbiddenRegs) {
 259   const Instruction &Instr = State.getIC().getInstr(Opcode);
 260   const MCInstrDesc &InstrDesc = Instr.Description;
 261   // Ignore instructions that we cannot run.
 262   if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook())
 263     return make_error<Failure>(
 264         "Unsupported opcode: isPseudo/usesCustomInserter");
 265   if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
 266     return make_error<Failure>("Unsupported opcode: isBranch/isIndirectBranch");
 267   if (InstrDesc.isCall() || InstrDesc.isReturn())
 268     return make_error<Failure>("Unsupported opcode: isCall/isReturn");
 269
 270   const std::vector<InstructionTemplate> InstructionVariants =
 271       State.getExegesisTarget().generateInstructionVariants(
 272           Instr, MaxConfigsPerOpcode);
 273
 274   SnippetGenerator::Options SnippetOptions;
 275   SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode;
 276   const std::unique_ptr<SnippetGenerator> Generator =
 277       State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State,
 278                                                        SnippetOptions);
 279   if (!Generator)
 280     ExitWithError("cannot create snippet generator");
 281
 282   std::vector<BenchmarkCode> Benchmarks;
 283   for (const InstructionTemplate &Variant : InstructionVariants) {
 284     if (Benchmarks.size() >= MaxConfigsPerOpcode)
 285       break;
 286     if (auto Err = Generator->generateConfigurations(Variant, Benchmarks,
 287                                                      ForbiddenRegs))
 288       return std::move(Err);
 289   }
 290   return Benchmarks;
 291 }
 292
 293 void benchmarkMain() {
 294 #ifndef HAVE_LIBPFM
 295   ExitWithError("benchmarking unavailable, LLVM was built without libpfm.");
 296 #endif
 297
 298   if (exegesis::pfm::pfmInitialize())
 299     ExitWithError("cannot initialize libpfm");
 300
 301   InitializeNativeTarget();
 302   InitializeNativeTargetAsmPrinter();
 303   InitializeNativeTargetAsmParser();
 304   InitializeNativeExegesisTarget();
 305
 306   const LLVMState State(CpuName);
 307
 308   // Preliminary check to ensure features needed for requested
 309   // benchmark mode are present on target CPU and/or OS.
 310   ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
 311
 312   const std::unique_ptr<BenchmarkRunner> Runner =
 313       ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
 314           BenchmarkMode, State, ResultAggMode));
 315   if (!Runner) {
 316     ExitWithError("cannot create benchmark runner");
 317   }
 318
 319   const auto Opcodes = getOpcodesOrDie(State.getInstrInfo());
 320
 321   SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
 322   if (RepetitionMode != InstructionBenchmark::RepetitionModeE::AggregateMin)
 323     Repetitors.emplace_back(SnippetRepetitor::Create(RepetitionMode, State));
 324   else {
 325     for (InstructionBenchmark::RepetitionModeE RepMode :
 326          {InstructionBenchmark::RepetitionModeE::Duplicate,
 327           InstructionBenchmark::RepetitionModeE::Loop})
 328       Repetitors.emplace_back(SnippetRepetitor::Create(RepMode, State));
 329   }
 330
 331   BitVector AllReservedRegs;
 332   llvm::for_each(Repetitors,
 333                  [&AllReservedRegs](
 334                      const std::unique_ptr<const SnippetRepetitor> &Repetitor) {
 335                    AllReservedRegs |= Repetitor->getReservedRegs();
 336                  });
 337
 338   std::vector<BenchmarkCode> Configurations;
 339   if (!Opcodes.empty()) {
 340     for (const unsigned Opcode : Opcodes) {
 341       // Ignore instructions without a sched class if
 342       // -ignore-invalid-sched-class is passed.
 343       if (IgnoreInvalidSchedClass &&
 344           State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
 345         errs() << State.getInstrInfo().getName(Opcode)
 346                << ": ignoring instruction without sched class\n";
 347         continue;
 348       }
 349
 350       auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
 351       if (!ConfigsForInstr) {
 352         logAllUnhandledErrors(
 353             ConfigsForInstr.takeError(), errs(),
 354             Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
 355         continue;
 356       }
 357       std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
 358                 std::back_inserter(Configurations));
 359     }
 360   } else {
 361     Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
 362   }
 363
 364   if (NumRepetitions == 0) {
 365     ExitOnErr.setBanner("llvm-exegesis: ");
 366     ExitWithError("--num-repetitions must be greater than zero");
 367   }
 368
 369   // Write to standard output if file is not set.
 370   if (BenchmarkFile.empty())
 371     BenchmarkFile = "-";
 372
 373   for (const BenchmarkCode &Conf : Configurations) {
 374     InstructionBenchmark Result = ExitOnErr(Runner->runConfiguration(
 375         Conf, NumRepetitions, LoopBodySize, Repetitors, DumpObjectToDisk));
 376     ExitOnFileError(BenchmarkFile, Result.writeYaml(State, BenchmarkFile));
 377   }
 378   exegesis::pfm::pfmTerminate();
 379 }
 380
 381 // Prints the results of running analysis pass `Pass` to file `OutputFilename`
 382 // if OutputFilename is non-empty.
 383 template <typename Pass>
 384 static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
 385                              const std::string &OutputFilename) {
 386   if (OutputFilename.empty())
 387     return;
 388   if (OutputFilename != "-") {
 389     errs() << "Printing " << Name << " results to file '" << OutputFilename
 390            << "'\n";
 391   }
 392   std::error_code ErrorCode;
 393   raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
 394                             sys::fs::FA_Read | sys::fs::FA_Write);
 395   if (ErrorCode)
 396     ExitOnFileError(OutputFilename, errorCodeToError(ErrorCode));
 397   if (auto Err = Analyzer.run<Pass>(ClustersOS))
 398     ExitOnFileError(OutputFilename, std::move(Err));
 399 }
 400
 401 static void analysisMain() {
 402   ExitOnErr.setBanner("llvm-exegesis: ");
 403   if (BenchmarkFile.empty())
 404     ExitWithError("--benchmarks-file must be set");
 405
 406   if (AnalysisClustersOutputFile.empty() &&
 407       AnalysisInconsistenciesOutputFile.empty()) {
 408     ExitWithError(
 409         "for --mode=analysis: At least one of --analysis-clusters-output-file "
 410         "and --analysis-inconsistencies-output-file must be specified");
 411   }
 412
 413   InitializeNativeTarget();
 414   InitializeNativeTargetAsmPrinter();
 415   InitializeNativeTargetDisassembler();
 416
 417   // Read benchmarks.
 418   const LLVMState State("");
 419   const std::vector<InstructionBenchmark> Points = ExitOnFileError(
 420       BenchmarkFile, InstructionBenchmark::readYamls(State, BenchmarkFile));
 421
 422   outs() << "Parsed " << Points.size() << " benchmark points\n";
 423   if (Points.empty()) {
 424     errs() << "no benchmarks to analyze\n";
 425     return;
 426   }
 427   // FIXME: Check that all points have the same triple/cpu.
 428   // FIXME: Merge points from several runs (latency and uops).
 429
 430   std::string Error;
 431   const auto *TheTarget =
 432       TargetRegistry::lookupTarget(Points[0].LLVMTriple, Error);
 433   if (!TheTarget) {
 434     errs() << "unknown target '" << Points[0].LLVMTriple << "'\n";
 435     return;
 436   }
 437
 438   std::unique_ptr<MCSubtargetInfo> SubtargetInfo(
 439       TheTarget->createMCSubtargetInfo(Points[0].LLVMTriple, CpuName, ""));
 440
 441   std::unique_ptr<MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
 442   assert(InstrInfo && "Unable to create instruction info!");
 443
 444   const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
 445       Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
 446       AnalysisClusteringEpsilon, SubtargetInfo.get(), InstrInfo.get()));
 447
 448   const Analysis Analyzer(
 449       *TheTarget, std::move(SubtargetInfo), std::move(InstrInfo), Clustering,
 450       AnalysisInconsistencyEpsilon, AnalysisDisplayUnstableOpcodes, CpuName);
 451
 452   maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
 453                                             AnalysisClustersOutputFile);
 454   maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>(
 455       Analyzer, "sched class consistency analysis",
 456       AnalysisInconsistenciesOutputFile);
 457 }
 458
 459 } // namespace exegesis
 460 } // namespace llvm
 461
 462 int main(int Argc, char **Argv) {
 463   using namespace llvm;
 464   cl::ParseCommandLineOptions(Argc, Argv, "");
 465
 466   exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) {
 467     if (Err.isA<exegesis::ClusteringError>())
 468       return EXIT_SUCCESS;
 469     return EXIT_FAILURE;
 470   });
 471
 472   if (exegesis::BenchmarkMode == exegesis::InstructionBenchmark::Unknown) {
 473     exegesis::analysisMain();
 474   } else {
 475     exegesis::benchmarkMain();
 476   }
 477   return EXIT_SUCCESS;
 478 }