[ARM] Cortex-M4 schedule additions
[llvm-complete.git] / tools / llvm-exegesis / llvm-exegesis.cpp
blob6c4567870d21ec1704e8d28ce2958a4990c76883
1 //===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Measures execution properties (latencies/uops) of an instruction.
11 ///
12 //===----------------------------------------------------------------------===//
14 #include "lib/Analysis.h"
15 #include "lib/BenchmarkResult.h"
16 #include "lib/BenchmarkRunner.h"
17 #include "lib/Clustering.h"
18 #include "lib/LlvmState.h"
19 #include "lib/PerfHelper.h"
20 #include "lib/SnippetRepetitor.h"
21 #include "lib/Target.h"
22 #include "lib/TargetSelect.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/MC/MCInstBuilder.h"
26 #include "llvm/MC/MCObjectFileInfo.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCStreamer.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/Object/ObjectFile.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Format.h"
35 #include "llvm/Support/Path.h"
36 #include "llvm/Support/SourceMgr.h"
37 #include "llvm/Support/TargetRegistry.h"
38 #include "llvm/Support/TargetSelect.h"
39 #include <algorithm>
40 #include <string>
42 namespace llvm {
43 namespace exegesis {
45 static cl::OptionCategory Options("llvm-exegesis options");
46 static cl::OptionCategory BenchmarkOptions("llvm-exegesis benchmark options");
47 static cl::OptionCategory AnalysisOptions("llvm-exegesis analysis options");
49 static cl::opt<int> OpcodeIndex("opcode-index",
50 cl::desc("opcode to measure, by index"),
51 cl::cat(BenchmarkOptions), cl::init(0));
53 static cl::opt<std::string>
54 OpcodeNames("opcode-name",
55 cl::desc("comma-separated list of opcodes to measure, by name"),
56 cl::cat(BenchmarkOptions), cl::init(""));
58 static cl::opt<std::string> SnippetsFile("snippets-file",
59 cl::desc("code snippets to measure"),
60 cl::cat(BenchmarkOptions),
61 cl::init(""));
63 static cl::opt<std::string>
64 BenchmarkFile("benchmarks-file",
65 cl::desc("File to read (analysis mode) or write "
66 "(latency/uops/inverse_throughput modes) benchmark "
67 "results. “-” uses stdin/stdout."),
68 cl::cat(Options), cl::init(""));
70 static cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
71 "mode", cl::desc("the mode to run"), cl::cat(Options),
72 cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency, "latency",
73 "Instruction Latency"),
74 clEnumValN(exegesis::InstructionBenchmark::InverseThroughput,
75 "inverse_throughput",
76 "Instruction Inverse Throughput"),
77 clEnumValN(exegesis::InstructionBenchmark::Uops, "uops",
78 "Uop Decomposition"),
79 // When not asking for a specific benchmark mode,
80 // we'll analyse the results.
81 clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis",
82 "Analysis")));
84 static cl::opt<exegesis::InstructionBenchmark::RepetitionModeE> RepetitionMode(
85 "repetition-mode", cl::desc("how to repeat the instruction snippet"),
86 cl::cat(BenchmarkOptions),
87 cl::values(clEnumValN(exegesis::InstructionBenchmark::Duplicate,
88 "duplicate", "Duplicate the snippet"),
89 clEnumValN(exegesis::InstructionBenchmark::Loop, "loop",
90 "Loop over the snippet")));
92 static cl::opt<unsigned>
93 NumRepetitions("num-repetitions",
94 cl::desc("number of time to repeat the asm snippet"),
95 cl::cat(BenchmarkOptions), cl::init(10000));
97 static cl::opt<bool> IgnoreInvalidSchedClass(
98 "ignore-invalid-sched-class",
99 cl::desc("ignore instructions that do not define a sched class"),
100 cl::cat(BenchmarkOptions), cl::init(false));
102 static cl::opt<exegesis::InstructionBenchmarkClustering::ModeE>
103 AnalysisClusteringAlgorithm(
104 "analysis-clustering", cl::desc("the clustering algorithm to use"),
105 cl::cat(AnalysisOptions),
106 cl::values(clEnumValN(exegesis::InstructionBenchmarkClustering::Dbscan,
107 "dbscan", "use DBSCAN/OPTICS algorithm"),
108 clEnumValN(exegesis::InstructionBenchmarkClustering::Naive,
109 "naive", "one cluster per opcode")),
110 cl::init(exegesis::InstructionBenchmarkClustering::Dbscan));
112 static cl::opt<unsigned> AnalysisDbscanNumPoints(
113 "analysis-numpoints",
114 cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
115 cl::cat(AnalysisOptions), cl::init(3));
117 static cl::opt<float> AnalysisClusteringEpsilon(
118 "analysis-clustering-epsilon",
119 cl::desc("epsilon for benchmark point clustering"),
120 cl::cat(AnalysisOptions), cl::init(0.1));
122 static cl::opt<float> AnalysisInconsistencyEpsilon(
123 "analysis-inconsistency-epsilon",
124 cl::desc("epsilon for detection of when the cluster is different from the "
125 "LLVM schedule profile values"),
126 cl::cat(AnalysisOptions), cl::init(0.1));
128 static cl::opt<std::string>
129 AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
130 cl::cat(AnalysisOptions), cl::init(""));
131 static cl::opt<std::string>
132 AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
133 cl::desc(""), cl::cat(AnalysisOptions),
134 cl::init(""));
136 static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
137 "analysis-display-unstable-clusters",
138 cl::desc("if there is more than one benchmark for an opcode, said "
139 "benchmarks may end up not being clustered into the same cluster "
140 "if the measured performance characteristics are different. by "
141 "default all such opcodes are filtered out. this flag will "
142 "instead show only such unstable opcodes"),
143 cl::cat(AnalysisOptions), cl::init(false));
145 static cl::opt<std::string> CpuName(
146 "mcpu",
147 cl::desc("cpu name to use for pfm counters, leave empty to autodetect"),
148 cl::cat(Options), cl::init(""));
150 static cl::opt<bool>
151 DumpObjectToDisk("dump-object-to-disk",
152 cl::desc("dumps the generated benchmark object to disk "
153 "and prints a message to access it"),
154 cl::cat(BenchmarkOptions), cl::init(true));
156 static ExitOnError ExitOnErr;
158 // Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
159 // and returns the opcode indices or {} if snippets should be read from
160 // `SnippetsFile`.
161 static std::vector<unsigned>
162 getOpcodesOrDie(const llvm::MCInstrInfo &MCInstrInfo) {
163 const size_t NumSetFlags = (OpcodeNames.empty() ? 0 : 1) +
164 (OpcodeIndex == 0 ? 0 : 1) +
165 (SnippetsFile.empty() ? 0 : 1);
166 if (NumSetFlags != 1)
167 llvm::report_fatal_error(
168 "please provide one and only one of 'opcode-index', 'opcode-name' or "
169 "'snippets-file'");
170 if (!SnippetsFile.empty())
171 return {};
172 if (OpcodeIndex > 0)
173 return {static_cast<unsigned>(OpcodeIndex)};
174 if (OpcodeIndex < 0) {
175 std::vector<unsigned> Result;
176 for (unsigned I = 1, E = MCInstrInfo.getNumOpcodes(); I < E; ++I)
177 Result.push_back(I);
178 return Result;
180 // Resolve opcode name -> opcode.
181 const auto ResolveName =
182 [&MCInstrInfo](llvm::StringRef OpcodeName) -> unsigned {
183 for (unsigned I = 1, E = MCInstrInfo.getNumOpcodes(); I < E; ++I)
184 if (MCInstrInfo.getName(I) == OpcodeName)
185 return I;
186 return 0u;
188 llvm::SmallVector<llvm::StringRef, 2> Pieces;
189 llvm::StringRef(OpcodeNames.getValue())
190 .split(Pieces, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
191 std::vector<unsigned> Result;
192 for (const llvm::StringRef OpcodeName : Pieces) {
193 if (unsigned Opcode = ResolveName(OpcodeName))
194 Result.push_back(Opcode);
195 else
196 llvm::report_fatal_error(
197 llvm::Twine("unknown opcode ").concat(OpcodeName));
199 return Result;
202 // Generates code snippets for opcode `Opcode`.
203 static llvm::Expected<std::vector<BenchmarkCode>>
204 generateSnippets(const LLVMState &State, unsigned Opcode,
205 const llvm::BitVector &ForbiddenRegs) {
206 const Instruction &Instr = State.getIC().getInstr(Opcode);
207 const llvm::MCInstrDesc &InstrDesc = *Instr.Description;
208 // Ignore instructions that we cannot run.
209 if (InstrDesc.isPseudo())
210 return llvm::make_error<BenchmarkFailure>("Unsupported opcode: isPseudo");
211 if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
212 return llvm::make_error<BenchmarkFailure>(
213 "Unsupported opcode: isBranch/isIndirectBranch");
214 if (InstrDesc.isCall() || InstrDesc.isReturn())
215 return llvm::make_error<BenchmarkFailure>(
216 "Unsupported opcode: isCall/isReturn");
218 const std::unique_ptr<SnippetGenerator> Generator =
219 State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State);
220 if (!Generator)
221 llvm::report_fatal_error("cannot create snippet generator");
222 return Generator->generateConfigurations(Instr, ForbiddenRegs);
225 namespace {
227 // An MCStreamer that reads a BenchmarkCode definition from a file.
228 // The BenchmarkCode definition is just an asm file, with additional comments to
229 // specify which registers should be defined or are live on entry.
230 class BenchmarkCodeStreamer : public llvm::MCStreamer,
231 public llvm::AsmCommentConsumer {
232 public:
233 explicit BenchmarkCodeStreamer(llvm::MCContext *Context,
234 const llvm::MCRegisterInfo *TheRegInfo,
235 BenchmarkCode *Result)
236 : llvm::MCStreamer(*Context), RegInfo(TheRegInfo), Result(Result) {}
238 // Implementation of the llvm::MCStreamer interface. We only care about
239 // instructions.
240 void EmitInstruction(const llvm::MCInst &Instruction,
241 const llvm::MCSubtargetInfo &STI) override {
242 Result->Instructions.push_back(Instruction);
245 // Implementation of the llvm::AsmCommentConsumer.
246 void HandleComment(llvm::SMLoc Loc, llvm::StringRef CommentText) override {
247 CommentText = CommentText.trim();
248 if (!CommentText.consume_front("LLVM-EXEGESIS-"))
249 return;
250 if (CommentText.consume_front("DEFREG")) {
251 // LLVM-EXEGESIS-DEFREF <reg> <hex_value>
252 RegisterValue RegVal;
253 llvm::SmallVector<llvm::StringRef, 2> Parts;
254 CommentText.split(Parts, ' ', /*unlimited splits*/ -1,
255 /*do not keep empty strings*/ false);
256 if (Parts.size() != 2) {
257 llvm::errs() << "invalid comment 'LLVM-EXEGESIS-DEFREG " << CommentText
258 << "\n";
259 ++InvalidComments;
261 if (!(RegVal.Register = findRegisterByName(Parts[0].trim()))) {
262 llvm::errs() << "unknown register in 'LLVM-EXEGESIS-DEFREG "
263 << CommentText << "\n";
264 ++InvalidComments;
265 return;
267 const llvm::StringRef HexValue = Parts[1].trim();
268 RegVal.Value = llvm::APInt(
269 /* each hex digit is 4 bits */ HexValue.size() * 4, HexValue, 16);
270 Result->RegisterInitialValues.push_back(std::move(RegVal));
271 return;
273 if (CommentText.consume_front("LIVEIN")) {
274 // LLVM-EXEGESIS-LIVEIN <reg>
275 if (unsigned Reg = findRegisterByName(CommentText.ltrim()))
276 Result->LiveIns.push_back(Reg);
277 else {
278 llvm::errs() << "unknown register in 'LLVM-EXEGESIS-LIVEIN "
279 << CommentText << "\n";
280 ++InvalidComments;
282 return;
286 unsigned numInvalidComments() const { return InvalidComments; }
288 private:
289 // We only care about instructions, we don't implement this part of the API.
290 void EmitCommonSymbol(llvm::MCSymbol *Symbol, uint64_t Size,
291 unsigned ByteAlignment) override {}
292 bool EmitSymbolAttribute(llvm::MCSymbol *Symbol,
293 llvm::MCSymbolAttr Attribute) override {
294 return false;
296 void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
297 unsigned ValueSize,
298 unsigned MaxBytesToEmit) override {}
299 void EmitZerofill(llvm::MCSection *Section, llvm::MCSymbol *Symbol,
300 uint64_t Size, unsigned ByteAlignment,
301 llvm::SMLoc Loc) override {}
303 unsigned findRegisterByName(const llvm::StringRef RegName) const {
304 // FIXME: Can we do better than this ?
305 for (unsigned I = 0, E = RegInfo->getNumRegs(); I < E; ++I) {
306 if (RegName == RegInfo->getName(I))
307 return I;
309 llvm::errs() << "'" << RegName
310 << "' is not a valid register name for the target\n";
311 return 0;
314 const llvm::MCRegisterInfo *const RegInfo;
315 BenchmarkCode *const Result;
316 unsigned InvalidComments = 0;
319 } // namespace
321 // Reads code snippets from file `Filename`.
322 static llvm::Expected<std::vector<BenchmarkCode>>
323 readSnippets(const LLVMState &State, llvm::StringRef Filename) {
324 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferPtr =
325 llvm::MemoryBuffer::getFileOrSTDIN(Filename);
326 if (std::error_code EC = BufferPtr.getError()) {
327 return llvm::make_error<BenchmarkFailure>(
328 "cannot read snippet: " + Filename + ": " + EC.message());
330 llvm::SourceMgr SM;
331 SM.AddNewSourceBuffer(std::move(BufferPtr.get()), llvm::SMLoc());
333 BenchmarkCode Result;
335 llvm::MCObjectFileInfo ObjectFileInfo;
336 const llvm::TargetMachine &TM = State.getTargetMachine();
337 llvm::MCContext Context(TM.getMCAsmInfo(), TM.getMCRegisterInfo(),
338 &ObjectFileInfo);
339 ObjectFileInfo.InitMCObjectFileInfo(TM.getTargetTriple(), /*PIC*/ false,
340 Context);
341 BenchmarkCodeStreamer Streamer(&Context, TM.getMCRegisterInfo(), &Result);
342 const std::unique_ptr<llvm::MCAsmParser> AsmParser(
343 llvm::createMCAsmParser(SM, Context, Streamer, *TM.getMCAsmInfo()));
344 if (!AsmParser)
345 return llvm::make_error<BenchmarkFailure>("cannot create asm parser");
346 AsmParser->getLexer().setCommentConsumer(&Streamer);
348 const std::unique_ptr<llvm::MCTargetAsmParser> TargetAsmParser(
349 TM.getTarget().createMCAsmParser(*TM.getMCSubtargetInfo(), *AsmParser,
350 *TM.getMCInstrInfo(),
351 llvm::MCTargetOptions()));
353 if (!TargetAsmParser)
354 return llvm::make_error<BenchmarkFailure>(
355 "cannot create target asm parser");
356 AsmParser->setTargetParser(*TargetAsmParser);
358 if (AsmParser->Run(false))
359 return llvm::make_error<BenchmarkFailure>("cannot parse asm file");
360 if (Streamer.numInvalidComments())
361 return llvm::make_error<BenchmarkFailure>(
362 llvm::Twine("found ")
363 .concat(llvm::Twine(Streamer.numInvalidComments()))
364 .concat(" invalid LLVM-EXEGESIS comments"));
365 return std::vector<BenchmarkCode>{std::move(Result)};
368 void benchmarkMain() {
369 #ifndef HAVE_LIBPFM
370 llvm::report_fatal_error(
371 "benchmarking unavailable, LLVM was built without libpfm.");
372 #endif
374 if (exegesis::pfm::pfmInitialize())
375 llvm::report_fatal_error("cannot initialize libpfm");
377 llvm::InitializeNativeTarget();
378 llvm::InitializeNativeTargetAsmPrinter();
379 llvm::InitializeNativeTargetAsmParser();
380 InitializeNativeExegesisTarget();
382 const LLVMState State(CpuName);
383 const auto Opcodes = getOpcodesOrDie(State.getInstrInfo());
385 const auto Repetitor = SnippetRepetitor::Create(RepetitionMode, State);
387 std::vector<BenchmarkCode> Configurations;
388 if (!Opcodes.empty()) {
389 for (const unsigned Opcode : Opcodes) {
390 // Ignore instructions without a sched class if
391 // -ignore-invalid-sched-class is passed.
392 if (IgnoreInvalidSchedClass &&
393 State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
394 llvm::errs() << State.getInstrInfo().getName(Opcode)
395 << ": ignoring instruction without sched class\n";
396 continue;
398 auto ConfigsForInstr =
399 generateSnippets(State, Opcode, Repetitor->getReservedRegs());
400 if (!ConfigsForInstr) {
401 llvm::logAllUnhandledErrors(
402 ConfigsForInstr.takeError(), llvm::errs(),
403 llvm::Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
404 continue;
406 std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
407 std::back_inserter(Configurations));
409 } else {
410 Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
413 const std::unique_ptr<BenchmarkRunner> Runner =
414 State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State);
415 if (!Runner) {
416 llvm::report_fatal_error("cannot create benchmark runner");
419 if (NumRepetitions == 0)
420 llvm::report_fatal_error("--num-repetitions must be greater than zero");
422 // Write to standard output if file is not set.
423 if (BenchmarkFile.empty())
424 BenchmarkFile = "-";
426 for (const BenchmarkCode &Conf : Configurations) {
427 InstructionBenchmark Result = Runner->runConfiguration(
428 Conf, NumRepetitions, *Repetitor, DumpObjectToDisk);
429 ExitOnErr(Result.writeYaml(State, BenchmarkFile));
431 exegesis::pfm::pfmTerminate();
434 // Prints the results of running analysis pass `Pass` to file `OutputFilename`
435 // if OutputFilename is non-empty.
436 template <typename Pass>
437 static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
438 const std::string &OutputFilename) {
439 if (OutputFilename.empty())
440 return;
441 if (OutputFilename != "-") {
442 llvm::errs() << "Printing " << Name << " results to file '"
443 << OutputFilename << "'\n";
445 std::error_code ErrorCode;
446 llvm::raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
447 llvm::sys::fs::FA_Read |
448 llvm::sys::fs::FA_Write);
449 if (ErrorCode)
450 llvm::report_fatal_error("cannot open out file: " + OutputFilename);
451 if (auto Err = Analyzer.run<Pass>(ClustersOS))
452 llvm::report_fatal_error(std::move(Err));
455 static void analysisMain() {
456 if (BenchmarkFile.empty())
457 llvm::report_fatal_error("--benchmarks-file must be set.");
459 if (AnalysisClustersOutputFile.empty() &&
460 AnalysisInconsistenciesOutputFile.empty()) {
461 llvm::report_fatal_error(
462 "At least one of --analysis-clusters-output-file and "
463 "--analysis-inconsistencies-output-file must be specified.");
466 llvm::InitializeNativeTarget();
467 llvm::InitializeNativeTargetAsmPrinter();
468 llvm::InitializeNativeTargetDisassembler();
469 // Read benchmarks.
470 const LLVMState State("");
471 const std::vector<InstructionBenchmark> Points =
472 ExitOnErr(InstructionBenchmark::readYamls(State, BenchmarkFile));
473 llvm::outs() << "Parsed " << Points.size() << " benchmark points\n";
474 if (Points.empty()) {
475 llvm::errs() << "no benchmarks to analyze\n";
476 return;
478 // FIXME: Check that all points have the same triple/cpu.
479 // FIXME: Merge points from several runs (latency and uops).
481 std::string Error;
482 const auto *TheTarget =
483 llvm::TargetRegistry::lookupTarget(Points[0].LLVMTriple, Error);
484 if (!TheTarget) {
485 llvm::errs() << "unknown target '" << Points[0].LLVMTriple << "'\n";
486 return;
489 std::unique_ptr<llvm::MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
491 const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
492 Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
493 AnalysisClusteringEpsilon, InstrInfo->getNumOpcodes()));
495 const Analysis Analyzer(*TheTarget, std::move(InstrInfo), Clustering,
496 AnalysisInconsistencyEpsilon,
497 AnalysisDisplayUnstableOpcodes);
499 maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
500 AnalysisClustersOutputFile);
501 maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>(
502 Analyzer, "sched class consistency analysis",
503 AnalysisInconsistenciesOutputFile);
506 } // namespace exegesis
507 } // namespace llvm
509 int main(int Argc, char **Argv) {
510 using namespace llvm;
511 cl::ParseCommandLineOptions(Argc, Argv, "");
513 exegesis::ExitOnErr.setExitCodeMapper([](const llvm::Error &Err) {
514 if (Err.isA<llvm::StringError>())
515 return EXIT_SUCCESS;
516 return EXIT_FAILURE;
519 if (exegesis::BenchmarkMode == exegesis::InstructionBenchmark::Unknown) {
520 exegesis::analysisMain();
521 } else {
522 exegesis::benchmarkMain();
524 return EXIT_SUCCESS;