1 //===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Measures execution properties (latencies/uops) of an instruction.
12 //===----------------------------------------------------------------------===//
14 #include "lib/Analysis.h"
15 #include "lib/BenchmarkResult.h"
16 #include "lib/BenchmarkRunner.h"
17 #include "lib/Clustering.h"
18 #include "lib/Error.h"
19 #include "lib/LlvmState.h"
20 #include "lib/PerfHelper.h"
21 #include "lib/SnippetFile.h"
22 #include "lib/SnippetRepetitor.h"
23 #include "lib/Target.h"
24 #include "lib/TargetSelect.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/MC/MCInstBuilder.h"
28 #include "llvm/MC/MCObjectFileInfo.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Object/ObjectFile.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/Path.h"
39 #include "llvm/Support/SourceMgr.h"
40 #include "llvm/Support/TargetSelect.h"
// Command-line option categories. Every cl::opt declared in this file attaches
// itself to one of these three through cl::cat(...).
47 static cl::OptionCategory
Options("llvm-exegesis options");
48 static cl::OptionCategory
BenchmarkOptions("llvm-exegesis benchmark options");
49 static cl::OptionCategory
AnalysisOptions("llvm-exegesis analysis options");
// Opcode selection by numeric index. The default of 0 doubles as "not set":
// getOpcodesOrDie() counts this flag as provided only when it is non-zero and
// enters its loop over every opcode when it is negative.
// NOTE(review): the line carrying the flag-name string is not visible in this
// extract.
51 static cl::opt
<int> OpcodeIndex(
53 cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
54 cl::cat(BenchmarkOptions
), cl::init(0));
// Opcode selection by name. getOpcodesOrDie() splits the value on ',' and
// discards empty pieces; an empty string (the default) means "not set".
56 static cl::opt
<std::string
>
57 OpcodeNames("opcode-name",
58 cl::desc("comma-separated list of opcodes to measure, by name"),
59 cl::cat(BenchmarkOptions
), cl::init(""));
// File of code snippets to benchmark; benchmarkMain() hands it to
// readSnippets().
// NOTE(review): the end of this declaration (everything after cl::cat) is not
// visible in this extract.
61 static cl::opt
<std::string
> SnippetsFile("snippets-file",
62 cl::desc("code snippets to measure"),
63 cl::cat(BenchmarkOptions
),
// Path of the benchmark results file: analysisMain() reads it through
// InstructionBenchmark::readYamls and benchmarkMain() writes each result to it
// through writeYaml. Defaults to the empty string.
66 static cl::opt
<std::string
>
67 BenchmarkFile("benchmarks-file",
68 cl::desc("File to read (analysis mode) or write "
69 "(latency/uops/inverse_throughput modes) benchmark "
70 "results. “-” uses stdin/stdout."),
71 cl::cat(Options
), cl::init(""));
// Selects what the tool does. main() dispatches to analysisMain() when the
// value equals InstructionBenchmark::Unknown (spelled "analysis" on the
// command line).
// NOTE(review): several lines of this declaration are not visible in this
// extract (the flag string for InverseThroughput, the help strings for "uops"
// and "analysis", and the closing of the option including any cl::init).
73 static cl::opt
<exegesis::InstructionBenchmark::ModeE
> BenchmarkMode(
74 "mode", cl::desc("the mode to run"), cl::cat(Options
),
75 cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency
, "latency",
76 "Instruction Latency"),
77 clEnumValN(exegesis::InstructionBenchmark::InverseThroughput
,
79 "Instruction Inverse Throughput"),
80 clEnumValN(exegesis::InstructionBenchmark::Uops
, "uops",
82 // When not asking for a specific benchmark mode,
83 // we'll analyse the results.
84 clEnumValN(exegesis::InstructionBenchmark::Unknown
, "analysis",
// How multiple readings of one measurement are combined; defaults to Min.
// NOTE(review): the line naming this variable is not visible in this extract.
// benchmarkMain() passes an identifier `ResultAggMode` to
// createBenchmarkRunner, which is presumably this option - confirm. The help
// strings for "min"/"max" and the flag string for MinVariance are also missing.
87 static cl::opt
<exegesis::InstructionBenchmark::ResultAggregationModeE
>
89 "result-aggregation-mode",
90 cl::desc("How to aggregate multi-values result"), cl::cat(Options
),
91 cl::values(clEnumValN(exegesis::InstructionBenchmark::Min
, "min",
93 clEnumValN(exegesis::InstructionBenchmark::Max
, "max",
95 clEnumValN(exegesis::InstructionBenchmark::Mean
, "mean",
96 "Compute mean of all readings"),
97 clEnumValN(exegesis::InstructionBenchmark::MinVariance
,
99 "Keep readings set with min-variance")),
100 cl::init(exegesis::InstructionBenchmark::Min
));
// How the snippet is repeated: "duplicate", "loop", or "min" (AggregateMin).
// Defaults to Duplicate. benchmarkMain() builds a single repetitor for any
// mode other than AggregateMin; for AggregateMin it builds one for Duplicate
// and one for Loop.
// NOTE(review): the line that opens the enumerator list is not visible in this
// extract.
102 static cl::opt
<exegesis::InstructionBenchmark::RepetitionModeE
> RepetitionMode(
103 "repetition-mode", cl::desc("how to repeat the instruction snippet"),
104 cl::cat(BenchmarkOptions
),
106 clEnumValN(exegesis::InstructionBenchmark::Duplicate
, "duplicate",
107 "Duplicate the snippet"),
108 clEnumValN(exegesis::InstructionBenchmark::Loop
, "loop",
109 "Loop over the snippet"),
110 clEnumValN(exegesis::InstructionBenchmark::AggregateMin
, "min",
111 "All of the above and take the minimum of measurements")),
112 cl::init(exegesis::InstructionBenchmark::Duplicate
));
// Repetition count forwarded to BenchmarkRunner::runConfiguration. Defaults to
// 10000; benchmarkMain() rejects a value of 0 with an error.
114 static cl::opt
<unsigned>
115 NumRepetitions("num-repetitions",
116 cl::desc("number of time to repeat the asm snippet"),
117 cl::cat(BenchmarkOptions
), cl::init(10000));
// Minimum loop-body size (in instructions, per the help text) forwarded to
// BenchmarkRunner::runConfiguration. Defaults to 0.
119 static cl::opt
<unsigned>
120 LoopBodySize("loop-body-size",
121 cl::desc("when repeating the instruction snippet by looping "
122 "over it, duplicate the snippet until the loop body "
123 "contains at least this many instruction"),
124 cl::cat(BenchmarkOptions
), cl::init(0));
// Upper bound on generated configurations per opcode. Defaults to 1.
// generateSnippets() passes it to generateInstructionVariants, copies it into
// SnippetGenerator::Options, and compares it against the number of benchmarks
// collected so far.
// NOTE(review): the line opening cl::desc( is not visible in this extract.
126 static cl::opt
<unsigned> MaxConfigsPerOpcode(
127 "max-configs-per-opcode",
129 "allow to snippet generator to generate at most that many configs"),
130 cl::cat(BenchmarkOptions
), cl::init(1));
// When set, benchmarkMain() reports opcodes whose sched class is 0 on stderr
// as ignored. Defaults to false.
132 static cl::opt
<bool> IgnoreInvalidSchedClass(
133 "ignore-invalid-sched-class",
134 cl::desc("ignore instructions that do not define a sched class"),
135 cl::cat(BenchmarkOptions
), cl::init(false));
// Clustering algorithm handed to InstructionBenchmarkClustering::create in
// analysisMain(): "dbscan" (the default) or "naive".
137 static cl::opt
<exegesis::InstructionBenchmarkClustering::ModeE
>
138 AnalysisClusteringAlgorithm(
139 "analysis-clustering", cl::desc("the clustering algorithm to use"),
140 cl::cat(AnalysisOptions
),
141 cl::values(clEnumValN(exegesis::InstructionBenchmarkClustering::Dbscan
,
142 "dbscan", "use DBSCAN/OPTICS algorithm"),
143 clEnumValN(exegesis::InstructionBenchmarkClustering::Naive
,
144 "naive", "one cluster per opcode")),
145 cl::init(exegesis::InstructionBenchmarkClustering::Dbscan
));
// Minimum cluster size, passed to InstructionBenchmarkClustering::create.
// Defaults to 3.
147 static cl::opt
<unsigned> AnalysisDbscanNumPoints(
148 "analysis-numpoints",
149 cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
150 cl::cat(AnalysisOptions
), cl::init(3));
// Clustering epsilon, passed to InstructionBenchmarkClustering::create.
// Defaults to 0.1.
152 static cl::opt
<float> AnalysisClusteringEpsilon(
153 "analysis-clustering-epsilon",
154 cl::desc("epsilon for benchmark point clustering"),
155 cl::cat(AnalysisOptions
), cl::init(0.1));
// Inconsistency epsilon, passed to the Analysis constructor in analysisMain().
// Defaults to 0.1.
157 static cl::opt
<float> AnalysisInconsistencyEpsilon(
158 "analysis-inconsistency-epsilon",
159 cl::desc("epsilon for detection of when the cluster is different from the "
160 "LLVM schedule profile values"),
161 cl::cat(AnalysisOptions
), cl::init(0.1));
// Output file for the Analysis::PrintClusters pass. An empty value (the
// default) is one half of the "at least one output file" check in
// analysisMain(). The help text is intentionally left empty here.
163 static cl::opt
<std::string
>
164 AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
165 cl::cat(AnalysisOptions
), cl::init(""));
// Output file for the Analysis::PrintSchedClassInconsistencies pass.
// NOTE(review): the end of this declaration (everything after cl::cat) is not
// visible in this extract.
166 static cl::opt
<std::string
>
167 AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
168 cl::desc(""), cl::cat(AnalysisOptions
),
// Passed to the Analysis constructor in analysisMain(). Note that the flag is
// spelled "...-unstable-clusters" while the variable says "...UnstableOpcodes".
// Defaults to false.
171 static cl::opt
<bool> AnalysisDisplayUnstableOpcodes(
172 "analysis-display-unstable-clusters",
173 cl::desc("if there is more than one benchmark for an opcode, said "
174 "benchmarks may end up not being clustered into the same cluster "
175 "if the measured performance characteristics are different. by "
176 "default all such opcodes are filtered out. this flag will "
177 "instead show only such unstable opcodes"),
178 cl::cat(AnalysisOptions
), cl::init(false));
// CPU name. benchmarkMain() constructs its LLVMState from it; analysisMain()
// passes it to createMCSubtargetInfo and to the Analysis constructor.
// Defaults to the empty string.
// NOTE(review): the line carrying the flag-name string is not visible in this
// extract.
180 static cl::opt
<std::string
> CpuName(
182 cl::desc("cpu name to use for pfm counters, leave empty to autodetect"),
183 cl::cat(Options
), cl::init(""));
// Forwarded as the last argument of BenchmarkRunner::runConfiguration in
// benchmarkMain(). Defaults to true.
// NOTE(review): the head of this declaration (storage class and option type)
// is not visible in this extract.
186 DumpObjectToDisk("dump-object-to-disk",
187 cl::desc("dumps the generated benchmark object to disk "
188 "and prints a message to access it"),
189 cl::cat(BenchmarkOptions
), cl::init(true));
// File-wide error handler used by every helper below. It starts with the
// banner "llvm-exegesis error: "; several call sites switch the banner to
// "llvm-exegesis: " via setBanner() before reporting usage errors, and main()
// installs an exit-code mapper on it.
191 static ExitOnError
ExitOnErr("llvm-exegesis error: ");
193 // Helper function that logs the error(s) and exits.
// Perfect-forwards `Args` into make_error<Failure>(...) and hands the
// resulting error to the file-level ExitOnErr handler.
// NOTE(review): the closing brace of this function is not visible in this
// extract.
194 template <typename
... ArgTs
> static void ExitWithError(ArgTs
&&... Args
) {
195 ExitOnErr(make_error
<Failure
>(std::forward
<ArgTs
>(Args
)...));
198 // Check Err. If it's in a failure state log the file error(s) and exit.
// The visible statement wraps `Err` with createFileError(FileName, ...) so the
// report names the file, then passes it to ExitOnErr.
// NOTE(review): the line between the signature and this call (presumably the
// failure-state check the comment above describes) and the closing braces are
// not visible in this extract.
199 static void ExitOnFileError(const Twine
&FileName
, Error Err
) {
201 ExitOnErr(createFileError(FileName
, std::move(Err
)));
205 // Check E. If it's in a success state then return the contained value.
206 // If it's in a failure state log the file error(s) and exit.
// Delegates the failure handling to the Error overload of ExitOnFileError by
// passing E.takeError(), then moves the contained value out of `E`.
// NOTE(review): the closing brace is not visible in this extract.
207 template <typename T
>
208 T
ExitOnFileError(const Twine
&FileName
, Expected
<T
> &&E
) {
209 ExitOnFileError(FileName
, E
.takeError());
210 return std::move(*E
);
213 // Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
214 // and returns the opcode indices or {} if snippets should be read from
// (the sentence above is cut off in this extract; presumably it ends with the
// file given by the snippets-file option).
//
// `MCInstrInfo` supplies getNumOpcodes() and getName() for the all-opcodes
// loop and for name resolution.
// NOTE(review): several statements of this function (returns, guards, loop
// bodies, closing braces) are not visible in this extract; comments below
// describe only what can be seen.
216 static std::vector
<unsigned> getOpcodesOrDie(const MCInstrInfo
&MCInstrInfo
) {
// Count how many of the three selectors were given. OpcodeIndex counts as
// given only when it is non-zero, so its default of 0 means "not set".
217 const size_t NumSetFlags
= (OpcodeNames
.empty() ? 0 : 1) +
218 (OpcodeIndex
== 0 ? 0 : 1) +
219 (SnippetsFile
.empty() ? 0 : 1);
// Anything other than exactly one selector is a usage error: switch to the
// plain "llvm-exegesis: " banner and exit.
220 if (NumSetFlags
!= 1) {
221 ExitOnErr
.setBanner("llvm-exegesis: ");
222 ExitWithError("please provide one and only one of 'opcode-index', "
223 "'opcode-name' or 'snippets-file'");
// NOTE(review): the statements controlled by the next condition, and the guard
// in front of the single-opcode return below, are missing from this extract.
225 if (!SnippetsFile
.empty())
228 return {static_cast<unsigned>(OpcodeIndex
)};
// Negative index: iterate over opcodes 1 .. getNumOpcodes()-1 (the loop body
// is not visible here).
229 if (OpcodeIndex
< 0) {
230 std::vector
<unsigned> Result
;
231 for (unsigned I
= 1, E
= MCInstrInfo
.getNumOpcodes(); I
< E
; ++I
)
235 // Resolve opcode name -> opcode.
// Linear scan comparing each opcode's name with `OpcodeName`. The result is
// tested as a boolean below, so an unresolved name presumably yields 0; the
// return statements themselves are not visible in this extract.
236 const auto ResolveName
= [&MCInstrInfo
](StringRef OpcodeName
) -> unsigned {
237 for (unsigned I
= 1, E
= MCInstrInfo
.getNumOpcodes(); I
< E
; ++I
)
238 if (MCInstrInfo
.getName(I
) == OpcodeName
)
// Split the comma-separated list with no split limit, dropping empty pieces.
242 SmallVector
<StringRef
, 2> Pieces
;
243 StringRef(OpcodeNames
.getValue())
244 .split(Pieces
, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
// Collect every name that resolves; the "unknown opcode" error below is
// presumably the else-branch for names that do not.
245 std::vector
<unsigned> Result
;
246 for (const StringRef
&OpcodeName
: Pieces
) {
247 if (unsigned Opcode
= ResolveName(OpcodeName
))
248 Result
.push_back(Opcode
);
250 ExitWithError(Twine("unknown opcode ").concat(OpcodeName
));
255 // Generates code snippets for opcode `Opcode`.
// Returns the generated BenchmarkCode list, or an Error: a Failure for
// unsupported opcodes, or whatever generateConfigurations reports.
// NOTE(review): the use of `ForbiddenRegs`, the tail of the
// createSnippetGenerator call, several guards and the final return are not
// visible in this extract.
256 static Expected
<std::vector
<BenchmarkCode
>>
257 generateSnippets(const LLVMState
&State
, unsigned Opcode
,
258 const BitVector
&ForbiddenRegs
) {
259 const Instruction
&Instr
= State
.getIC().getInstr(Opcode
);
260 const MCInstrDesc
&InstrDesc
= Instr
.Description
;
261 // Ignore instructions that we cannot run.
// Three families are rejected with a Failure: pseudo / custom-insertion-hook
// instructions, branches (direct or indirect), and calls or returns.
262 if (InstrDesc
.isPseudo() || InstrDesc
.usesCustomInsertionHook())
263 return make_error
<Failure
>(
264 "Unsupported opcode: isPseudo/usesCustomInserter");
265 if (InstrDesc
.isBranch() || InstrDesc
.isIndirectBranch())
266 return make_error
<Failure
>("Unsupported opcode: isBranch/isIndirectBranch");
267 if (InstrDesc
.isCall() || InstrDesc
.isReturn())
268 return make_error
<Failure
>("Unsupported opcode: isCall/isReturn");
// Ask the target for the instruction templates to try, passing the
// per-opcode configuration limit.
270 const std::vector
<InstructionTemplate
> InstructionVariants
=
271 State
.getExegesisTarget().generateInstructionVariants(
272 Instr
, MaxConfigsPerOpcode
);
// Build the generator for the selected benchmark mode. The condition guarding
// the "cannot create snippet generator" exit is not visible here.
274 SnippetGenerator::Options SnippetOptions
;
275 SnippetOptions
.MaxConfigsPerOpcode
= MaxConfigsPerOpcode
;
276 const std::unique_ptr
<SnippetGenerator
> Generator
=
277 State
.getExegesisTarget().createSnippetGenerator(BenchmarkMode
, State
,
280 ExitWithError("cannot create snippet generator");
// Accumulate configurations variant by variant. The size check against
// MaxConfigsPerOpcode presumably ends the loop early (its statement is not
// visible); a generation error is returned to the caller.
282 std::vector
<BenchmarkCode
> Benchmarks
;
283 for (const InstructionTemplate
&Variant
: InstructionVariants
) {
284 if (Benchmarks
.size() >= MaxConfigsPerOpcode
)
286 if (auto Err
= Generator
->generateConfigurations(Variant
, Benchmarks
,
288 return std::move(Err
);
// Entry point for the benchmarking modes: initialises libpfm and the native
// target, builds the runner and repetitors, gathers configurations (generated
// per opcode or read from the snippets file), runs each one and writes its
// result as YAML to BenchmarkFile.
// NOTE(review): many lines of this function (guards, else/continue
// statements, the lambda head, closing braces) are not visible in this
// extract; comments below describe only what can be seen.
293 void benchmarkMain() {
// NOTE(review): the condition guarding this exit is not visible; presumably
// it is compiled/executed only when libpfm support is absent.
295 ExitWithError("benchmarking unavailable, LLVM was built without libpfm.");
// pfmInitialize() returning a true value is treated as failure.
298 if (exegesis::pfm::pfmInitialize())
299 ExitWithError("cannot initialize libpfm");
301 InitializeNativeTarget();
302 InitializeNativeTargetAsmPrinter();
303 InitializeNativeTargetAsmParser();
304 InitializeNativeExegesisTarget();
306 const LLVMState
State(CpuName
);
308 // Preliminary check to ensure features needed for requested
309 // benchmark mode are present on target CPU and/or OS.
310 ExitOnErr(State
.getExegesisTarget().checkFeatureSupport());
// Create the runner for the selected mode and aggregation; creation errors
// exit through ExitOnErr. The guard for the explicit "cannot create benchmark
// runner" exit is not visible here.
312 const std::unique_ptr
<BenchmarkRunner
> Runner
=
313 ExitOnErr(State
.getExegesisTarget().createBenchmarkRunner(
314 BenchmarkMode
, State
, ResultAggMode
));
316 ExitWithError("cannot create benchmark runner");
319 const auto Opcodes
= getOpcodesOrDie(State
.getInstrInfo());
// One repetitor for any mode other than AggregateMin; the loop over
// {Duplicate, Loop} is presumably the else-branch for AggregateMin (the
// `else` itself is not visible).
321 SmallVector
<std::unique_ptr
<const SnippetRepetitor
>, 2> Repetitors
;
322 if (RepetitionMode
!= InstructionBenchmark::RepetitionModeE::AggregateMin
)
323 Repetitors
.emplace_back(SnippetRepetitor::Create(RepetitionMode
, State
));
325 for (InstructionBenchmark::RepetitionModeE RepMode
:
326 {InstructionBenchmark::RepetitionModeE::Duplicate
,
327 InstructionBenchmark::RepetitionModeE::Loop
})
328 Repetitors
.emplace_back(SnippetRepetitor::Create(RepMode
, State
));
// Union of the registers reserved by every repetitor; passed to
// generateSnippets() below.
331 BitVector AllReservedRegs
;
332 llvm::for_each(Repetitors
,
334 const std::unique_ptr
<const SnippetRepetitor
> &Repetitor
) {
335 AllReservedRegs
|= Repetitor
->getReservedRegs();
// Opcodes given: generate snippets per opcode. Otherwise (presumably the
// else-branch, not visible) read them from SnippetsFile.
338 std::vector
<BenchmarkCode
> Configurations
;
339 if (!Opcodes
.empty()) {
340 for (const unsigned Opcode
: Opcodes
) {
341 // Ignore instructions without a sched class if
342 // -ignore-invalid-sched-class is passed.
343 if (IgnoreInvalidSchedClass
&&
344 State
.getInstrInfo().get(Opcode
).getSchedClass() == 0) {
345 errs() << State
.getInstrInfo().getName(Opcode
)
346 << ": ignoring instruction without sched class\n";
// A generation failure is logged to stderr, prefixed with "<opcode name>: ";
// successful results are moved onto the end of Configurations.
350 auto ConfigsForInstr
= generateSnippets(State
, Opcode
, AllReservedRegs
);
351 if (!ConfigsForInstr
) {
352 logAllUnhandledErrors(
353 ConfigsForInstr
.takeError(), errs(),
354 Twine(State
.getInstrInfo().getName(Opcode
)).concat(": "));
357 std::move(ConfigsForInstr
->begin(), ConfigsForInstr
->end(),
358 std::back_inserter(Configurations
));
361 Configurations
= ExitOnErr(readSnippets(State
, SnippetsFile
));
// A repetition count of zero is a usage error.
364 if (NumRepetitions
== 0) {
365 ExitOnErr
.setBanner("llvm-exegesis: ");
366 ExitWithError("--num-repetitions must be greater than zero");
369 // Write to standard output if file is not set.
// NOTE(review): the statement controlled by this condition is not visible.
370 if (BenchmarkFile
.empty())
// Run every configuration; runner errors exit via ExitOnErr and YAML write
// errors exit via ExitOnFileError, which names BenchmarkFile in the report.
373 for (const BenchmarkCode
&Conf
: Configurations
) {
374 InstructionBenchmark Result
= ExitOnErr(Runner
->runConfiguration(
375 Conf
, NumRepetitions
, LoopBodySize
, Repetitors
, DumpObjectToDisk
));
376 ExitOnFileError(BenchmarkFile
, Result
.writeYaml(State
, BenchmarkFile
));
378 exegesis::pfm::pfmTerminate();
381 // Prints the results of running analysis pass `Pass` to file `OutputFilename`
382 // if OutputFilename is non-empty.
// `Name` is only used in the progress message written to stderr. Errors from
// opening the file or from the pass itself are reported through
// ExitOnFileError, which names `OutputFilename`.
// NOTE(review): the statement after the empty-filename check, the end of the
// progress message, the guard before the first ExitOnFileError call and the
// closing braces are not visible in this extract.
383 template <typename Pass
>
384 static void maybeRunAnalysis(const Analysis
&Analyzer
, const std::string
&Name
,
385 const std::string
&OutputFilename
) {
386 if (OutputFilename
.empty())
// The progress message is printed only when the target is not "-".
388 if (OutputFilename
!= "-") {
389 errs() << "Printing " << Name
<< " results to file '" << OutputFilename
// Open the output stream with read and write access; `ErrorCode` receives
// the open status.
392 std::error_code ErrorCode
;
393 raw_fd_ostream
ClustersOS(OutputFilename
, ErrorCode
,
394 sys::fs::FA_Read
| sys::fs::FA_Write
);
396 ExitOnFileError(OutputFilename
, errorCodeToError(ErrorCode
));
397 if (auto Err
= Analyzer
.run
<Pass
>(ClustersOS
))
398 ExitOnFileError(OutputFilename
, std::move(Err
));
// Entry point for analysis mode: validates the options, reads benchmark
// points from BenchmarkFile, clusters them, and runs the cluster-printing and
// sched-class-inconsistency passes on whichever output files were requested.
// NOTE(review): several lines of this function (the call that emits the
// "At least one of ..." message, early returns, the declaration of `Error`,
// the guard before the "unknown target" message, closing braces) are not
// visible in this extract.
401 static void analysisMain() {
402 ExitOnErr
.setBanner("llvm-exegesis: ");
403 if (BenchmarkFile
.empty())
404 ExitWithError("--benchmarks-file must be set");
// With neither output file set there would be nothing to produce.
406 if (AnalysisClustersOutputFile
.empty() &&
407 AnalysisInconsistenciesOutputFile
.empty()) {
409 "for --mode=analysis: At least one of --analysis-clusters-output-file "
410 "and --analysis-inconsistencies-output-file must be specified");
413 InitializeNativeTarget();
414 InitializeNativeTargetAsmPrinter();
415 InitializeNativeTargetDisassembler();
// This LLVMState is built with an empty CPU name and is passed to readYamls
// below.
418 const LLVMState
State("");
419 const std::vector
<InstructionBenchmark
> Points
= ExitOnFileError(
420 BenchmarkFile
, InstructionBenchmark::readYamls(State
, BenchmarkFile
));
422 outs() << "Parsed " << Points
.size() << " benchmark points\n";
423 if (Points
.empty()) {
424 errs() << "no benchmarks to analyze\n";
427 // FIXME: Check that all points have the same triple/cpu.
428 // FIXME: Merge points from several runs (latency and uops).
// The target, subtarget info and instruction info are all derived from the
// triple of the first point only (see the FIXMEs above).
431 const auto *TheTarget
=
432 TargetRegistry::lookupTarget(Points
[0].LLVMTriple
, Error
);
434 errs() << "unknown target '" << Points
[0].LLVMTriple
<< "'\n";
438 std::unique_ptr
<MCSubtargetInfo
> SubtargetInfo(
439 TheTarget
->createMCSubtargetInfo(Points
[0].LLVMTriple
, CpuName
, ""));
441 std::unique_ptr
<MCInstrInfo
> InstrInfo(TheTarget
->createMCInstrInfo());
442 assert(InstrInfo
&& "Unable to create instruction info!");
// Clustering borrows raw pointers to SubtargetInfo/InstrInfo; it is created
// before ownership of both is moved into the Analyzer below.
444 const auto Clustering
= ExitOnErr(InstructionBenchmarkClustering::create(
445 Points
, AnalysisClusteringAlgorithm
, AnalysisDbscanNumPoints
,
446 AnalysisClusteringEpsilon
, SubtargetInfo
.get(), InstrInfo
.get()));
448 const Analysis
Analyzer(
449 *TheTarget
, std::move(SubtargetInfo
), std::move(InstrInfo
), Clustering
,
450 AnalysisInconsistencyEpsilon
, AnalysisDisplayUnstableOpcodes
, CpuName
);
// Each pass is given its own output-file option.
452 maybeRunAnalysis
<Analysis::PrintClusters
>(Analyzer
, "analysis clusters",
453 AnalysisClustersOutputFile
);
454 maybeRunAnalysis
<Analysis::PrintSchedClassInconsistencies
>(
455 Analyzer
, "sched class consistency analysis",
456 AnalysisInconsistenciesOutputFile
);
459 } // namespace exegesis
// Program entry point: parses the command line, installs an exit-code mapper
// on exegesis::ExitOnErr, then runs analysis or benchmarking depending on
// BenchmarkMode.
// NOTE(review): the values returned by the exit-code mapper, the `else`
// between the two dispatch calls, the final return and the closing braces are
// not visible in this extract.
462 int main(int Argc
, char **Argv
) {
463 using namespace llvm
;
464 cl::ParseCommandLineOptions(Argc
, Argv
, "");
// The mapper distinguishes ClusteringError from every other error type.
466 exegesis::ExitOnErr
.setExitCodeMapper([](const Error
&Err
) {
467 if (Err
.isA
<exegesis::ClusteringError
>())
// InstructionBenchmark::Unknown is the value registered for "analysis" on
// the mode option, so it selects analysisMain(); benchmarkMain() is
// presumably the else-branch.
472 if (exegesis::BenchmarkMode
== exegesis::InstructionBenchmark::Unknown
) {
473 exegesis::analysisMain();
475 exegesis::benchmarkMain();