1 //===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Measures execution properties (latencies/uops) of an instruction.
12 //===----------------------------------------------------------------------===//
14 #include "lib/Analysis.h"
15 #include "lib/BenchmarkResult.h"
16 #include "lib/BenchmarkRunner.h"
17 #include "lib/Clustering.h"
18 #include "lib/LlvmState.h"
19 #include "lib/PerfHelper.h"
20 #include "lib/Target.h"
21 #include "lib/TargetSelect.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/Twine.h"
24 #include "llvm/MC/MCInstBuilder.h"
25 #include "llvm/MC/MCObjectFileInfo.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/Object/ObjectFile.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Format.h"
34 #include "llvm/Support/Path.h"
35 #include "llvm/Support/SourceMgr.h"
36 #include "llvm/Support/TargetRegistry.h"
37 #include "llvm/Support/TargetSelect.h"
// Option categories that the command-line flags declared in this file attach
// to through cl::cat(...): general, benchmark-only and analysis-only flags.
// NOTE(review): the leading integers (44, 45, 46) look like line numbers left
// over from a source-listing extraction, not program text — confirm before
// trying to build from this file.
44 static cl::OptionCategory
Options("llvm-exegesis options");
45 static cl::OptionCategory
BenchmarkOptions("llvm-exegesis benchmark options");
46 static cl::OptionCategory
AnalysisOptions("llvm-exegesis analysis options");
// -opcode-index: selects the opcode to benchmark by numeric index. The default
// is 0, and getOpcodesOrDie() counts this flag as provided only when non-zero.
48 static cl::opt
<int> OpcodeIndex("opcode-index",
49 cl::desc("opcode to measure, by index"),
50 cl::cat(BenchmarkOptions
), cl::init(0));
// -opcode-name: comma-separated list of opcode names to benchmark; empty by
// default.
52 static cl::opt
<std::string
>
53 OpcodeNames("opcode-name",
54 cl::desc("comma-separated list of opcodes to measure, by name"),
55 cl::cat(BenchmarkOptions
), cl::init(""));
// -snippets-file: file containing the code snippets to measure.
// NOTE(review): the listing numbering jumps from 59 to 62 below, so the end of
// this declaration (presumably its initial value and closing parenthesis) is
// not visible in this text — confirm against the real source.
57 static cl::opt
<std::string
> SnippetsFile("snippets-file",
58 cl::desc("code snippets to measure"),
59 cl::cat(BenchmarkOptions
),
// -benchmarks-file: benchmark results file; read in analysis mode, written in
// the benchmarking modes. Empty by default.
62 static cl::opt
<std::string
>
63 BenchmarkFile("benchmarks-file",
64 cl::desc("File to read (analysis mode) or write "
65 "(latency/uops/inverse_throughput modes) benchmark "
66 "results. “-” uses stdin/stdout."),
67 cl::cat(Options
), cl::init(""));
// -mode: which benchmark mode to run. The enum value Unknown is exposed under
// the flag value "analysis"; main() dispatches to analysisMain() for it and to
// benchmarkMain() otherwise.
// NOTE(review): listing lines 74, 77 and 81-82 are not shown, so the flag
// value string for InverseThroughput, the description string for Uops and the
// end of this declaration are missing from this text.
69 static cl::opt
<exegesis::InstructionBenchmark::ModeE
> BenchmarkMode(
70 "mode", cl::desc("the mode to run"), cl::cat(Options
),
71 cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency
, "latency",
72 "Instruction Latency"),
73 clEnumValN(exegesis::InstructionBenchmark::InverseThroughput
,
75 "Instruction Inverse Throughput"),
76 clEnumValN(exegesis::InstructionBenchmark::Uops
, "uops",
78 // When not asking for a specific benchmark mode,
79 // we'll analyse the results.
80 clEnumValN(exegesis::InstructionBenchmark::Unknown
, "analysis",
// -num-repetitions: how many times the asm snippet is repeated; defaults to
// 10000. benchmarkMain() rejects a value of 0.
83 static cl::opt
<unsigned>
84 NumRepetitions("num-repetitions",
85 cl::desc("number of time to repeat the asm snippet"),
86 cl::cat(BenchmarkOptions
), cl::init(10000));
// -ignore-invalid-sched-class: when set, benchmarkMain() does not benchmark
// opcodes whose sched class is 0. Off by default.
88 static cl::opt
<bool> IgnoreInvalidSchedClass(
89 "ignore-invalid-sched-class",
90 cl::desc("ignore instructions that do not define a sched class"),
91 cl::cat(BenchmarkOptions
), cl::init(false));
// -analysis-clustering: clustering algorithm used in analysis mode, either
// "dbscan" (the default) or "naive".
93 static cl::opt
<exegesis::InstructionBenchmarkClustering::ModeE
>
94 AnalysisClusteringAlgorithm(
95 "analysis-clustering", cl::desc("the clustering algorithm to use"),
96 cl::cat(AnalysisOptions
),
97 cl::values(clEnumValN(exegesis::InstructionBenchmarkClustering::Dbscan
,
98 "dbscan", "use DBSCAN/OPTICS algorithm"),
99 clEnumValN(exegesis::InstructionBenchmarkClustering::Naive
,
100 "naive", "one cluster per opcode")),
101 cl::init(exegesis::InstructionBenchmarkClustering::Dbscan
));
// -analysis-numpoints: minimum number of points per cluster (dbscan only);
// defaults to 3.
103 static cl::opt
<unsigned> AnalysisDbscanNumPoints(
104 "analysis-numpoints",
105 cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
106 cl::cat(AnalysisOptions
), cl::init(3));
// -analysis-clustering-epsilon: epsilon passed to the clustering step;
// defaults to 0.1.
108 static cl::opt
<float> AnalysisClusteringEpsilon(
109 "analysis-clustering-epsilon",
110 cl::desc("epsilon for benchmark point clustering"),
111 cl::cat(AnalysisOptions
), cl::init(0.1));
// -analysis-inconsistency-epsilon: epsilon passed to the Analysis object for
// comparing clusters against the LLVM schedule profile values; defaults to 0.1.
113 static cl::opt
<float> AnalysisInconsistencyEpsilon(
114 "analysis-inconsistency-epsilon",
115 cl::desc("epsilon for detection of when the cluster is different from the "
116 "LLVM schedule profile values"),
117 cl::cat(AnalysisOptions
), cl::init(0.1));
// -analysis-clusters-output-file: where analysisMain() sends the
// "analysis clusters" report. Empty by default; the flag has no description
// text.
119 static cl::opt
<std::string
>
120 AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
121 cl::cat(AnalysisOptions
), cl::init(""));
// -analysis-inconsistencies-output-file: where analysisMain() sends the
// "sched class consistency analysis" report. The flag has no description text.
// NOTE(review): the listing numbering jumps from 124 to 127 below, so the end
// of this declaration (presumably its initial value) is not visible here.
122 static cl::opt
<std::string
>
123 AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
124 cl::desc(""), cl::cat(AnalysisOptions
),
// -analysis-display-unstable-clusters: switches the analysis output to the
// "unstable" opcodes described in the help text below. Off by default.
127 static cl::opt
<bool> AnalysisDisplayUnstableOpcodes(
128 "analysis-display-unstable-clusters",
129 cl::desc("if there is more than one benchmark for an opcode, said "
130 "benchmarks may end up not being clustered into the same cluster "
131 "if the measured performance characteristics are different. by "
132 "default all such opcodes are filtered out. this flag will "
133 "instead show only such unstable opcodes"),
134 cl::cat(AnalysisOptions
), cl::init(false));
// CPU name used for pfm counters; empty (the default) means autodetect.
// benchmarkMain() passes it to the LLVMState constructor.
// NOTE(review): listing line 137 is not shown, so the flag's name string is
// missing from this text.
136 static cl::opt
<std::string
> CpuName(
138 cl::desc("cpu name to use for pfm counters, leave empty to autodetect"),
139 cl::cat(Options
), cl::init(""));
// -dump-object-to-disk: defaults to true; benchmarkMain() forwards it to
// BenchmarkRunner::runConfiguration().
// NOTE(review): listing lines 140-141 are not shown, so the line that starts
// this declaration (its storage class and option type) is missing here.
142 DumpObjectToDisk("dump-object-to-disk",
143 cl::desc("dumps the generated benchmark object to disk "
144 "and prints a message to access it"),
145 cl::cat(BenchmarkOptions
), cl::init(true));
// Shared error unwrapper used by benchmarkMain() and analysisMain(); main()
// installs an exit-code mapper on it.
147 static ExitOnError ExitOnErr
;
149 // Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
150 // and returns the opcode indices or {} if snippets should be read from
// the snippets file.
//
// MCInstrInfo supplies the number of opcodes and their names for the
// index-range and name-lookup paths.
//
// NOTE(review): this text is an incomplete listing. The numbering skips 160,
// 162-163, 168-170, 176-178, 186 and 189-192, so several return statements,
// both loop bodies and the closing braces are not visible. As printed, the
// statement after each `if` below is NOT necessarily the one it guards in the
// real source.
152 static std::vector
<unsigned>
153 getOpcodesOrDie(const llvm::MCInstrInfo
&MCInstrInfo
) {
// Count how many of the three selectors were given. OpcodeIndex counts as
// given only when it differs from 0.
154 const size_t NumSetFlags
= (OpcodeNames
.empty() ? 0 : 1) +
155 (OpcodeIndex
== 0 ? 0 : 1) +
156 (SnippetsFile
.empty() ? 0 : 1);
// Anything other than exactly one selector is a fatal usage error. The rest
// of the message (listing line 160) is not shown.
157 if (NumSetFlags
!= 1)
158 llvm::report_fatal_error(
159 "please provide one and only one of 'opcode-index', 'opcode-name' or "
// Snippets-file case. Per the header comment this presumably returns {}; the
// guarded statement (listing lines 162-163) is not shown.
161 if (!SnippetsFile
.empty())
// Single-opcode return; its own guard (listing line 163) is not shown.
164 return {static_cast<unsigned>(OpcodeIndex
)};
// Negative index: iterates over opcodes 1 .. getNumOpcodes()-1. The loop body
// and the return of Result are not shown.
165 if (OpcodeIndex
< 0) {
166 std::vector
<unsigned> Result
;
167 for (unsigned I
= 1, E
= MCInstrInfo
.getNumOpcodes(); I
< E
; ++I
)
171 // Resolve opcode name -> opcode.
// Linear scan over opcode names starting at index 1. The lambda's return
// statements are not shown; the caller below treats a zero result as
// "name not found".
172 const auto ResolveName
=
173 [&MCInstrInfo
](llvm::StringRef OpcodeName
) -> unsigned {
174 for (unsigned I
= 1, E
= MCInstrInfo
.getNumOpcodes(); I
< E
; ++I
)
175 if (MCInstrInfo
.getName(I
) == OpcodeName
)
// Split the comma-separated name list, dropping empty pieces.
179 llvm::SmallVector
<llvm::StringRef
, 2> Pieces
;
180 llvm::StringRef(OpcodeNames
.getValue())
181 .split(Pieces
, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
// Resolve each name and collect the non-zero opcodes. The fatal error is
// presumably the else-branch for unresolved names (listing line 186 is not
// shown).
182 std::vector
<unsigned> Result
;
183 for (const llvm::StringRef OpcodeName
: Pieces
) {
184 if (unsigned Opcode
= ResolveName(OpcodeName
))
185 Result
.push_back(Opcode
);
187 llvm::report_fatal_error(
188 llvm::Twine("unknown opcode ").concat(OpcodeName
));
193 // Generates code snippets for opcode `Opcode`.
//
// Looks the instruction up in State's instruction cache and returns a
// BenchmarkFailure error for pseudo instructions, branches, calls and returns.
// Otherwise it asks the target for a snippet generator matching the selected
// BenchmarkMode and returns the configurations it generates for the
// instruction.
//
// NOTE(review): listing lines 207, 210 and 213 onward are not shown. The
// condition guarding the "cannot create snippet generator" fatal error
// (presumably a null check on Generator) and the closing brace are missing
// from this text.
194 static llvm::Expected
<std::vector
<BenchmarkCode
>>
195 generateSnippets(const LLVMState
&State
, unsigned Opcode
) {
196 const Instruction
&Instr
= State
.getIC().getInstr(Opcode
);
197 const llvm::MCInstrDesc
&InstrDesc
= *Instr
.Description
;
198 // Ignore instructions that we cannot run.
199 if (InstrDesc
.isPseudo())
200 return llvm::make_error
<BenchmarkFailure
>("Unsupported opcode: isPseudo");
201 if (InstrDesc
.isBranch() || InstrDesc
.isIndirectBranch())
202 return llvm::make_error
<BenchmarkFailure
>(
203 "Unsupported opcode: isBranch/isIndirectBranch");
204 if (InstrDesc
.isCall() || InstrDesc
.isReturn())
205 return llvm::make_error
<BenchmarkFailure
>(
206 "Unsupported opcode: isCall/isReturn");
// Mode-specific generator supplied by the exegesis target.
208 const std::unique_ptr
<SnippetGenerator
> Generator
=
209 State
.getExegesisTarget().createSnippetGenerator(BenchmarkMode
, State
);
211 llvm::report_fatal_error("cannot create snippet generator");
212 return Generator
->generateConfigurations(Instr
);
217 // An MCStreamer that reads a BenchmarkCode definition from a file.
218 // The BenchmarkCode definition is just an asm file, with additional comments to
219 // specify which registers should be defined or are live on entry.
//
// Recognized comment forms, after trimming and the "LLVM-EXEGESIS-" prefix:
//   DEFREG <reg> <hex_value>  -> appended to Result->RegisterInitialValues
//   LIVEIN <reg>              -> appended to Result->LiveIns
//
// NOTE(review): this text is an incomplete listing. The numbering skips 222,
// 229, 233-234, 239, 248-250, 254-256, 261-262, 267, 270-275, 277-278,
// 284-285, 287, 292, 297-298, 301-303 and 307 onward. Access specifiers,
// early returns, closing braces and the places where InvalidComments is
// updated are therefore not visible.
220 class BenchmarkCodeStreamer
: public llvm::MCStreamer
,
221 public llvm::AsmCommentConsumer
{
// Context is forwarded to the MCStreamer base; TheRegInfo is used for register
// name lookup; Result receives everything that is parsed. The two pointers
// are stored, not owned, as far as this text shows.
223 explicit BenchmarkCodeStreamer(llvm::MCContext
*Context
,
224 const llvm::MCRegisterInfo
*TheRegInfo
,
225 BenchmarkCode
*Result
)
226 : llvm::MCStreamer(*Context
), RegInfo(TheRegInfo
), Result(Result
) {}
228 // Implementation of the llvm::MCStreamer interface. We only care about
// instructions.
// Appends every emitted instruction to Result->Instructions; STI is unused in
// the visible body.
230 void EmitInstruction(const llvm::MCInst
&Instruction
,
231 const llvm::MCSubtargetInfo
&STI
) override
{
232 Result
->Instructions
.push_back(Instruction
);
235 // Implementation of the llvm::AsmCommentConsumer.
236 void HandleComment(llvm::SMLoc Loc
, llvm::StringRef CommentText
) override
{
237 CommentText
= CommentText
.trim();
// Comments without the prefix are presumably ignored; the guarded statement
// (listing line 239) is not shown.
238 if (!CommentText
.consume_front("LLVM-EXEGESIS-"))
240 if (CommentText
.consume_front("DEFREG")) {
241 // LLVM-EXEGESIS-DEFREG <reg> <hex_value>
242 RegisterValue RegVal
;
243 llvm::SmallVector
<llvm::StringRef
, 2> Parts
;
244 CommentText
.split(Parts
, ' ', /*unlimited splits*/ -1,
245 /*do not keep empty strings*/ false);
// Exactly two space-separated fields are required. The rest of this error
// path (listing lines 248-250) is not shown.
246 if (Parts
.size() != 2) {
247 llvm::errs() << "invalid comment 'LLVM-EXEGESIS-DEFREG " << CommentText
// A zero result from findRegisterByName() is treated as "unknown register".
// The rest of this error path (listing lines 254-256) is not shown.
251 if (!(RegVal
.Register
= findRegisterByName(Parts
[0].trim()))) {
252 llvm::errs() << "unknown register in 'LLVM-EXEGESIS-DEFREG "
253 << CommentText
<< "\n";
// The value is parsed as base 16 into an APInt whose bit width is four times
// the number of characters in the hex string.
257 const llvm::StringRef HexValue
= Parts
[1].trim();
258 RegVal
.Value
= llvm::APInt(
259 /* each hex digit is 4 bits */ HexValue
.size() * 4, HexValue
, 16);
260 Result
->RegisterInitialValues
.push_back(std::move(RegVal
));
263 if (CommentText
.consume_front("LIVEIN")) {
264 // LLVM-EXEGESIS-LIVEIN <reg>
// A non-zero lookup result is recorded as a live-in. The message below is
// presumably the else-branch (listing line 267 is not shown).
265 if (unsigned Reg
= findRegisterByName(CommentText
.ltrim()))
266 Result
->LiveIns
.push_back(Reg
);
268 llvm::errs() << "unknown register in 'LLVM-EXEGESIS-LIVEIN "
269 << CommentText
<< "\n";
// Returns the InvalidComments counter; readSnippets() fails when it is
// non-zero.
276 unsigned numInvalidComments() const { return InvalidComments
; }
279 // We only care about instructions, we don't implement this part of the API.
280 void EmitCommonSymbol(llvm::MCSymbol
*Symbol
, uint64_t Size
,
281 unsigned ByteAlignment
) override
{}
// The body of this override (listing lines 284-285) is not shown.
282 bool EmitSymbolAttribute(llvm::MCSymbol
*Symbol
,
283 llvm::MCSymbolAttr Attribute
) override
{
// One parameter line (listing line 287) is not shown.
286 void EmitValueToAlignment(unsigned ByteAlignment
, int64_t Value
,
288 unsigned MaxBytesToEmit
) override
{}
289 void EmitZerofill(llvm::MCSection
*Section
, llvm::MCSymbol
*Symbol
,
290 uint64_t Size
, unsigned ByteAlignment
,
291 llvm::SMLoc Loc
) override
{}
// Looks RegName up by comparing it with the name of every register known to
// RegInfo. If nothing matches, a diagnostic is printed to stderr. The return
// statements (listing lines 297-298, 301 onward) are not shown; callers treat
// a zero result as "not found".
293 unsigned findRegisterByName(const llvm::StringRef RegName
) const {
294 // FIXME: Can we do better than this ?
295 for (unsigned I
= 0, E
= RegInfo
->getNumRegs(); I
< E
; ++I
) {
296 if (RegName
== RegInfo
->getName(I
))
299 llvm::errs() << "'" << RegName
300 << "' is not a valid register name for the target\n";
// Register description used for name lookup.
304 const llvm::MCRegisterInfo
*const RegInfo
;
// Destination for parsed instructions, register values and live-ins.
305 BenchmarkCode
*const Result
;
// Counter reported by numInvalidComments(); starts at 0.
306 unsigned InvalidComments
= 0;
311 // Reads code snippets from file `Filename`.
//
// Loads the input through MemoryBuffer::getFileOrSTDIN and parses it as
// assembly for State's target machine. A BenchmarkCodeStreamer is attached
// both as the output streamer and as the lexer's comment consumer. The one
// BenchmarkCode it fills is returned wrapped in a vector.
//
// Returns a BenchmarkFailure when the file cannot be read, the target asm
// parser cannot be created, parsing fails, or the streamer counted invalid
// LLVM-EXEGESIS comments.
//
// NOTE(review): this text is an incomplete listing. The numbering skips
// 319-320, 322, 324, 328, 330, 334, 337, 342, 347 and 356 onward. The
// declaration of `SM`, the remaining arguments of the MCContext constructor
// and of InitMCObjectFileInfo, the guard before the "cannot create asm parser"
// return and the closing braces are not visible.
312 static llvm::Expected
<std::vector
<BenchmarkCode
>>
313 readSnippets(const LLVMState
&State
, llvm::StringRef Filename
) {
314 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> BufferPtr
=
315 llvm::MemoryBuffer::getFileOrSTDIN(Filename
);
316 if (std::error_code EC
= BufferPtr
.getError()) {
317 return llvm::make_error
<BenchmarkFailure
>(
318 "cannot read snippet: " + Filename
+ ": " + EC
.message());
// Hand the buffer over to SM, which is declared on a line not shown here.
321 SM
.AddNewSourceBuffer(std::move(BufferPtr
.get()), llvm::SMLoc());
// Filled in by the streamer while the parser runs.
323 BenchmarkCode Result
;
325 llvm::MCObjectFileInfo ObjectFileInfo
;
326 const llvm::TargetMachine
&TM
= State
.getTargetMachine();
327 llvm::MCContext
Context(TM
.getMCAsmInfo(), TM
.getMCRegisterInfo(),
329 ObjectFileInfo
.InitMCObjectFileInfo(TM
.getTargetTriple(), /*PIC*/ false,
331 BenchmarkCodeStreamer
Streamer(&Context
, TM
.getMCRegisterInfo(), &Result
);
332 const std::unique_ptr
<llvm::MCAsmParser
> AsmParser(
333 llvm::createMCAsmParser(SM
, Context
, Streamer
, *TM
.getMCAsmInfo()));
// The guard for this return (listing line 334) is not shown.
335 return llvm::make_error
<BenchmarkFailure
>("cannot create asm parser");
// Route asm comments to the streamer so it can handle LLVM-EXEGESIS comments.
336 AsmParser
->getLexer().setCommentConsumer(&Streamer
);
338 const std::unique_ptr
<llvm::MCTargetAsmParser
> TargetAsmParser(
339 TM
.getTarget().createMCAsmParser(*TM
.getMCSubtargetInfo(), *AsmParser
,
340 *TM
.getMCInstrInfo(),
341 llvm::MCTargetOptions()));
343 if (!TargetAsmParser
)
344 return llvm::make_error
<BenchmarkFailure
>(
345 "cannot create target asm parser");
346 AsmParser
->setTargetParser(*TargetAsmParser
);
// A true result from Run() is reported as a parse failure.
348 if (AsmParser
->Run(false))
349 return llvm::make_error
<BenchmarkFailure
>("cannot parse asm file");
350 if (Streamer
.numInvalidComments())
351 return llvm::make_error
<BenchmarkFailure
>(
352 llvm::Twine("found ")
353 .concat(llvm::Twine(Streamer
.numInvalidComments()))
354 .concat(" invalid LLVM-EXEGESIS comments"));
355 return std::vector
<BenchmarkCode
>{std::move(Result
)};
// Entry point for the benchmarking modes. Initializes libpfm and the native
// target, then collects BenchmarkCode configurations, either generated per
// requested opcode or read from SnippetsFile. Each configuration is run through
// the target's BenchmarkRunner and written as YAML to BenchmarkFile, after
// which libpfm is terminated.
//
// NOTE(review): this text is an incomplete listing. The numbering skips 359,
// 362-363, 366, 371, 374, 384-385, 391-392, 395-396, 398-399, 402, 404-405,
// 408, 411-412, 417 and 419 onward. In particular, whatever makes the
// "built without libpfm" fatal error conditional (presumably a preprocessor
// guard) is not visible, nor are several branch bodies and closing braces.
358 void benchmarkMain() {
360 llvm::report_fatal_error(
361 "benchmarking unavailable, LLVM was built without libpfm.");
// A true result from pfmInitialize() is treated as failure.
364 if (exegesis::pfm::pfmInitialize())
365 llvm::report_fatal_error("cannot initialize libpfm");
367 llvm::InitializeNativeTarget();
368 llvm::InitializeNativeTargetAsmPrinter();
369 llvm::InitializeNativeTargetAsmParser();
370 InitializeNativeExegesisTarget();
372 const LLVMState
State(CpuName
);
373 const auto Opcodes
= getOpcodesOrDie(State
.getInstrInfo());
375 std::vector
<BenchmarkCode
> Configurations
;
// Non-empty opcode list: generate snippets per opcode.
376 if (!Opcodes
.empty()) {
377 for (const unsigned Opcode
: Opcodes
) {
378 // Ignore instructions without a sched class if
379 // -ignore-invalid-sched-class is passed.
380 if (IgnoreInvalidSchedClass
&&
381 State
.getInstrInfo().get(Opcode
).getSchedClass() == 0) {
382 llvm::errs() << State
.getInstrInfo().getName(Opcode
)
383 << ": ignoring instruction without sched class\n";
386 auto ConfigsForInstr
= generateSnippets(State
, Opcode
);
// Generation errors are logged to stderr, prefixed with the opcode name. What
// follows the logging (listing lines 391-392) is not shown.
387 if (!ConfigsForInstr
) {
388 llvm::logAllUnhandledErrors(
389 ConfigsForInstr
.takeError(), llvm::errs(),
390 llvm::Twine(State
.getInstrInfo().getName(Opcode
)).concat(": "));
// Move the generated configurations onto the end of the overall list.
393 std::move(ConfigsForInstr
->begin(), ConfigsForInstr
->end(),
394 std::back_inserter(Configurations
));
// Presumably the else-branch for an empty opcode list (listing lines 395-396
// are not shown): read the configurations from the snippets file.
397 Configurations
= ExitOnErr(readSnippets(State
, SnippetsFile
));
400 const std::unique_ptr
<BenchmarkRunner
> Runner
=
401 State
.getExegesisTarget().createBenchmarkRunner(BenchmarkMode
, State
);
// The guard for this fatal error (listing line 402) is not shown.
403 llvm::report_fatal_error("cannot create benchmark runner");
406 if (NumRepetitions
== 0)
407 llvm::report_fatal_error("--num-repetitions must be greater than zero");
409 // Write to standard output if file is not set.
// The statement guarded by this check (listing lines 411-412) is not shown.
410 if (BenchmarkFile
.empty())
// Run every configuration and write its result; a write error goes through
// ExitOnErr.
413 for (const BenchmarkCode
&Conf
: Configurations
) {
414 InstructionBenchmark Result
=
415 Runner
->runConfiguration(Conf
, NumRepetitions
, DumpObjectToDisk
);
416 ExitOnErr(Result
.writeYaml(State
, BenchmarkFile
));
418 exegesis::pfm::pfmTerminate();
421 // Prints the results of running analysis pass `Pass` to file `OutputFilename`
422 // if OutputFilename is non-empty.
//
// Analyzer is the Analysis object whose run<Pass>() is invoked. Name is a
// human-readable pass name used only in the progress message. An
// OutputFilename of "-" suppresses that progress message.
//
// An error returned by the pass is turned into a fatal error.
//
// NOTE(review): listing lines 427, 431, 436 and 440 onward are not shown. The
// statement guarded by the empty-filename check, the guard before the
// "cannot open out file" fatal error (presumably a test of ErrorCode) and the
// closing braces are missing from this text.
423 template <typename Pass
>
424 static void maybeRunAnalysis(const Analysis
&Analyzer
, const std::string
&Name
,
425 const std::string
&OutputFilename
) {
426 if (OutputFilename
.empty())
// The progress message goes to stderr.
428 if (OutputFilename
!= "-") {
429 llvm::errs() << "Printing " << Name
<< " results to file '"
430 << OutputFilename
<< "'\n";
432 std::error_code ErrorCode
;
// Output stream opened with read and write access flags.
433 llvm::raw_fd_ostream
ClustersOS(OutputFilename
, ErrorCode
,
434 llvm::sys::fs::FA_Read
|
435 llvm::sys::fs::FA_Write
);
437 llvm::report_fatal_error("cannot open out file: " + OutputFilename
);
438 if (auto Err
= Analyzer
.run
<Pass
>(ClustersOS
))
439 llvm::report_fatal_error(std::move(Err
));
// Entry point for analysis mode. Requires --benchmarks-file and at least one
// of the two analysis output-file flags. It reads the benchmark points from
// YAML, looks up the target from the first point's triple, clusters the
// points, and runs the cluster report and the sched-class consistency report
// through maybeRunAnalysis().
//
// NOTE(review): this text is an incomplete listing. The numbering skips 445,
// 451-452, 456, 463-464, 467-468, 471, 473-475, 477, 481, 485 and 491-492.
// What happens after the "no benchmarks to analyze" message, the declaration
// of `Error`, the guard around the "unknown target" message and the closing
// braces are not visible.
442 static void analysisMain() {
443 if (BenchmarkFile
.empty())
444 llvm::report_fatal_error("--benchmarks-file must be set.");
446 if (AnalysisClustersOutputFile
.empty() &&
447 AnalysisInconsistenciesOutputFile
.empty()) {
448 llvm::report_fatal_error(
449 "At least one of --analysis-clusters-output-file and "
450 "--analysis-inconsistencies-output-file must be specified.");
453 llvm::InitializeNativeTarget();
454 llvm::InitializeNativeTargetAsmPrinter();
455 llvm::InitializeNativeTargetDisassembler();
// The state is created with an empty CPU name.
457 const LLVMState
State("");
458 const std::vector
<InstructionBenchmark
> Points
=
459 ExitOnErr(InstructionBenchmark::readYamls(State
, BenchmarkFile
));
460 llvm::outs() << "Parsed " << Points
.size() << " benchmark points\n";
461 if (Points
.empty()) {
462 llvm::errs() << "no benchmarks to analyze\n";
465 // FIXME: Check that all points have the same triple/cpu.
466 // FIXME: Merge points from several runs (latency and uops).
// Only the first point's triple is used for the target lookup. `Error` is
// declared on a line not shown here.
469 const auto *TheTarget
=
470 llvm::TargetRegistry::lookupTarget(Points
[0].LLVMTriple
, Error
);
472 llvm::errs() << "unknown target '" << Points
[0].LLVMTriple
<< "'\n";
476 std::unique_ptr
<llvm::MCInstrInfo
> InstrInfo(TheTarget
->createMCInstrInfo());
// Cluster the points using the algorithm and parameters taken from the
// analysis flags.
478 const auto Clustering
= ExitOnErr(InstructionBenchmarkClustering::create(
479 Points
, AnalysisClusteringAlgorithm
, AnalysisDbscanNumPoints
,
480 AnalysisClusteringEpsilon
, InstrInfo
->getNumOpcodes()));
// InstrInfo is moved into the analyzer here and is not used afterwards in the
// visible code.
482 const Analysis
Analyzer(*TheTarget
, std::move(InstrInfo
), Clustering
,
483 AnalysisInconsistencyEpsilon
,
484 AnalysisDisplayUnstableOpcodes
);
486 maybeRunAnalysis
<Analysis::PrintClusters
>(Analyzer
, "analysis clusters",
487 AnalysisClustersOutputFile
);
488 maybeRunAnalysis
<Analysis::PrintSchedClassInconsistencies
>(
489 Analyzer
, "sched class consistency analysis",
490 AnalysisInconsistenciesOutputFile
);
493 } // namespace exegesis
496 int main(int Argc
, char **Argv
) {
497 using namespace llvm
;
498 cl::ParseCommandLineOptions(Argc
, Argv
, "");
500 exegesis::ExitOnErr
.setExitCodeMapper([](const llvm::Error
&Err
) {
501 if (Err
.isA
<llvm::StringError
>())
506 if (exegesis::BenchmarkMode
== exegesis::InstructionBenchmark::Unknown
) {
507 exegesis::analysisMain();
509 exegesis::benchmarkMain();