1 //===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Measures execution properties (latencies/uops) of an instruction.
12 //===----------------------------------------------------------------------===//
14 #include "lib/Analysis.h"
15 #include "lib/BenchmarkResult.h"
16 #include "lib/BenchmarkRunner.h"
17 #include "lib/Clustering.h"
18 #include "lib/LlvmState.h"
19 #include "lib/PerfHelper.h"
20 #include "lib/SnippetRepetitor.h"
21 #include "lib/Target.h"
22 #include "lib/TargetSelect.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/MC/MCInstBuilder.h"
26 #include "llvm/MC/MCObjectFileInfo.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCStreamer.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/Object/ObjectFile.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Format.h"
35 #include "llvm/Support/Path.h"
36 #include "llvm/Support/SourceMgr.h"
37 #include "llvm/Support/TargetRegistry.h"
38 #include "llvm/Support/TargetSelect.h"
// Command-line option categories. Each flag below is attached to one of them
// through cl::cat().
// NOTE(review): this excerpt is missing some source lines (the embedded line
// numbers have gaps), so a few declarations below are only partially visible.
45 static cl::OptionCategory
Options("llvm-exegesis options");
46 static cl::OptionCategory
BenchmarkOptions("llvm-exegesis benchmark options");
47 static cl::OptionCategory
AnalysisOptions("llvm-exegesis analysis options");
// -opcode-index: integer opcode selector, initialised to 0.
49 static cl::opt
<int> OpcodeIndex("opcode-index",
50 cl::desc("opcode to measure, by index"),
51 cl::cat(BenchmarkOptions
), cl::init(0));
// -opcode-name: comma-separated opcode names, initialised to the empty string.
53 static cl::opt
<std::string
>
54 OpcodeNames("opcode-name",
55 cl::desc("comma-separated list of opcodes to measure, by name"),
56 cl::cat(BenchmarkOptions
), cl::init(""));
// -snippets-file: file with code snippets to measure.
// NOTE(review): the end of this declaration is not visible in this excerpt.
58 static cl::opt
<std::string
> SnippetsFile("snippets-file",
59 cl::desc("code snippets to measure"),
60 cl::cat(BenchmarkOptions
),
// -benchmarks-file: read in analysis mode, written in the benchmark modes;
// initialised to the empty string.
63 static cl::opt
<std::string
>
64 BenchmarkFile("benchmarks-file",
65 cl::desc("File to read (analysis mode) or write "
66 "(latency/uops/inverse_throughput modes) benchmark "
67 "results. “-” uses stdin/stdout."),
68 cl::cat(Options
), cl::init(""));
// -mode: selects which InstructionBenchmark::ModeE to run. The Unknown
// enumerator is exposed under the flag value "analysis".
// NOTE(review): some enumerator names/descriptions and the end of this
// declaration are not visible in this excerpt.
70 static cl::opt
<exegesis::InstructionBenchmark::ModeE
> BenchmarkMode(
71 "mode", cl::desc("the mode to run"), cl::cat(Options
),
72 cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency
, "latency",
73 "Instruction Latency"),
74 clEnumValN(exegesis::InstructionBenchmark::InverseThroughput
,
76 "Instruction Inverse Throughput"),
77 clEnumValN(exegesis::InstructionBenchmark::Uops
, "uops",
79 // When not asking for a specific benchmark mode,
80 // we'll analyse the results.
81 clEnumValN(exegesis::InstructionBenchmark::Unknown
, "analysis",
// -repetition-mode: "duplicate" or "loop". No cl::init is given here.
84 static cl::opt
<exegesis::InstructionBenchmark::RepetitionModeE
> RepetitionMode(
85 "repetition-mode", cl::desc("how to repeat the instruction snippet"),
86 cl::cat(BenchmarkOptions
),
87 cl::values(clEnumValN(exegesis::InstructionBenchmark::Duplicate
,
88 "duplicate", "Duplicate the snippet"),
89 clEnumValN(exegesis::InstructionBenchmark::Loop
, "loop",
90 "Loop over the snippet")));
// -num-repetitions: unsigned repeat count, initialised to 10000.
92 static cl::opt
<unsigned>
93 NumRepetitions("num-repetitions",
94 cl::desc("number of time to repeat the asm snippet"),
95 cl::cat(BenchmarkOptions
), cl::init(10000));
// -ignore-invalid-sched-class: boolean, initialised to false.
97 static cl::opt
<bool> IgnoreInvalidSchedClass(
98 "ignore-invalid-sched-class",
99 cl::desc("ignore instructions that do not define a sched class"),
100 cl::cat(BenchmarkOptions
), cl::init(false));
// -analysis-clustering: "dbscan" or "naive", initialised to Dbscan.
102 static cl::opt
<exegesis::InstructionBenchmarkClustering::ModeE
>
103 AnalysisClusteringAlgorithm(
104 "analysis-clustering", cl::desc("the clustering algorithm to use"),
105 cl::cat(AnalysisOptions
),
106 cl::values(clEnumValN(exegesis::InstructionBenchmarkClustering::Dbscan
,
107 "dbscan", "use DBSCAN/OPTICS algorithm"),
108 clEnumValN(exegesis::InstructionBenchmarkClustering::Naive
,
109 "naive", "one cluster per opcode")),
110 cl::init(exegesis::InstructionBenchmarkClustering::Dbscan
));
// -analysis-numpoints: minimum cluster size for dbscan, initialised to 3.
112 static cl::opt
<unsigned> AnalysisDbscanNumPoints(
113 "analysis-numpoints",
114 cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
115 cl::cat(AnalysisOptions
), cl::init(3));
// -analysis-clustering-epsilon: float, initialised to 0.1.
117 static cl::opt
<float> AnalysisClusteringEpsilon(
118 "analysis-clustering-epsilon",
119 cl::desc("epsilon for benchmark point clustering"),
120 cl::cat(AnalysisOptions
), cl::init(0.1));
// -analysis-inconsistency-epsilon: float, initialised to 0.1.
122 static cl::opt
<float> AnalysisInconsistencyEpsilon(
123 "analysis-inconsistency-epsilon",
124 cl::desc("epsilon for detection of when the cluster is different from the "
125 "LLVM schedule profile values"),
126 cl::cat(AnalysisOptions
), cl::init(0.1));
// Output file names for the two analysis passes. Both are declared with an
// empty description string.
128 static cl::opt
<std::string
>
129 AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
130 cl::cat(AnalysisOptions
), cl::init(""));
// NOTE(review): the end of the next declaration is not visible in this excerpt.
131 static cl::opt
<std::string
>
132 AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
133 cl::desc(""), cl::cat(AnalysisOptions
),
// -analysis-display-unstable-clusters: boolean, initialised to false.
136 static cl::opt
<bool> AnalysisDisplayUnstableOpcodes(
137 "analysis-display-unstable-clusters",
138 cl::desc("if there is more than one benchmark for an opcode, said "
139 "benchmarks may end up not being clustered into the same cluster "
140 "if the measured performance characteristics are different. by "
141 "default all such opcodes are filtered out. this flag will "
142 "instead show only such unstable opcodes"),
143 cl::cat(AnalysisOptions
), cl::init(false));
// CPU name used for pfm counters; empty means autodetect (per the description).
// NOTE(review): the line carrying the flag name is not visible in this excerpt.
145 static cl::opt
<std::string
> CpuName(
147 cl::desc("cpu name to use for pfm counters, leave empty to autodetect"),
148 cl::cat(Options
), cl::init(""));
// -dump-object-to-disk: initialised to true.
// NOTE(review): the start of this declaration (storage class and type) is not
// visible in this excerpt.
151 DumpObjectToDisk("dump-object-to-disk",
152 cl::desc("dumps the generated benchmark object to disk "
153 "and prints a message to access it"),
154 cl::cat(BenchmarkOptions
), cl::init(true));
// Shared ExitOnError helper; main() installs an exit-code mapper on it and the
// *Main() functions use it to unwrap llvm::Expected / llvm::Error results.
156 static ExitOnError ExitOnErr
;
158 // Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
159 // and returns the opcode indices or {} if snippets should be read from
// `SnippetsFile`.
//
// NOTE(review): several statements of this function (return statements, loop
// bodies, an else branch) are missing from this excerpt; the embedded line
// numbers show the gaps. The comments below describe only the visible code.
161 static std::vector
<unsigned>
162 getOpcodesOrDie(const llvm::MCInstrInfo
&MCInstrInfo
) {
// Count how many of the three selectors were given. An OpcodeIndex of 0 is
// treated as "not set".
163 const size_t NumSetFlags
= (OpcodeNames
.empty() ? 0 : 1) +
164 (OpcodeIndex
== 0 ? 0 : 1) +
165 (SnippetsFile
.empty() ? 0 : 1);
// Exactly one selector must be set; anything else is a fatal error.
166 if (NumSetFlags
!= 1)
167 llvm::report_fatal_error(
168 "please provide one and only one of 'opcode-index', 'opcode-name' or "
// NOTE(review): the source lines between this check and the return below are
// missing here (numbering jumps from 170 to 173), so that return is presumably
// guarded by a different condition -- confirm against the full file.
170 if (!SnippetsFile
.empty())
173 return {static_cast<unsigned>(OpcodeIndex
)};
// A negative index starts a loop over opcodes 1 .. getNumOpcodes()-1.
// NOTE(review): the loop body and the matching return are not visible.
174 if (OpcodeIndex
< 0) {
175 std::vector
<unsigned> Result
;
176 for (unsigned I
= 1, E
= MCInstrInfo
.getNumOpcodes(); I
< E
; ++I
)
180 // Resolve opcode name -> opcode.
// Linear scan starting at opcode 1, comparing MCInstrInfo.getName(I) with the
// requested name. NOTE(review): the lambda's return statements are not visible;
// the caller below treats a zero result as "not found".
181 const auto ResolveName
=
182 [&MCInstrInfo
](llvm::StringRef OpcodeName
) -> unsigned {
183 for (unsigned I
= 1, E
= MCInstrInfo
.getNumOpcodes(); I
< E
; ++I
)
184 if (MCInstrInfo
.getName(I
) == OpcodeName
)
// Split the -opcode-name value on ',' with no split limit, dropping empty
// pieces.
188 llvm::SmallVector
<llvm::StringRef
, 2> Pieces
;
189 llvm::StringRef(OpcodeNames
.getValue())
190 .split(Pieces
, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
// Resolve every piece; a non-zero opcode is appended to Result.
191 std::vector
<unsigned> Result
;
192 for (const llvm::StringRef OpcodeName
: Pieces
) {
193 if (unsigned Opcode
= ResolveName(OpcodeName
))
194 Result
.push_back(Opcode
);
// NOTE(review): presumably the else branch of the check above (the `else` line
// is not visible): an unresolved name is a fatal error.
196 llvm::report_fatal_error(
197 llvm::Twine("unknown opcode ").concat(OpcodeName
));
202 // Generates code snippets for opcode `Opcode`.
// Returns a BenchmarkFailure error for pseudo, branch, indirect-branch, call
// and return instructions; otherwise returns whatever the target's snippet
// generator produces for the instruction.
203 static llvm::Expected
<std::vector
<BenchmarkCode
>>
204 generateSnippets(const LLVMState
&State
, unsigned Opcode
,
205 const llvm::BitVector
&ForbiddenRegs
) {
// Look up the exegesis Instruction for the opcode and its MCInstrDesc.
206 const Instruction
&Instr
= State
.getIC().getInstr(Opcode
);
207 const llvm::MCInstrDesc
&InstrDesc
= *Instr
.Description
;
208 // Ignore instructions that we cannot run.
209 if (InstrDesc
.isPseudo())
210 return llvm::make_error
<BenchmarkFailure
>("Unsupported opcode: isPseudo");
211 if (InstrDesc
.isBranch() || InstrDesc
.isIndirectBranch())
212 return llvm::make_error
<BenchmarkFailure
>(
213 "Unsupported opcode: isBranch/isIndirectBranch");
214 if (InstrDesc
.isCall() || InstrDesc
.isReturn())
215 return llvm::make_error
<BenchmarkFailure
>(
216 "Unsupported opcode: isCall/isReturn");
// Ask the exegesis target for a snippet generator matching the selected -mode.
218 const std::unique_ptr
<SnippetGenerator
> Generator
=
219 State
.getExegesisTarget().createSnippetGenerator(BenchmarkMode
, State
);
// NOTE(review): the condition guarding this fatal error is not visible in this
// excerpt; presumably a null check on Generator -- confirm.
221 llvm::report_fatal_error("cannot create snippet generator");
// Delegate to the generator, passing ForbiddenRegs through unchanged.
222 return Generator
->generateConfigurations(Instr
, ForbiddenRegs
);
227 // An MCStreamer that reads a BenchmarkCode definition from a file.
228 // The BenchmarkCode definition is just an asm file, with additional comments to
229 // specify which registers should be defined or are live on entry.
//
// Recognised annotations, as parsed by HandleComment():
//   LLVM-EXEGESIS-DEFREG <reg> <hex_value>
//   LLVM-EXEGESIS-LIVEIN <reg>
//
// NOTE(review): some lines of this class (access specifiers, early returns,
// error bookkeeping, closing braces) are missing from this excerpt.
230 class BenchmarkCodeStreamer
: public llvm::MCStreamer
,
231 public llvm::AsmCommentConsumer
{
// Stores TheRegInfo and Result as raw pointers; the streamer does not take
// ownership of either (both members are plain `*const` pointers).
233 explicit BenchmarkCodeStreamer(llvm::MCContext
*Context
,
234 const llvm::MCRegisterInfo
*TheRegInfo
,
235 BenchmarkCode
*Result
)
236 : llvm::MCStreamer(*Context
), RegInfo(TheRegInfo
), Result(Result
) {}
238 // Implementation of the llvm::MCStreamer interface. We only care about
// instructions.
// Appends every emitted instruction to Result->Instructions; STI is not used
// in the visible body.
240 void EmitInstruction(const llvm::MCInst
&Instruction
,
241 const llvm::MCSubtargetInfo
&STI
) override
{
242 Result
->Instructions
.push_back(Instruction
);
245 // Implementation of the llvm::AsmCommentConsumer.
246 void HandleComment(llvm::SMLoc Loc
, llvm::StringRef CommentText
) override
{
// Strip surrounding whitespace, then require the "LLVM-EXEGESIS-" prefix.
// NOTE(review): the statement executed when the prefix is absent is not
// visible; presumably an early return so ordinary comments are ignored.
247 CommentText
= CommentText
.trim();
248 if (!CommentText
.consume_front("LLVM-EXEGESIS-"))
250 if (CommentText
.consume_front("DEFREG")) {
251 // LLVM-EXEGESIS-DEFREG <reg> <hex_value>
252 RegisterValue RegVal
;
// Split the remainder on spaces, discarding empty pieces; exactly two parts
// are expected.
253 llvm::SmallVector
<llvm::StringRef
, 2> Parts
;
254 CommentText
.split(Parts
, ' ', /*unlimited splits*/ -1,
255 /*do not keep empty strings*/ false);
256 if (Parts
.size() != 2) {
257 llvm::errs() << "invalid comment 'LLVM-EXEGESIS-DEFREG " << CommentText
// NOTE(review): the rest of the error path above is not visible; presumably it
// bumps InvalidComments and returns -- confirm.
// A zero result from findRegisterByName() is treated as "unknown register".
261 if (!(RegVal
.Register
= findRegisterByName(Parts
[0].trim()))) {
262 llvm::errs() << "unknown register in 'LLVM-EXEGESIS-DEFREG "
263 << CommentText
<< "\n";
// The APInt width is 4 bits per character of the hex string, parsed in base 16.
267 const llvm::StringRef HexValue
= Parts
[1].trim();
268 RegVal
.Value
= llvm::APInt(
269 /* each hex digit is 4 bits */ HexValue
.size() * 4, HexValue
, 16);
270 Result
->RegisterInitialValues
.push_back(std::move(RegVal
));
273 if (CommentText
.consume_front("LIVEIN")) {
274 // LLVM-EXEGESIS-LIVEIN <reg>
// A non-zero register id is recorded as a live-in.
275 if (unsigned Reg
= findRegisterByName(CommentText
.ltrim()))
276 Result
->LiveIns
.push_back(Reg
);
// NOTE(review): presumably the else branch of the check above (the `else` line
// is not visible).
278 llvm::errs() << "unknown register in 'LLVM-EXEGESIS-LIVEIN "
279 << CommentText
<< "\n";
// Number of invalid annotations recorded so far.
// NOTE(review): the code that increments InvalidComments is not visible here.
286 unsigned numInvalidComments() const { return InvalidComments
; }
289 // We only care about instructions, we don't implement this part of the API.
290 void EmitCommonSymbol(llvm::MCSymbol
*Symbol
, uint64_t Size
,
291 unsigned ByteAlignment
) override
{}
// NOTE(review): the body (return value) of this override is not visible.
292 bool EmitSymbolAttribute(llvm::MCSymbol
*Symbol
,
293 llvm::MCSymbolAttr Attribute
) override
{
// NOTE(review): one parameter line of the next override is not visible.
296 void EmitValueToAlignment(unsigned ByteAlignment
, int64_t Value
,
298 unsigned MaxBytesToEmit
) override
{}
299 void EmitZerofill(llvm::MCSection
*Section
, llvm::MCSymbol
*Symbol
,
300 uint64_t Size
, unsigned ByteAlignment
,
301 llvm::SMLoc Loc
) override
{}
// Looks a register up by name with a linear scan over indices
// 0 .. getNumRegs()-1, and prints a diagnostic for an unknown name.
// NOTE(review): the return statements are not visible; HandleComment() treats
// a zero result as "not found".
303 unsigned findRegisterByName(const llvm::StringRef RegName
) const {
304 // FIXME: Can we do better than this ?
305 for (unsigned I
= 0, E
= RegInfo
->getNumRegs(); I
< E
; ++I
) {
306 if (RegName
== RegInfo
->getName(I
))
309 llvm::errs() << "'" << RegName
310 << "' is not a valid register name for the target\n";
// Register info used for name lookup.
314 const llvm::MCRegisterInfo
*const RegInfo
;
// Destination that parsed instructions and annotations are written into.
315 BenchmarkCode
*const Result
;
// Count of invalid LLVM-EXEGESIS annotations, starting at 0.
316 unsigned InvalidComments
= 0;
321 // Reads code snippets from file `Filename`.
// On success returns a vector holding exactly one BenchmarkCode built from the
// whole file. Every failure is reported as a BenchmarkFailure error.
// NOTE(review): some lines of this function are missing from this excerpt
// (e.g. the declaration of `SM` and parts of multi-line calls).
322 static llvm::Expected
<std::vector
<BenchmarkCode
>>
323 readSnippets(const LLVMState
&State
, llvm::StringRef Filename
) {
// Load the input through MemoryBuffer::getFileOrSTDIN(); a failure is reported
// with the error_code's message.
324 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> BufferPtr
=
325 llvm::MemoryBuffer::getFileOrSTDIN(Filename
);
326 if (std::error_code EC
= BufferPtr
.getError()) {
327 return llvm::make_error
<BenchmarkFailure
>(
328 "cannot read snippet: " + Filename
+ ": " + EC
.message());
// Hand ownership of the buffer to SM.
// NOTE(review): SM's declaration is not visible; presumably an llvm::SourceMgr.
331 SM
.AddNewSourceBuffer(std::move(BufferPtr
.get()), llvm::SMLoc());
// Filled in by the streamer while the assembler runs.
333 BenchmarkCode Result
;
// MC-layer setup taken from the State's TargetMachine.
// NOTE(review): the trailing arguments of the next two calls are not visible.
335 llvm::MCObjectFileInfo ObjectFileInfo
;
336 const llvm::TargetMachine
&TM
= State
.getTargetMachine();
337 llvm::MCContext
Context(TM
.getMCAsmInfo(), TM
.getMCRegisterInfo(),
339 ObjectFileInfo
.InitMCObjectFileInfo(TM
.getTargetTriple(), /*PIC*/ false,
// The streamer writes parsed instructions and annotations into Result.
341 BenchmarkCodeStreamer
Streamer(&Context
, TM
.getMCRegisterInfo(), &Result
);
342 const std::unique_ptr
<llvm::MCAsmParser
> AsmParser(
343 llvm::createMCAsmParser(SM
, Context
, Streamer
, *TM
.getMCAsmInfo()));
// NOTE(review): the condition guarding this error is not visible; presumably a
// null check on AsmParser.
345 return llvm::make_error
<BenchmarkFailure
>("cannot create asm parser");
// Route assembler comments to the streamer's HandleComment().
346 AsmParser
->getLexer().setCommentConsumer(&Streamer
);
// Create the target-specific parser and attach it to the generic one.
348 const std::unique_ptr
<llvm::MCTargetAsmParser
> TargetAsmParser(
349 TM
.getTarget().createMCAsmParser(*TM
.getMCSubtargetInfo(), *AsmParser
,
350 *TM
.getMCInstrInfo(),
351 llvm::MCTargetOptions()));
353 if (!TargetAsmParser
)
354 return llvm::make_error
<BenchmarkFailure
>(
355 "cannot create target asm parser");
356 AsmParser
->setTargetParser(*TargetAsmParser
);
// A true result from Run() is treated as a parse failure.
358 if (AsmParser
->Run(false))
359 return llvm::make_error
<BenchmarkFailure
>("cannot parse asm file");
// Any invalid annotation counted by the streamer fails the whole read.
360 if (Streamer
.numInvalidComments())
361 return llvm::make_error
<BenchmarkFailure
>(
362 llvm::Twine("found ")
363 .concat(llvm::Twine(Streamer
.numInvalidComments()))
364 .concat(" invalid LLVM-EXEGESIS comments"));
365 return std::vector
<BenchmarkCode
>{std::move(Result
)};
// Entry point for the benchmark modes: builds the list of BenchmarkCode
// configurations (generated from opcodes, or read from the snippets file), runs
// each one and writes the result as YAML to BenchmarkFile.
// NOTE(review): several lines of this function are missing from this excerpt
// (conditional-compilation guards, `continue` statements, an else branch).
368 void benchmarkMain() {
// NOTE(review): the lines around this call are not visible; presumably it is
// compiled only for builds without libpfm -- confirm.
370 llvm::report_fatal_error(
371 "benchmarking unavailable, LLVM was built without libpfm.");
// A true result from pfmInitialize() is treated as failure.
374 if (exegesis::pfm::pfmInitialize())
375 llvm::report_fatal_error("cannot initialize libpfm");
// Register the native target components and the native exegesis target.
377 llvm::InitializeNativeTarget();
378 llvm::InitializeNativeTargetAsmPrinter();
379 llvm::InitializeNativeTargetAsmParser();
380 InitializeNativeExegesisTarget();
// Build the LLVM state for the requested CPU and resolve the opcode selection.
382 const LLVMState
State(CpuName
);
383 const auto Opcodes
= getOpcodesOrDie(State
.getInstrInfo());
385 const auto Repetitor
= SnippetRepetitor::Create(RepetitionMode
, State
);
387 std::vector
<BenchmarkCode
> Configurations
;
// Opcode mode: generate configurations for every selected opcode.
388 if (!Opcodes
.empty()) {
389 for (const unsigned Opcode
: Opcodes
) {
390 // Ignore instructions without a sched class if
391 // -ignore-invalid-sched-class is passed.
// A sched class of 0 is what counts as "without a sched class" here.
392 if (IgnoreInvalidSchedClass
&&
393 State
.getInstrInfo().get(Opcode
).getSchedClass() == 0) {
394 llvm::errs() << State
.getInstrInfo().getName(Opcode
)
395 << ": ignoring instruction without sched class\n";
// The repetitor's reserved registers are passed as the forbidden set.
398 auto ConfigsForInstr
=
399 generateSnippets(State
, Opcode
, Repetitor
->getReservedRegs());
// A generation failure is logged with the opcode name as prefix.
// NOTE(review): the statement that follows the logging is not visible;
// presumably the loop skips to the next opcode.
400 if (!ConfigsForInstr
) {
401 llvm::logAllUnhandledErrors(
402 ConfigsForInstr
.takeError(), llvm::errs(),
403 llvm::Twine(State
.getInstrInfo().getName(Opcode
)).concat(": "));
// Move the generated configurations to the end of the overall list.
406 std::move(ConfigsForInstr
->begin(), ConfigsForInstr
->end(),
407 std::back_inserter(Configurations
));
// NOTE(review): presumably the else branch of the Opcodes.empty() check above
// (the `else` line is not visible): read the configurations from the snippets
// file, exiting on error.
410 Configurations
= ExitOnErr(readSnippets(State
, SnippetsFile
));
413 const std::unique_ptr
<BenchmarkRunner
> Runner
=
414 State
.getExegesisTarget().createBenchmarkRunner(BenchmarkMode
, State
);
// NOTE(review): the condition guarding this fatal error is not visible;
// presumably a null check on Runner.
416 llvm::report_fatal_error("cannot create benchmark runner");
419 if (NumRepetitions
== 0)
420 llvm::report_fatal_error("--num-repetitions must be greater than zero");
422 // Write to standard output if file is not set.
// NOTE(review): the statement guarded by this check is not visible here.
423 if (BenchmarkFile
.empty())
// Run every configuration and write its result to BenchmarkFile as YAML; a
// write failure exits through ExitOnErr.
426 for (const BenchmarkCode
&Conf
: Configurations
) {
427 InstructionBenchmark Result
= Runner
->runConfiguration(
428 Conf
, NumRepetitions
, *Repetitor
, DumpObjectToDisk
);
429 ExitOnErr(Result
.writeYaml(State
, BenchmarkFile
));
// Counterpart of the pfmInitialize() call above.
431 exegesis::pfm::pfmTerminate();
434 // Prints the results of running analysis pass `Pass` to file `OutputFilename`
435 // if OutputFilename is non-empty.
// `Name` is used only in the progress message printed to stderr. A failure
// returned by the pass is reported with report_fatal_error.
// NOTE(review): a few lines of this function are missing from this excerpt.
436 template <typename Pass
>
437 static void maybeRunAnalysis(const Analysis
&Analyzer
, const std::string
&Name
,
438 const std::string
&OutputFilename
) {
// NOTE(review): the statement guarded by this check is not visible; per the
// header comment, presumably an early return.
439 if (OutputFilename
.empty())
// The progress message is printed only when the output is not "-".
441 if (OutputFilename
!= "-") {
442 llvm::errs() << "Printing " << Name
<< " results to file '"
443 << OutputFilename
<< "'\n";
// Open the output stream with read and write access flags.
445 std::error_code ErrorCode
;
446 llvm::raw_fd_ostream
ClustersOS(OutputFilename
, ErrorCode
,
447 llvm::sys::fs::FA_Read
|
448 llvm::sys::fs::FA_Write
);
// NOTE(review): the condition guarding this fatal error is not visible;
// presumably a check of ErrorCode.
450 llvm::report_fatal_error("cannot open out file: " + OutputFilename
);
// Run the selected pass, writing to the stream.
451 if (auto Err
= Analyzer
.run
<Pass
>(ClustersOS
))
452 llvm::report_fatal_error(std::move(Err
));
// Entry point for analysis mode: reads benchmark points from BenchmarkFile,
// clusters them and runs the cluster and sched-class-inconsistency passes for
// whichever output files were requested.
// NOTE(review): some lines of this function are missing from this excerpt
// (the `Error` declaration, a null check and return statements).
455 static void analysisMain() {
456 if (BenchmarkFile
.empty())
457 llvm::report_fatal_error("--benchmarks-file must be set.");
// With neither output file set there would be nothing to print.
459 if (AnalysisClustersOutputFile
.empty() &&
460 AnalysisInconsistenciesOutputFile
.empty()) {
461 llvm::report_fatal_error(
462 "At least one of --analysis-clusters-output-file and "
463 "--analysis-inconsistencies-output-file must be specified.");
// Analysis registers the native target, asm printer and disassembler.
466 llvm::InitializeNativeTarget();
467 llvm::InitializeNativeTargetAsmPrinter();
468 llvm::InitializeNativeTargetDisassembler();
// The state is built with an empty CPU name, unlike benchmarkMain().
470 const LLVMState
State("");
471 const std::vector
<InstructionBenchmark
> Points
=
472 ExitOnErr(InstructionBenchmark::readYamls(State
, BenchmarkFile
));
473 llvm::outs() << "Parsed " << Points
.size() << " benchmark points\n";
// NOTE(review): the statement after this message is not visible; presumably an
// early return, since Points[0] is read below.
474 if (Points
.empty()) {
475 llvm::errs() << "no benchmarks to analyze\n";
478 // FIXME: Check that all points have the same triple/cpu.
479 // FIXME: Merge points from several runs (latency and uops).
// The target is looked up from the first point's triple only.
// NOTE(review): the declaration of `Error` is not visible in this excerpt.
482 const auto *TheTarget
=
483 llvm::TargetRegistry::lookupTarget(Points
[0].LLVMTriple
, Error
);
// NOTE(review): the condition guarding this message is not visible; presumably
// a null check on TheTarget followed by a return.
485 llvm::errs() << "unknown target '" << Points
[0].LLVMTriple
<< "'\n";
489 std::unique_ptr
<llvm::MCInstrInfo
> InstrInfo(TheTarget
->createMCInstrInfo());
// Cluster the points using the algorithm and parameters from the
// -analysis-* options.
491 const auto Clustering
= ExitOnErr(InstructionBenchmarkClustering::create(
492 Points
, AnalysisClusteringAlgorithm
, AnalysisDbscanNumPoints
,
493 AnalysisClusteringEpsilon
, InstrInfo
->getNumOpcodes()));
// InstrInfo is moved into the Analysis object and must not be used afterwards.
495 const Analysis
Analyzer(*TheTarget
, std::move(InstrInfo
), Clustering
,
496 AnalysisInconsistencyEpsilon
,
497 AnalysisDisplayUnstableOpcodes
);
// Each call is made unconditionally; maybeRunAnalysis() decides from the file
// name whether to print.
499 maybeRunAnalysis
<Analysis::PrintClusters
>(Analyzer
, "analysis clusters",
500 AnalysisClustersOutputFile
);
501 maybeRunAnalysis
<Analysis::PrintSchedClassInconsistencies
>(
502 Analyzer
, "sched class consistency analysis",
503 AnalysisInconsistenciesOutputFile
);
506 } // namespace exegesis
509 int main(int Argc
, char **Argv
) {
510 using namespace llvm
;
511 cl::ParseCommandLineOptions(Argc
, Argv
, "");
513 exegesis::ExitOnErr
.setExitCodeMapper([](const llvm::Error
&Err
) {
514 if (Err
.isA
<llvm::StringError
>())
519 if (exegesis::BenchmarkMode
== exegesis::InstructionBenchmark::Unknown
) {
520 exegesis::analysisMain();
522 exegesis::benchmarkMain();