Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / tools / llvm-exegesis / lib / Target.h
blob6de5b3c1065f1aab5abcc023e332437383771c0d
1 //===-- Target.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 ///
11 /// Classes that handle the creation of target-specific objects. This is
12 /// similar to Target/TargetRegistry.
13 ///
14 //===----------------------------------------------------------------------===//
16 #ifndef LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H
17 #define LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H
19 #include "BenchmarkResult.h"
20 #include "BenchmarkRunner.h"
21 #include "Error.h"
22 #include "LlvmState.h"
23 #include "PerfHelper.h"
24 #include "SnippetGenerator.h"
25 #include "llvm/CodeGen/TargetPassConfig.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/LegacyPassManager.h"
28 #include "llvm/MC/MCInst.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/TargetParser/SubtargetFeature.h"
33 #include "llvm/TargetParser/Triple.h"
35 namespace llvm {
36 namespace exegesis {
38 extern cl::OptionCategory Options;
39 extern cl::OptionCategory BenchmarkOptions;
40 extern cl::OptionCategory AnalysisOptions;
// Describes the performance counters that can be used to measure a CPU.
// All members are plain pointers/integers, so instances can be written as
// aggregate initializers.
struct PfmCountersInfo {
  // An optional name of a performance counter that can be used to measure
  // cycles.
  const char *CycleCounter;

  // An optional name of a performance counter that can be used to measure
  // uops.
  const char *UopsCounter;

  // An IssueCounter specifies how to measure uops issued to specific proc
  // resources.
  struct IssueCounter {
    // The name of the performance counter.
    const char *Counter;
    // The name of the ProcResource that this counter measures.
    const char *ProcResName;
  };

  // An optional list of IssueCounters: `IssueCounters` points at the first
  // entry and `NumIssueCounters` gives the number of entries.
  const IssueCounter *IssueCounters;
  unsigned NumIssueCounters;

  // Predefined instances; only declared here, the definitions live elsewhere.
  // NOTE(review): presumably `Default` is the fallback for CPUs without a
  // dedicated entry and `Dummy` is used when running without access to
  // performance counters -- confirm against the definitions.
  static const PfmCountersInfo Default;
  static const PfmCountersInfo Dummy;
};
66 struct CpuAndPfmCounters {
67 const char *CpuName;
68 const PfmCountersInfo *PCI;
69 bool operator<(StringRef S) const { return StringRef(CpuName) < S; }
72 class ExegesisTarget {
73 public:
74 typedef bool (*OpcodeAvailabilityChecker)(unsigned, const FeatureBitset &);
75 ExegesisTarget(ArrayRef<CpuAndPfmCounters> CpuPfmCounters,
76 OpcodeAvailabilityChecker IsOpcodeAvailable)
77 : CpuPfmCounters(CpuPfmCounters), IsOpcodeAvailable(IsOpcodeAvailable) {}
79 // Targets can use this to create target-specific perf counters.
80 virtual Expected<std::unique_ptr<pfm::Counter>>
81 createCounter(StringRef CounterName, const LLVMState &State,
82 const pid_t ProcessID = 0) const;
84 // Targets can use this to add target-specific passes in assembleToStream();
85 virtual void addTargetSpecificPasses(PassManagerBase &PM) const {}
87 // Generates code to move a constant into a the given register.
88 // Precondition: Value must fit into Reg.
89 virtual std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
90 const APInt &Value) const = 0;
92 // Generates the code for the lower munmap call. The code generated by this
93 // function may clobber registers.
94 virtual void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const {
95 report_fatal_error(
96 "generateLowerMunmap is not implemented on the current architecture");
99 // Generates the upper munmap call. The code generated by this function may
100 // clobber registers.
101 virtual void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const {
102 report_fatal_error(
103 "generateUpperMunmap is not implemented on the current architecture");
106 // Generates the code for an exit syscall. The code generated by this function
107 // may clobber registers.
108 virtual std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const {
109 report_fatal_error(
110 "generateExitSyscall is not implemented on the current architecture");
113 // Generates the code to mmap a region of code. The code generated by this
114 // function may clobber registers.
115 virtual std::vector<MCInst>
116 generateMmap(intptr_t Address, size_t Length,
117 intptr_t FileDescriptorAddress) const {
118 report_fatal_error(
119 "generateMmap is not implemented on the current architecture");
122 // Generates the mmap code for the aux memory. The code generated by this
123 // function may clobber registers.
124 virtual void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const {
125 report_fatal_error(
126 "generateMmapAuxMem is not implemented on the current architecture\n");
129 // Moves argument registers into other registers that won't get clobbered
130 // while making syscalls. The code generated by this function may clobber
131 // registers.
132 virtual void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const {
133 report_fatal_error("moveArgumentRegisters is not implemented on the "
134 "current architecture\n");
137 // Generates code to move argument registers, unmap memory above and below the
138 // snippet, and map the auxiliary memory into the subprocess. The code
139 // generated by this function may clobber registers.
140 virtual std::vector<MCInst> generateMemoryInitialSetup() const {
141 report_fatal_error("generateMemoryInitialSetup is not supported on the "
142 "current architecture\n");
145 // Returns true if all features are available that are required by Opcode.
146 virtual bool isOpcodeAvailable(unsigned Opcode,
147 const FeatureBitset &Features) const {
148 return IsOpcodeAvailable(Opcode, Features);
151 // Sets the stack register to the auxiliary memory so that operations
152 // requiring the stack can be formed (e.g., setting large registers). The code
153 // generated by this function may clobber registers.
154 virtual std::vector<MCInst> setStackRegisterToAuxMem() const {
155 report_fatal_error("setStackRegisterToAuxMem is not implemented on the "
156 "current architectures");
159 virtual intptr_t getAuxiliaryMemoryStartAddress() const {
160 report_fatal_error("getAuxiliaryMemoryStartAddress is not implemented on "
161 "the current architecture");
164 // Generates the necessary ioctl system calls to configure the perf counters.
165 // The code generated by this function preserves all registers if the
166 // parameter SaveRegisters is set to true.
167 virtual std::vector<MCInst> configurePerfCounter(long Request,
168 bool SaveRegisters) const {
169 report_fatal_error(
170 "configurePerfCounter is not implemented on the current architecture");
173 // Gets the ABI dependent registers that are used to pass arguments in a
174 // function call.
175 virtual std::vector<unsigned> getArgumentRegisters() const {
176 report_fatal_error(
177 "getArgumentRegisters is not implemented on the current architecture");
180 // Gets the registers that might potentially need to be saved by while
181 // the setup in the test harness executes.
182 virtual std::vector<unsigned> getRegistersNeedSaving() const {
183 report_fatal_error("getRegistersNeedSaving is not implemented on the "
184 "current architecture");
187 // Returns the register pointing to scratch memory, or 0 if this target
188 // does not support memory operands. The benchmark function uses the
189 // default calling convention.
190 virtual unsigned getScratchMemoryRegister(const Triple &) const { return 0; }
192 // Fills memory operands with references to the address at [Reg] + Offset.
193 virtual void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
194 unsigned Offset) const {
195 llvm_unreachable(
196 "fillMemoryOperands() requires getScratchMemoryRegister() > 0");
199 // Returns a counter usable as a loop counter.
200 virtual unsigned getLoopCounterRegister(const Triple &) const { return 0; }
202 // Adds the code to decrement the loop counter and
203 virtual void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
204 MachineBasicBlock &TargetMBB,
205 const MCInstrInfo &MII) const {
206 llvm_unreachable("decrementLoopCounterAndBranch() requires "
207 "getLoopCounterRegister() > 0");
210 // Returns a list of unavailable registers.
211 // Targets can use this to prevent some registers to be automatically selected
212 // for use in snippets.
213 virtual ArrayRef<unsigned> getUnavailableRegisters() const { return {}; }
215 // Returns the maximum number of bytes a load/store instruction can access at
216 // once. This is typically the size of the largest register available on the
217 // processor. Note that this only used as a hint to generate independant
218 // load/stores to/from memory, so the exact returned value does not really
219 // matter as long as it's large enough.
220 virtual unsigned getMaxMemoryAccessSize() const { return 0; }
222 // Assigns a random operand of the right type to variable Var.
223 // The target is responsible for handling any operand starting from
224 // OPERAND_FIRST_TARGET.
225 virtual Error randomizeTargetMCOperand(const Instruction &Instr,
226 const Variable &Var,
227 MCOperand &AssignedValue,
228 const BitVector &ForbiddenRegs) const {
229 return make_error<Failure>(
230 "targets with target-specific operands should implement this");
233 // Returns true if this instruction is supported as a back-to-back
234 // instructions.
235 // FIXME: Eventually we should discover this dynamically.
236 virtual bool allowAsBackToBack(const Instruction &Instr) const {
237 return true;
240 // For some instructions, it is interesting to measure how it's performance
241 // characteristics differ depending on it's operands.
242 // This allows us to produce all the interesting variants.
243 virtual std::vector<InstructionTemplate>
244 generateInstructionVariants(const Instruction &Instr,
245 unsigned MaxConfigsPerOpcode) const {
246 // By default, we're happy with whatever randomizer will give us.
247 return {&Instr};
250 // Checks hardware and software support for current benchmark mode.
251 // Returns an error if the target host does not have support to run the
252 // benchmark.
253 virtual Error checkFeatureSupport() const { return Error::success(); }
255 // Creates a snippet generator for the given mode.
256 std::unique_ptr<SnippetGenerator>
257 createSnippetGenerator(Benchmark::ModeE Mode,
258 const LLVMState &State,
259 const SnippetGenerator::Options &Opts) const;
260 // Creates a benchmark runner for the given mode.
261 Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner(
262 Benchmark::ModeE Mode, const LLVMState &State,
263 BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
264 BenchmarkRunner::ExecutionModeE ExecutionMode,
265 Benchmark::ResultAggregationModeE ResultAggMode = Benchmark::Min) const;
267 // Returns the ExegesisTarget for the given triple or nullptr if the target
268 // does not exist.
269 static const ExegesisTarget *lookup(Triple TT);
270 // Returns the default (unspecialized) ExegesisTarget.
271 static const ExegesisTarget &getDefault();
272 // Registers a target. Not thread safe.
273 static void registerTarget(ExegesisTarget *T);
275 virtual ~ExegesisTarget();
277 // Returns the Pfm counters for the given CPU (or the default if no pfm
278 // counters are defined for this CPU).
279 const PfmCountersInfo &getPfmCounters(StringRef CpuName) const;
281 // Returns dummy Pfm counters which can be used to execute generated snippet
282 // without access to performance counters.
283 const PfmCountersInfo &getDummyPfmCounters() const;
285 // Saves the CPU state that needs to be preserved when running a benchmark,
286 // and returns and RAII object that restores the state on destruction.
287 // By default no state is preserved.
288 struct SavedState {
289 virtual ~SavedState();
291 virtual std::unique_ptr<SavedState> withSavedState() const {
292 return std::make_unique<SavedState>();
295 private:
296 virtual bool matchesArch(Triple::ArchType Arch) const = 0;
298 // Targets can implement their own snippet generators/benchmarks runners by
299 // implementing these.
300 std::unique_ptr<SnippetGenerator> virtual createSerialSnippetGenerator(
301 const LLVMState &State, const SnippetGenerator::Options &Opts) const;
302 std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator(
303 const LLVMState &State, const SnippetGenerator::Options &Opts) const;
304 std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
305 const LLVMState &State, Benchmark::ModeE Mode,
306 BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
307 Benchmark::ResultAggregationModeE ResultAggMode,
308 BenchmarkRunner::ExecutionModeE ExecutionMode) const;
309 std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
310 const LLVMState &State, BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
311 Benchmark::ResultAggregationModeE ResultAggMode,
312 BenchmarkRunner::ExecutionModeE ExecutionMode) const;
314 const ExegesisTarget *Next = nullptr;
315 const ArrayRef<CpuAndPfmCounters> CpuPfmCounters;
316 const OpcodeAvailabilityChecker IsOpcodeAvailable;
319 } // namespace exegesis
320 } // namespace llvm
322 #endif // LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H