[sanitizer] Improve FreeBSD ASLR detection
[llvm-project.git] / llvm / tools / llvm-exegesis / lib / ParallelSnippetGenerator.cpp
blob7728fcb5d62eabd82a727cbe531dd970c75ccd82
1 //===-- ParallelSnippetGenerator.cpp ----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "ParallelSnippetGenerator.h"
11 #include "BenchmarkRunner.h"
12 #include "MCInstrDescView.h"
13 #include "Target.h"
15 // FIXME: Load constants into registers (e.g. with fld1) to not break
16 // instructions like x87.
18 // Ideally we would like the only limitation on executing instructions to be the
19 // availability of the CPU resources (e.g. execution ports) needed to execute
20 // them, instead of the availability of their data dependencies.
22 // To achieve that, one approach is to generate instructions that do not have
23 // data dependencies between them.
25 // For some instructions, this is trivial:
26 // mov rax, qword ptr [rsi]
27 // mov rax, qword ptr [rsi]
28 // mov rax, qword ptr [rsi]
29 // mov rax, qword ptr [rsi]
30 // For the above snippet, haswell just renames rax four times and executes the
31 // four instructions two at a time on P23 and P0126.
33 // For some instructions, we just need to make sure that the source is
34 // different from the destination. For example, IDIV8r reads from GPR and
35 // writes to AX. We just need to ensure that the Var is assigned a
36 // register which is different from AX:
37 // idiv bx
38 // idiv bx
39 // idiv bx
40 // idiv bx
41 // The above snippet will be able to fully saturate the ports, while the same
42 // with ax would issue one uop every `latency(IDIV8r)` cycles.
44 // Some instructions make this harder because they both read and write from
45 // the same register:
46 // inc rax
47 // inc rax
48 // inc rax
49 // inc rax
50 // This has a data dependency from each instruction to the next, limit the
51 // number of instructions that can be issued in parallel.
52 // It turns out that this is not a big issue on recent Intel CPUs because they
53 // have heuristics to balance port pressure. In the snippet above, subsequent
54 // instructions will end up evenly distributed on {P0,P1,P5,P6}, but some CPUs
55 // might end up executing them all on P0 (just because they can), or try
56 // avoiding P5 because it's usually under high pressure from vector
57 // instructions.
58 // This issue is even more important for high-latency instructions because
59 // they increase the idle time of the CPU, e.g. :
60 // imul rax, rbx
61 // imul rax, rbx
62 // imul rax, rbx
63 // imul rax, rbx
65 // To avoid that, we do the renaming statically by generating as many
66 // independent exclusive assignments as possible (until all possible registers
67 // are exhausted) e.g.:
68 // imul rax, rbx
69 // imul rcx, rbx
70 // imul rdx, rbx
71 // imul r8, rbx
73 // Some instruction even make the above static renaming impossible because
74 // they implicitly read and write from the same operand, e.g. ADC16rr reads
75 // and writes from EFLAGS.
76 // In that case we just use a greedy register assignment and hope for the
77 // best.
79 namespace llvm {
80 namespace exegesis {
82 static SmallVector<const Variable *, 8>
83 getVariablesWithTiedOperands(const Instruction &Instr) {
84 SmallVector<const Variable *, 8> Result;
85 for (const auto &Var : Instr.Variables)
86 if (Var.hasTiedOperands())
87 Result.push_back(&Var);
88 return Result;
91 ParallelSnippetGenerator::~ParallelSnippetGenerator() = default;
93 void ParallelSnippetGenerator::instantiateMemoryOperands(
94 const unsigned ScratchSpacePointerInReg,
95 std::vector<InstructionTemplate> &Instructions) const {
96 if (ScratchSpacePointerInReg == 0)
97 return; // no memory operands.
98 const auto &ET = State.getExegesisTarget();
99 const unsigned MemStep = ET.getMaxMemoryAccessSize();
100 const size_t OriginalInstructionsSize = Instructions.size();
101 size_t I = 0;
102 for (InstructionTemplate &IT : Instructions) {
103 ET.fillMemoryOperands(IT, ScratchSpacePointerInReg, I * MemStep);
104 ++I;
107 while (Instructions.size() < kMinNumDifferentAddresses) {
108 InstructionTemplate IT = Instructions[I % OriginalInstructionsSize];
109 ET.fillMemoryOperands(IT, ScratchSpacePointerInReg, I * MemStep);
110 ++I;
111 Instructions.push_back(std::move(IT));
113 assert(I * MemStep < BenchmarkRunner::ScratchSpace::kSize &&
114 "not enough scratch space");
117 static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
118 const LLVMState &State, const InstructionTemplate &IT,
119 const ArrayRef<const Variable *> TiedVariables,
120 const BitVector &ForbiddenRegisters) {
121 std::vector<InstructionTemplate> Instructions;
122 // Assign registers to variables in a round-robin manner. This is simple but
123 // ensures that the most register-constrained variable does not get starved.
124 std::vector<BitVector> PossibleRegsForVar;
125 for (const Variable *Var : TiedVariables) {
126 assert(Var);
127 const Operand &Op = IT.getInstr().getPrimaryOperand(*Var);
128 assert(Op.isReg());
129 BitVector PossibleRegs = Op.getRegisterAliasing().sourceBits();
130 remove(PossibleRegs, ForbiddenRegisters);
131 PossibleRegsForVar.push_back(std::move(PossibleRegs));
133 SmallVector<int, 2> Iterators(TiedVariables.size(), 0);
134 while (true) {
135 InstructionTemplate TmpIT = IT;
136 // Find a possible register for each variable in turn, marking the
137 // register as taken.
138 for (size_t VarId = 0; VarId < TiedVariables.size(); ++VarId) {
139 const int NextPossibleReg =
140 PossibleRegsForVar[VarId].find_next(Iterators[VarId]);
141 if (NextPossibleReg <= 0) {
142 return Instructions;
144 TmpIT.getValueFor(*TiedVariables[VarId]) =
145 MCOperand::createReg(NextPossibleReg);
146 // Bump iterator.
147 Iterators[VarId] = NextPossibleReg;
148 // Prevent other variables from using the register.
149 for (BitVector &OtherPossibleRegs : PossibleRegsForVar) {
150 OtherPossibleRegs.reset(NextPossibleReg);
153 Instructions.push_back(std::move(TmpIT));
157 Expected<std::vector<CodeTemplate>>
158 ParallelSnippetGenerator::generateCodeTemplates(
159 InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
160 const Instruction &Instr = Variant.getInstr();
161 CodeTemplate CT;
162 CT.ScratchSpacePointerInReg =
163 Instr.hasMemoryOperands()
164 ? State.getExegesisTarget().getScratchMemoryRegister(
165 State.getTargetMachine().getTargetTriple())
166 : 0;
167 const AliasingConfigurations SelfAliasing(Instr, Instr);
168 if (SelfAliasing.empty()) {
169 CT.Info = "instruction is parallel, repeating a random one.";
170 CT.Instructions.push_back(std::move(Variant));
171 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
172 return getSingleton(std::move(CT));
174 if (SelfAliasing.hasImplicitAliasing()) {
175 CT.Info = "instruction is serial, repeating a random one.";
176 CT.Instructions.push_back(std::move(Variant));
177 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
178 return getSingleton(std::move(CT));
180 const auto TiedVariables = getVariablesWithTiedOperands(Instr);
181 if (!TiedVariables.empty()) {
182 CT.Info = "instruction has tied variables, using static renaming.";
183 CT.Instructions = generateSnippetUsingStaticRenaming(
184 State, Variant, TiedVariables, ForbiddenRegisters);
185 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
186 return getSingleton(std::move(CT));
188 // No tied variables, we pick random values for defs.
190 // We don't want to accidentally serialize the instruction,
191 // so we must be sure that we don't pick a def that is an implicit use,
192 // or a use that is an implicit def, so record implicit regs now.
193 BitVector ImplicitUses(State.getRegInfo().getNumRegs());
194 BitVector ImplicitDefs(State.getRegInfo().getNumRegs());
195 for (const auto &Op : Instr.Operands) {
196 if (Op.isReg() && Op.isImplicit() && !Op.isMemory()) {
197 assert(Op.isImplicitReg() && "Not an implicit register operand?");
198 if (Op.isUse())
199 ImplicitUses.set(Op.getImplicitReg());
200 else {
201 assert(Op.isDef() && "Not a use and not a def?");
202 ImplicitDefs.set(Op.getImplicitReg());
206 const auto ImplicitUseAliases =
207 getAliasedBits(State.getRegInfo(), ImplicitUses);
208 const auto ImplicitDefAliases =
209 getAliasedBits(State.getRegInfo(), ImplicitDefs);
210 BitVector Defs(State.getRegInfo().getNumRegs());
211 for (const auto &Op : Instr.Operands) {
212 if (Op.isReg() && Op.isExplicit() && Op.isDef() && !Op.isMemory()) {
213 auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
214 // Do not use forbidden registers and regs that are implicitly used.
215 // Note that we don't try to avoid using implicit defs explicitly.
216 remove(PossibleRegisters, ForbiddenRegisters);
217 remove(PossibleRegisters, ImplicitUseAliases);
218 if (!PossibleRegisters.any())
219 return make_error<StringError>(
220 Twine("no available registers:\ncandidates:\n")
221 .concat(debugString(State.getRegInfo(),
222 Op.getRegisterAliasing().sourceBits()))
223 .concat("\nforbidden:\n")
224 .concat(debugString(State.getRegInfo(), ForbiddenRegisters))
225 .concat("\nimplicit use:\n")
226 .concat(debugString(State.getRegInfo(), ImplicitUseAliases)),
227 inconvertibleErrorCode());
228 const auto RandomReg = randomBit(PossibleRegisters);
229 Defs.set(RandomReg);
230 Variant.getValueFor(Op) = MCOperand::createReg(RandomReg);
233 // And pick random use values that are not reserved and don't alias with defs.
234 // Note that we don't try to avoid using implicit uses explicitly.
235 const auto DefAliases = getAliasedBits(State.getRegInfo(), Defs);
236 for (const auto &Op : Instr.Operands) {
237 if (Op.isReg() && Op.isExplicit() && Op.isUse() && !Op.isMemory()) {
238 auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
239 remove(PossibleRegisters, ForbiddenRegisters);
240 remove(PossibleRegisters, DefAliases);
241 remove(PossibleRegisters, ImplicitDefAliases);
242 assert(PossibleRegisters.any() && "No register left to choose from");
243 const auto RandomReg = randomBit(PossibleRegisters);
244 Variant.getValueFor(Op) = MCOperand::createReg(RandomReg);
247 CT.Info =
248 "instruction has no tied variables picking Uses different from defs";
249 CT.Instructions.push_back(std::move(Variant));
250 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
251 return getSingleton(std::move(CT));
254 constexpr const size_t ParallelSnippetGenerator::kMinNumDifferentAddresses;
256 } // namespace exegesis
257 } // namespace llvm