1 //===-- Target.cpp ----------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "../Target.h"
10 #include "../Error.h"
11 #include "../ParallelSnippetGenerator.h"
12 #include "../SerialSnippetGenerator.h"
13 #include "../SnippetGenerator.h"
14 #include "../SubprocessMemory.h"
15 #include "MCTargetDesc/X86BaseInfo.h"
16 #include "MCTargetDesc/X86MCTargetDesc.h"
17 #include "X86.h"
18 #include "X86Counter.h"
19 #include "X86RegisterInfo.h"
20 #include "llvm/ADT/Sequence.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/MC/MCInstBuilder.h"
23 #include "llvm/Support/Errc.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/FormatVariadic.h"
27 #include "llvm/TargetParser/Host.h"
29 #include <memory>
30 #include <string>
31 #include <vector>
32 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
33 #include <immintrin.h>
34 #include <intrin.h>
35 #endif
36 #if defined(_MSC_VER) && defined(_M_X64)
37 #include <float.h> // For _clearfp in ~X86SavedState().
38 #endif
40 #ifdef __linux__
41 #include <sys/mman.h>
42 #include <sys/syscall.h>
43 #include <unistd.h>
44 #endif
46 #define GET_AVAILABLE_OPCODE_CHECKER
47 #include "X86GenInstrInfo.inc"
49 namespace llvm {
50 namespace exegesis {
52 // If a positive value is specified, we are going to use the LBR in
53 // latency-mode.
55 // Note:
56 // - A small value is preferred, but too low a value could result in
57 // throttling.
58 // - A prime number is preferred to avoid always skipping certain blocks.
60 static cl::opt<unsigned> LbrSamplingPeriod(
61 "x86-lbr-sample-period",
62 cl::desc("The sample period (nbranches/sample), used for LBR sampling"),
63 cl::cat(BenchmarkOptions), cl::init(0));
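// For example, a plausible (illustrative, not prescriptive) invocation using
// this flag together with standard llvm-exegesis options might be:
//   llvm-exegesis -mode=latency -opcode-name=ADD64rr -x86-lbr-sample-period=199
// where 199 is a small prime, per the guidance above.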
65 static cl::opt<bool>
66 DisableUpperSSERegisters("x86-disable-upper-sse-registers",
67 cl::desc("Disable XMM8-XMM15 register usage"),
68 cl::cat(BenchmarkOptions), cl::init(false));
70 // FIXME: Validate that repetition-mode is loop if LBR is requested.
72 // Returns a non-null reason if we cannot handle the memory references in this
73 // instruction.
74 static const char *isInvalidMemoryInstr(const Instruction &Instr) {
75 switch (Instr.Description.TSFlags & X86II::FormMask) {
76 default:
77 return "Unknown FormMask value";
78 // These have no memory access.
79 case X86II::Pseudo:
80 case X86II::RawFrm:
81 case X86II::AddCCFrm:
82 case X86II::PrefixByte:
83 case X86II::MRMDestReg:
84 case X86II::MRMSrcReg:
85 case X86II::MRMSrcReg4VOp3:
86 case X86II::MRMSrcRegOp4:
87 case X86II::MRMSrcRegCC:
88 case X86II::MRMXrCC:
89 case X86II::MRMr0:
90 case X86II::MRMXr:
91 case X86II::MRM0r:
92 case X86II::MRM1r:
93 case X86II::MRM2r:
94 case X86II::MRM3r:
95 case X86II::MRM4r:
96 case X86II::MRM5r:
97 case X86II::MRM6r:
98 case X86II::MRM7r:
99 case X86II::MRM0X:
100 case X86II::MRM1X:
101 case X86II::MRM2X:
102 case X86II::MRM3X:
103 case X86II::MRM4X:
104 case X86II::MRM5X:
105 case X86II::MRM6X:
106 case X86II::MRM7X:
107 case X86II::MRM_C0:
108 case X86II::MRM_C1:
109 case X86II::MRM_C2:
110 case X86II::MRM_C3:
111 case X86II::MRM_C4:
112 case X86II::MRM_C5:
113 case X86II::MRM_C6:
114 case X86II::MRM_C7:
115 case X86II::MRM_C8:
116 case X86II::MRM_C9:
117 case X86II::MRM_CA:
118 case X86II::MRM_CB:
119 case X86II::MRM_CC:
120 case X86II::MRM_CD:
121 case X86II::MRM_CE:
122 case X86II::MRM_CF:
123 case X86II::MRM_D0:
124 case X86II::MRM_D1:
125 case X86II::MRM_D2:
126 case X86II::MRM_D3:
127 case X86II::MRM_D4:
128 case X86II::MRM_D5:
129 case X86II::MRM_D6:
130 case X86II::MRM_D7:
131 case X86II::MRM_D8:
132 case X86II::MRM_D9:
133 case X86II::MRM_DA:
134 case X86II::MRM_DB:
135 case X86II::MRM_DC:
136 case X86II::MRM_DD:
137 case X86II::MRM_DE:
138 case X86II::MRM_DF:
139 case X86II::MRM_E0:
140 case X86II::MRM_E1:
141 case X86II::MRM_E2:
142 case X86II::MRM_E3:
143 case X86II::MRM_E4:
144 case X86II::MRM_E5:
145 case X86II::MRM_E6:
146 case X86II::MRM_E7:
147 case X86II::MRM_E8:
148 case X86II::MRM_E9:
149 case X86II::MRM_EA:
150 case X86II::MRM_EB:
151 case X86II::MRM_EC:
152 case X86II::MRM_ED:
153 case X86II::MRM_EE:
154 case X86II::MRM_EF:
155 case X86II::MRM_F0:
156 case X86II::MRM_F1:
157 case X86II::MRM_F2:
158 case X86II::MRM_F3:
159 case X86II::MRM_F4:
160 case X86II::MRM_F5:
161 case X86II::MRM_F6:
162 case X86II::MRM_F7:
163 case X86II::MRM_F8:
164 case X86II::MRM_F9:
165 case X86II::MRM_FA:
166 case X86II::MRM_FB:
167 case X86II::MRM_FC:
168 case X86II::MRM_FD:
169 case X86II::MRM_FE:
170 case X86II::MRM_FF:
171 case X86II::RawFrmImm8:
172 return nullptr;
173 case X86II::AddRegFrm:
174 return (Instr.Description.Opcode == X86::POP16r ||
175 Instr.Description.Opcode == X86::POP32r ||
176 Instr.Description.Opcode == X86::PUSH16r ||
177 Instr.Description.Opcode == X86::PUSH32r)
178 ? "unsupported opcode: unsupported memory access"
179 : nullptr;
180 // These access memory and are handled.
181 case X86II::MRMDestMem:
182 case X86II::MRMSrcMem:
183 case X86II::MRMSrcMem4VOp3:
184 case X86II::MRMSrcMemOp4:
185 case X86II::MRMSrcMemCC:
186 case X86II::MRMXmCC:
187 case X86II::MRMXm:
188 case X86II::MRM0m:
189 case X86II::MRM1m:
190 case X86II::MRM2m:
191 case X86II::MRM3m:
192 case X86II::MRM4m:
193 case X86II::MRM5m:
194 case X86II::MRM6m:
195 case X86II::MRM7m:
196 return nullptr;
197 // These access memory and are not handled yet.
198 case X86II::RawFrmImm16:
199 case X86II::RawFrmMemOffs:
200 case X86II::RawFrmSrc:
201 case X86II::RawFrmDst:
202 case X86II::RawFrmDstSrc:
203 return "unsupported opcode: non uniform memory access";
204 }
205 }
207 // If the opcode is invalid, returns a pointer to a character literal indicating
208 // the reason. nullptr indicates a valid opcode.
209 static const char *isInvalidOpcode(const Instruction &Instr) {
210 const auto OpcodeName = Instr.Name;
211 if ((Instr.Description.TSFlags & X86II::FormMask) == X86II::Pseudo)
212 return "unsupported opcode: pseudo instruction";
213 if ((OpcodeName.startswith("POP") && !OpcodeName.startswith("POPCNT")) ||
214 OpcodeName.startswith("PUSH") || OpcodeName.startswith("ADJCALLSTACK") ||
215 OpcodeName.startswith("LEAVE"))
216 return "unsupported opcode: Push/Pop/AdjCallStack/Leave";
217 switch (Instr.Description.Opcode) {
218 case X86::LFS16rm:
219 case X86::LFS32rm:
220 case X86::LFS64rm:
221 case X86::LGS16rm:
222 case X86::LGS32rm:
223 case X86::LGS64rm:
224 case X86::LSS16rm:
225 case X86::LSS32rm:
226 case X86::LSS64rm:
227 case X86::SYSENTER:
228 case X86::WRFSBASE:
229 case X86::WRFSBASE64:
230 return "unsupported opcode";
231 default:
232 break;
233 }
234 if (const auto reason = isInvalidMemoryInstr(Instr))
235 return reason;
236 // We do not handle instructions with OPERAND_PCREL.
237 for (const Operand &Op : Instr.Operands)
238 if (Op.isExplicit() &&
239 Op.getExplicitOperandInfo().OperandType == MCOI::OPERAND_PCREL)
240 return "unsupported opcode: PC relative operand";
241 // We do not handle second-form X87 instructions. We only handle first-form
242 // ones (_Fp), see comment in X86InstrFPStack.td.
243 for (const Operand &Op : Instr.Operands)
244 if (Op.isReg() && Op.isExplicit() &&
245 Op.getExplicitOperandInfo().RegClass == X86::RSTRegClassID)
246 return "unsupported second-form X87 instruction";
247 return nullptr;
248 }
250 static unsigned getX86FPFlags(const Instruction &Instr) {
251 return Instr.Description.TSFlags & X86II::FPTypeMask;
252 }
254 // Helper to fill a memory operand with a value.
255 static void setMemOp(InstructionTemplate &IT, int OpIdx,
256 const MCOperand &OpVal) {
257 const auto Op = IT.getInstr().Operands[OpIdx];
258 assert(Op.isExplicit() && "invalid memory pattern");
259 IT.getValueFor(Op) = OpVal;
260 }
262 // Common (latency, uops) code for LEA templates. `RestrictDestRegs` takes the
263 // addressing base and index registers and restricts the set of candidate LEA destination registers.
264 static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon(
265 const Instruction &Instr, const BitVector &ForbiddenRegisters,
266 const LLVMState &State, const SnippetGenerator::Options &Opts,
267 std::function<void(unsigned, unsigned, BitVector &CandidateDestRegs)>
268 RestrictDestRegs) {
269 assert(Instr.Operands.size() == 6 && "invalid LEA");
270 assert(X86II::getMemoryOperandNo(Instr.Description.TSFlags) == 1 &&
271 "invalid LEA");
273 constexpr const int kDestOp = 0;
274 constexpr const int kBaseOp = 1;
275 constexpr const int kIndexOp = 3;
276 auto PossibleDestRegs =
277 Instr.Operands[kDestOp].getRegisterAliasing().sourceBits();
278 remove(PossibleDestRegs, ForbiddenRegisters);
279 auto PossibleBaseRegs =
280 Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits();
281 remove(PossibleBaseRegs, ForbiddenRegisters);
282 auto PossibleIndexRegs =
283 Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits();
284 remove(PossibleIndexRegs, ForbiddenRegisters);
286 const auto &RegInfo = State.getRegInfo();
287 std::vector<CodeTemplate> Result;
288 for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) {
289 for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) {
290 for (int LogScale = 0; LogScale <= 3; ++LogScale) {
291 // FIXME: Add an option for controlling how we explore immediates.
292 for (const int Disp : {0, 42}) {
293 InstructionTemplate IT(&Instr);
294 const int64_t Scale = 1ull << LogScale;
295 setMemOp(IT, 1, MCOperand::createReg(BaseReg));
296 setMemOp(IT, 2, MCOperand::createImm(Scale));
297 setMemOp(IT, 3, MCOperand::createReg(IndexReg));
298 setMemOp(IT, 4, MCOperand::createImm(Disp));
299 // SegmentReg must be 0 for LEA.
300 setMemOp(IT, 5, MCOperand::createReg(0));
302 // Output reg candidates are selected by the caller.
303 auto PossibleDestRegsNow = PossibleDestRegs;
304 RestrictDestRegs(BaseReg, IndexReg, PossibleDestRegsNow);
305 assert(PossibleDestRegsNow.set_bits().begin() !=
306 PossibleDestRegsNow.set_bits().end() &&
307 "no remaining registers");
308 setMemOp(
309 IT, 0,
310 MCOperand::createReg(*PossibleDestRegsNow.set_bits().begin()));
312 CodeTemplate CT;
313 CT.Instructions.push_back(std::move(IT));
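// The Config string renders the LEA memory operand in AT&T-style
// "disp(%base, %index, scale)" form; e.g. BaseReg RAX, IndexReg RDI,
// Scale 4 and Disp 42 come out as "42(%RAX, %RDI, 4)".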
314 CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg),
315 RegInfo.getName(IndexReg), Scale, Disp)
316 .str();
317 Result.push_back(std::move(CT));
318 if (Result.size() >= Opts.MaxConfigsPerOpcode)
319 return std::move(Result);
320 }
321 }
322 }
323 }
325 return std::move(Result);
326 }
328 namespace {
329 class X86SerialSnippetGenerator : public SerialSnippetGenerator {
330 public:
331 using SerialSnippetGenerator::SerialSnippetGenerator;
333 Expected<std::vector<CodeTemplate>>
334 generateCodeTemplates(InstructionTemplate Variant,
335 const BitVector &ForbiddenRegisters) const override;
336 };
337 } // namespace
339 Expected<std::vector<CodeTemplate>>
340 X86SerialSnippetGenerator::generateCodeTemplates(
341 InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
342 const Instruction &Instr = Variant.getInstr();
344 if (const auto reason = isInvalidOpcode(Instr))
345 return make_error<Failure>(reason);
347 // LEA gets special attention.
348 const auto Opcode = Instr.Description.getOpcode();
349 if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
350 return generateLEATemplatesCommon(
351 Instr, ForbiddenRegisters, State, Opts,
352 [this](unsigned BaseReg, unsigned IndexReg,
353 BitVector &CandidateDestRegs) {
354 // We just select a destination register that aliases the base
355 // register.
356 CandidateDestRegs &=
357 State.getRATC().getRegister(BaseReg).aliasedBits();
358 });
359 }
361 if (Instr.hasMemoryOperands())
362 return make_error<Failure>(
363 "unsupported memory operand in latency measurements");
365 switch (getX86FPFlags(Instr)) {
366 case X86II::NotFP:
367 return SerialSnippetGenerator::generateCodeTemplates(Variant,
368 ForbiddenRegisters);
369 case X86II::ZeroArgFP:
370 case X86II::OneArgFP:
371 case X86II::SpecialFP:
372 case X86II::CompareFP:
373 case X86II::CondMovFP:
374 return make_error<Failure>("Unsupported x87 Instruction");
375 case X86II::OneArgFPRW:
376 case X86II::TwoArgFP:
377 // These are instructions like
378 // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
379 // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
380 // They are intrinsically serial and do not modify the state of the stack.
381 return generateSelfAliasingCodeTemplates(Variant, ForbiddenRegisters);
382 default:
383 llvm_unreachable("Unknown FP Type!");
384 }
385 }
387 namespace {
388 class X86ParallelSnippetGenerator : public ParallelSnippetGenerator {
389 public:
390 using ParallelSnippetGenerator::ParallelSnippetGenerator;
392 Expected<std::vector<CodeTemplate>>
393 generateCodeTemplates(InstructionTemplate Variant,
394 const BitVector &ForbiddenRegisters) const override;
395 };
397 } // namespace
399 Expected<std::vector<CodeTemplate>>
400 X86ParallelSnippetGenerator::generateCodeTemplates(
401 InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
402 const Instruction &Instr = Variant.getInstr();
404 if (const auto reason = isInvalidOpcode(Instr))
405 return make_error<Failure>(reason);
407 // LEA gets special attention.
408 const auto Opcode = Instr.Description.getOpcode();
409 if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
410 return generateLEATemplatesCommon(
411 Instr, ForbiddenRegisters, State, Opts,
412 [this](unsigned BaseReg, unsigned IndexReg,
413 BitVector &CandidateDestRegs) {
414 // Any destination register that is not used for addressing is fine.
415 remove(CandidateDestRegs,
416 State.getRATC().getRegister(BaseReg).aliasedBits());
417 remove(CandidateDestRegs,
418 State.getRATC().getRegister(IndexReg).aliasedBits());
419 });
420 }
422 switch (getX86FPFlags(Instr)) {
423 case X86II::NotFP:
424 return ParallelSnippetGenerator::generateCodeTemplates(Variant,
425 ForbiddenRegisters);
426 case X86II::ZeroArgFP:
427 case X86II::OneArgFP:
428 case X86II::SpecialFP:
429 return make_error<Failure>("Unsupported x87 Instruction");
430 case X86II::OneArgFPRW:
431 case X86II::TwoArgFP:
432 // These are instructions like
433 // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
434 // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
435 // They are intrinsically serial and do not modify the state of the stack.
436 // We generate the same code for latency and uops.
437 return generateSelfAliasingCodeTemplates(Variant, ForbiddenRegisters);
438 case X86II::CompareFP:
439 case X86II::CondMovFP:
440 // We can compute uops for any FP instruction that does not grow or shrink
441 // the stack (either do not touch the stack or push as much as they pop).
442 return generateUnconstrainedCodeTemplates(
443 Variant, "instruction does not grow/shrink the FP stack");
444 default:
445 llvm_unreachable("Unknown FP Type!");
446 }
447 }
449 static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
450 switch (RegBitWidth) {
451 case 8:
452 return X86::MOV8ri;
453 case 16:
454 return X86::MOV16ri;
455 case 32:
456 return X86::MOV32ri;
457 case 64:
458 return X86::MOV64ri;
459 }
460 llvm_unreachable("Invalid Value Width");
461 }
463 // Generates an instruction to load an immediate value into a register.
464 static MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
465 const APInt &Value) {
466 if (Value.getBitWidth() > RegBitWidth)
467 llvm_unreachable("Value must fit in the Register");
468 return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
469 .addReg(Reg)
470 .addImm(Value.getZExtValue());
471 }
473 // Allocates scratch memory on the stack.
474 static MCInst allocateStackSpace(unsigned Bytes) {
475 return MCInstBuilder(X86::SUB64ri8)
476 .addReg(X86::RSP)
477 .addReg(X86::RSP)
478 .addImm(Bytes);
479 }
481 // Fills scratch memory at offset `OffsetBytes` with value `Imm`.
482 static MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
483 uint64_t Imm) {
484 return MCInstBuilder(MovOpcode)
485 // Address = ESP
486 .addReg(X86::RSP) // BaseReg
487 .addImm(1) // ScaleAmt
488 .addReg(0) // IndexReg
489 .addImm(OffsetBytes) // Disp
490 .addReg(0) // Segment
491 // Immediate.
492 .addImm(Imm);
493 }
495 // Loads scratch memory into register `Reg` using opcode `RMOpcode`.
496 static MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
497 return MCInstBuilder(RMOpcode)
498 .addReg(Reg)
499 // Address = ESP
500 .addReg(X86::RSP) // BaseReg
501 .addImm(1) // ScaleAmt
502 .addReg(0) // IndexReg
503 .addImm(0) // Disp
504 .addReg(0); // Segment
505 }
507 // Releases scratch memory.
508 static MCInst releaseStackSpace(unsigned Bytes) {
509 return MCInstBuilder(X86::ADD64ri8)
510 .addReg(X86::RSP)
511 .addReg(X86::RSP)
512 .addImm(Bytes);
513 }
515 // Reserves some space on the stack, fills it with the content of the provided
516 // constant, and provides methods to load the stack value into a register.
517 namespace {
518 struct ConstantInliner {
519 explicit ConstantInliner(const APInt &Constant) : Constant_(Constant) {}
521 std::vector<MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
522 unsigned Opcode);
524 std::vector<MCInst> loadX87STAndFinalize(unsigned Reg);
526 std::vector<MCInst> loadX87FPAndFinalize(unsigned Reg);
528 std::vector<MCInst> popFlagAndFinalize();
530 std::vector<MCInst> loadImplicitRegAndFinalize(unsigned Opcode,
531 unsigned Value);
533 private:
534 ConstantInliner &add(const MCInst &Inst) {
535 Instructions.push_back(Inst);
536 return *this;
537 }
539 void initStack(unsigned Bytes);
541 static constexpr const unsigned kF80Bytes = 10; // 80 bits.
543 APInt Constant_;
544 std::vector<MCInst> Instructions;
545 };
546 } // namespace
548 std::vector<MCInst> ConstantInliner::loadAndFinalize(unsigned Reg,
549 unsigned RegBitWidth,
550 unsigned Opcode) {
551 assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits");
552 initStack(RegBitWidth / 8);
553 add(loadToReg(Reg, Opcode));
554 add(releaseStackSpace(RegBitWidth / 8));
555 return std::move(Instructions);
556 }
558 std::vector<MCInst> ConstantInliner::loadX87STAndFinalize(unsigned Reg) {
559 initStack(kF80Bytes);
560 add(MCInstBuilder(X86::LD_F80m)
561 // Address = ESP
562 .addReg(X86::RSP) // BaseReg
563 .addImm(1) // ScaleAmt
564 .addReg(0) // IndexReg
565 .addImm(0) // Disp
566 .addReg(0)); // Segment
567 if (Reg != X86::ST0)
568 add(MCInstBuilder(X86::ST_Frr).addReg(Reg));
569 add(releaseStackSpace(kF80Bytes));
570 return std::move(Instructions);
571 }
573 std::vector<MCInst> ConstantInliner::loadX87FPAndFinalize(unsigned Reg) {
574 initStack(kF80Bytes);
575 add(MCInstBuilder(X86::LD_Fp80m)
576 .addReg(Reg)
577 // Address = ESP
578 .addReg(X86::RSP) // BaseReg
579 .addImm(1) // ScaleAmt
580 .addReg(0) // IndexReg
581 .addImm(0) // Disp
582 .addReg(0)); // Segment
583 add(releaseStackSpace(kF80Bytes));
584 return std::move(Instructions);
585 }
587 std::vector<MCInst> ConstantInliner::popFlagAndFinalize() {
588 initStack(8);
589 add(MCInstBuilder(X86::POPF64));
590 return std::move(Instructions);
591 }
593 std::vector<MCInst>
594 ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) {
595 add(allocateStackSpace(4));
596 add(fillStackSpace(X86::MOV32mi, 0, Value)); // Mask all FP exceptions
597 add(MCInstBuilder(Opcode)
598 // Address = ESP
599 .addReg(X86::RSP) // BaseReg
600 .addImm(1) // ScaleAmt
601 .addReg(0) // IndexReg
602 .addImm(0) // Disp
603 .addReg(0)); // Segment
604 add(releaseStackSpace(4));
605 return std::move(Instructions);
606 }
608 void ConstantInliner::initStack(unsigned Bytes) {
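// The constant is written out in 4-, 2- and 1-byte chunks. For example, an
// 80-bit x87 constant (Bytes == kF80Bytes == 10) produces two MOV32mi stores
// at offsets 0 and 4 followed by one MOV16mi store at offset 8, covering all
// 10 bytes.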
609 assert(Constant_.getBitWidth() <= Bytes * 8 &&
610 "Value does not have the correct size");
611 const APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
612 ? Constant_.sext(Bytes * 8)
613 : Constant_;
614 add(allocateStackSpace(Bytes));
615 size_t ByteOffset = 0;
616 for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
617 add(fillStackSpace(
618 X86::MOV32mi, ByteOffset,
619 WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
620 if (Bytes - ByteOffset >= 2) {
621 add(fillStackSpace(
622 X86::MOV16mi, ByteOffset,
623 WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
624 ByteOffset += 2;
625 }
626 if (Bytes - ByteOffset >= 1)
627 add(fillStackSpace(
628 X86::MOV8mi, ByteOffset,
629 WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
630 }
632 #include "X86GenExegesis.inc"
634 namespace {
636 class X86SavedState : public ExegesisTarget::SavedState {
637 public:
638 X86SavedState() {
639 #if defined(_MSC_VER) && defined(_M_X64)
640 _fxsave64(FPState);
641 Eflags = __readeflags();
642 #elif defined(__GNUC__) && defined(__x86_64__)
643 __builtin_ia32_fxsave64(FPState);
644 Eflags = __builtin_ia32_readeflags_u64();
645 #else
646 report_fatal_error("X86 exegesis running on unsupported target");
647 #endif
648 }
650 ~X86SavedState() {
651 // Restoring the X87 state does not flush pending exceptions, so make sure
652 // these exceptions are flushed now.
653 #if defined(_MSC_VER) && defined(_M_X64)
654 _clearfp();
655 _fxrstor64(FPState);
656 __writeeflags(Eflags);
657 #elif defined(__GNUC__) && defined(__x86_64__)
658 asm volatile("fwait");
659 __builtin_ia32_fxrstor64(FPState);
660 __builtin_ia32_writeeflags_u64(Eflags);
661 #else
662 report_fatal_error("X86 exegesis running on unsupported target");
663 #endif
664 }
666 private:
667 #if defined(__x86_64__) || defined(_M_X64)
668 alignas(16) char FPState[512];
669 uint64_t Eflags;
670 #endif
671 };
673 class ExegesisX86Target : public ExegesisTarget {
674 public:
675 ExegesisX86Target()
676 : ExegesisTarget(X86CpuPfmCounters, X86_MC::isOpcodeAvailable) {}
678 Expected<std::unique_ptr<pfm::Counter>>
679 createCounter(StringRef CounterName, const LLVMState &State,
680 const pid_t ProcessID) const override {
681 // If LbrSamplingPeriod was provided, then ignore the
682 // CounterName because we only have one for LBR.
683 if (LbrSamplingPeriod > 0) {
684 // Can't use LBR without HAVE_LIBPFM, LIBPFM_HAS_FIELD_CYCLES, or without
685 // __linux__ (for now)
686 #if defined(HAVE_LIBPFM) && defined(LIBPFM_HAS_FIELD_CYCLES) && \
687 defined(__linux__)
688 return std::make_unique<X86LbrCounter>(
689 X86LbrPerfEvent(LbrSamplingPeriod));
690 #else
691 return llvm::make_error<llvm::StringError>(
692 "LBR counter requested without HAVE_LIBPFM, LIBPFM_HAS_FIELD_CYCLES, "
693 "or running on Linux.",
694 llvm::errc::invalid_argument);
695 #endif
696 }
697 return ExegesisTarget::createCounter(CounterName, State, ProcessID);
698 }
700 enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 };
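// CodeSize and AuxiliaryMemoryFD are seeded from RDI/RSI by
// moveArgumentRegisters() below; R12/R13 are presumably chosen because they
// are preserved across the munmap/mmap syscalls issued during setup.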
702 private:
703 void addTargetSpecificPasses(PassManagerBase &PM) const override;
705 unsigned getScratchMemoryRegister(const Triple &TT) const override;
707 unsigned getLoopCounterRegister(const Triple &) const override;
709 unsigned getMaxMemoryAccessSize() const override { return 64; }
711 Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var,
712 MCOperand &AssignedValue,
713 const BitVector &ForbiddenRegs) const override;
715 void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
716 unsigned Offset) const override;
718 void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
719 MachineBasicBlock &TargetMBB,
720 const MCInstrInfo &MII) const override;
722 std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
723 const APInt &Value) const override;
725 #ifdef __linux__
726 void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const override;
728 void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const override;
730 std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override;
732 std::vector<MCInst>
733 generateMmap(intptr_t Address, size_t Length,
734 intptr_t FileDescriptorAddress) const override;
736 void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override;
738 void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const override;
740 std::vector<MCInst> generateMemoryInitialSetup() const override;
742 std::vector<MCInst> setStackRegisterToAuxMem() const override;
744 intptr_t getAuxiliaryMemoryStartAddress() const override;
746 std::vector<MCInst> configurePerfCounter(long Request, bool SaveRegisters) const override;
748 std::vector<unsigned> getArgumentRegisters() const override;
750 std::vector<unsigned> getRegistersNeedSaving() const override;
751 #endif // __linux__
753 ArrayRef<unsigned> getUnavailableRegisters() const override {
754 if (DisableUpperSSERegisters)
755 return ArrayRef(kUnavailableRegistersSSE,
756 sizeof(kUnavailableRegistersSSE) /
757 sizeof(kUnavailableRegistersSSE[0]));
759 return ArrayRef(kUnavailableRegisters, std::size(kUnavailableRegisters));
760 }
762 bool allowAsBackToBack(const Instruction &Instr) const override {
763 const unsigned Opcode = Instr.Description.Opcode;
764 return !isInvalidOpcode(Instr) && Opcode != X86::LEA64r &&
765 Opcode != X86::LEA64_32r && Opcode != X86::LEA16r;
766 }
768 std::vector<InstructionTemplate>
769 generateInstructionVariants(const Instruction &Instr,
770 unsigned MaxConfigsPerOpcode) const override;
772 std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
773 const LLVMState &State,
774 const SnippetGenerator::Options &Opts) const override {
775 return std::make_unique<X86SerialSnippetGenerator>(State, Opts);
776 }
778 std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
779 const LLVMState &State,
780 const SnippetGenerator::Options &Opts) const override {
781 return std::make_unique<X86ParallelSnippetGenerator>(State, Opts);
782 }
784 bool matchesArch(Triple::ArchType Arch) const override {
785 return Arch == Triple::x86_64 || Arch == Triple::x86;
786 }
788 Error checkFeatureSupport() const override {
789 // LBR is the only feature we conditionally support now.
790 // So if LBR is not requested, then we should be able to run the benchmarks.
791 if (LbrSamplingPeriod == 0)
792 return Error::success();
794 #if defined(__linux__) && defined(HAVE_LIBPFM) && \
795 defined(LIBPFM_HAS_FIELD_CYCLES)
796 // FIXME: Fix this.
797 // https://bugs.llvm.org/show_bug.cgi?id=48918
798 // For now, only do the check if we see an Intel machine because
799 // the counter uses some Intel-specific magic and it could
800 // get confused and think an AMD machine actually has LBR support.
801 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
802 defined(_M_X64)
803 using namespace sys::detail::x86;
805 if (getVendorSignature() == VendorSignatures::GENUINE_INTEL)
806 // If the kernel supports it, the hardware still may not have it.
807 return X86LbrCounter::checkLbrSupport();
808 #else
809 report_fatal_error("Running X86 exegesis on unsupported target");
810 #endif
811 #endif
812 return llvm::make_error<llvm::StringError>(
813 "LBR not supported on this kernel and/or platform",
814 llvm::errc::not_supported);
815 }
817 std::unique_ptr<SavedState> withSavedState() const override {
818 return std::make_unique<X86SavedState>();
819 }
821 static const unsigned kUnavailableRegisters[4];
822 static const unsigned kUnavailableRegistersSSE[12];
823 };
825 // We disable a few registers that cannot be encoded on instructions with a REX
826 // prefix.
827 const unsigned ExegesisX86Target::kUnavailableRegisters[4] = {X86::AH, X86::BH,
828 X86::CH, X86::DH};
830 // Optionally, also disable the upper (x86_64) SSE registers to reduce frontend
831 // decoder load.
832 const unsigned ExegesisX86Target::kUnavailableRegistersSSE[12] = {
833 X86::AH, X86::BH, X86::CH, X86::DH, X86::XMM8, X86::XMM9,
834 X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13, X86::XMM14, X86::XMM15};
836 // We're using one of R8-R15 because these registers are never hardcoded in
837 // instructions (e.g. MOVS writes to EDI, ESI, EDX), so they have fewer
838 // conflicts.
839 constexpr const unsigned kLoopCounterReg = X86::R8;
841 } // namespace
843 void ExegesisX86Target::addTargetSpecificPasses(PassManagerBase &PM) const {
844 // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
845 PM.add(createX86FloatingPointStackifierPass());
846 }
848 unsigned ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const {
849 if (!TT.isArch64Bit()) {
850 // FIXME: This would require popping from the stack, so we would have to
851 // add some additional setup code.
852 return 0;
853 }
854 return TT.isOSWindows() ? X86::RCX : X86::RDI;
855 }
857 unsigned ExegesisX86Target::getLoopCounterRegister(const Triple &TT) const {
858 if (!TT.isArch64Bit()) {
859 return 0;
860 }
861 return kLoopCounterReg;
862 }
864 Error ExegesisX86Target::randomizeTargetMCOperand(
865 const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue,
866 const BitVector &ForbiddenRegs) const {
867 const Operand &Op = Instr.getPrimaryOperand(Var);
868 switch (Op.getExplicitOperandInfo().OperandType) {
869 case X86::OperandType::OPERAND_ROUNDING_CONTROL:
870 AssignedValue =
871 MCOperand::createImm(randomIndex(X86::STATIC_ROUNDING::TO_ZERO));
872 return Error::success();
873 default:
874 break;
875 }
876 return make_error<Failure>(
877 Twine("unimplemented operand type ")
878 .concat(Twine(Op.getExplicitOperandInfo().OperandType)));
879 }
881 void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
882 unsigned Reg,
883 unsigned Offset) const {
884 assert(!isInvalidMemoryInstr(IT.getInstr()) &&
885 "fillMemoryOperands requires a valid memory instruction");
886 int MemOpIdx = X86II::getMemoryOperandNo(IT.getInstr().Description.TSFlags);
887 assert(MemOpIdx >= 0 && "invalid memory operand index");
888 // getMemoryOperandNo() ignores tied operands, so we have to add them back.
889 MemOpIdx += X86II::getOperandBias(IT.getInstr().Description);
890 setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg)); // BaseReg
891 setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1)); // ScaleAmt
892 setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0)); // IndexReg
893 setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
894 setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0)); // Segment
895 }
897 void ExegesisX86Target::decrementLoopCounterAndJump(
898 MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
899 const MCInstrInfo &MII) const {
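// Emits `add $-1, %r8` (kLoopCounterReg) followed by `jne TargetMBB`, so the
// benchmarked block keeps looping until the counter reaches zero.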
900 BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8))
901 .addDef(kLoopCounterReg)
902 .addUse(kLoopCounterReg)
903 .addImm(-1);
904 BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1))
905 .addMBB(&TargetMBB)
906 .addImm(X86::COND_NE);
907 }
909 std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
910 unsigned Reg,
911 const APInt &Value) const {
912 if (X86::GR8RegClass.contains(Reg))
913 return {loadImmediate(Reg, 8, Value)};
914 if (X86::GR16RegClass.contains(Reg))
915 return {loadImmediate(Reg, 16, Value)};
916 if (X86::GR32RegClass.contains(Reg))
917 return {loadImmediate(Reg, 32, Value)};
918 if (X86::GR64RegClass.contains(Reg))
919 return {loadImmediate(Reg, 64, Value)};
920 if (X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg) ||
921 X86::VK32RegClass.contains(Reg) || X86::VK64RegClass.contains(Reg)) {
922 switch (Value.getBitWidth()) {
923 case 8:
924 if (STI.getFeatureBits()[X86::FeatureDQI]) {
925 ConstantInliner CI(Value);
926 return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVBkm);
927 }
928 [[fallthrough]];
929 case 16:
930 if (STI.getFeatureBits()[X86::FeatureAVX512]) {
931 ConstantInliner CI(Value.zextOrTrunc(16));
932 return CI.loadAndFinalize(Reg, 16, X86::KMOVWkm);
933 }
934 break;
935 case 32:
936 if (STI.getFeatureBits()[X86::FeatureBWI]) {
937 ConstantInliner CI(Value);
938 return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVDkm);
939 }
940 break;
941 case 64:
942 if (STI.getFeatureBits()[X86::FeatureBWI]) {
943 ConstantInliner CI(Value);
944 return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVQkm);
945 }
946 break;
947 }
948 }
949 ConstantInliner CI(Value);
950 if (X86::VR64RegClass.contains(Reg))
951 return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm);
952 if (X86::VR128XRegClass.contains(Reg)) {
953 if (STI.getFeatureBits()[X86::FeatureAVX512])
954 return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm);
955 if (STI.getFeatureBits()[X86::FeatureAVX])
956 return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm);
957 return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm);
958 }
959 if (X86::VR256XRegClass.contains(Reg)) {
960 if (STI.getFeatureBits()[X86::FeatureAVX512])
961 return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm);
962 if (STI.getFeatureBits()[X86::FeatureAVX])
963 return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm);
964 }
965 if (X86::VR512RegClass.contains(Reg))
966 if (STI.getFeatureBits()[X86::FeatureAVX512])
967 return CI.loadAndFinalize(Reg, 512, X86::VMOVDQU32Zrm);
968 if (X86::RSTRegClass.contains(Reg)) {
969 return CI.loadX87STAndFinalize(Reg);
970 }
971 if (X86::RFP32RegClass.contains(Reg) || X86::RFP64RegClass.contains(Reg) ||
972 X86::RFP80RegClass.contains(Reg)) {
973 return CI.loadX87FPAndFinalize(Reg);
974 }
975 if (Reg == X86::EFLAGS)
976 return CI.popFlagAndFinalize();
977 if (Reg == X86::MXCSR)
978 return CI.loadImplicitRegAndFinalize(
979 STI.getFeatureBits()[X86::FeatureAVX] ? X86::VLDMXCSR : X86::LDMXCSR,
980 0x1f80);
981 if (Reg == X86::FPCW)
982 return CI.loadImplicitRegAndFinalize(X86::FLDCW16m, 0x37f);
983 return {}; // Not yet implemented.
984 }
986 #ifdef __linux__
988 #ifdef __arm__
989 static constexpr const intptr_t VAddressSpaceCeiling = 0xC0000000;
990 #else
991 static constexpr const intptr_t VAddressSpaceCeiling = 0x0000800000000000;
992 #endif
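// The helpers below hand-roll raw Linux syscalls. Under the x86-64 System V
// convention the syscall number goes in RAX and the arguments in RDI, RSI,
// RDX, R10, R8 and R9 (R10 stands in for RCX, which the SYSCALL instruction
// clobbers).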
994 void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
995 GeneratedCode.push_back(
996 loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
997 GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
998 }
1000 void generateRoundToNearestPage(unsigned int Register,
1001 std::vector<MCInst> &GeneratedCode) {
1002 int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
1003 // Round down to the nearest page by getting rid of the least significant bits
1004 // representing location in the page. Shift right to get rid of this info and
1005 // then shift back left.
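// E.g. with 4096-byte pages (PageSizeShift == 12), 0x7f12345678ef rounds down
// to 0x7f1234567000.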
1006 GeneratedCode.push_back(MCInstBuilder(X86::SHR64ri)
1007 .addReg(Register)
1008 .addReg(Register)
1009 .addImm(PageSizeShift));
1010 GeneratedCode.push_back(MCInstBuilder(X86::SHL64ri)
1011 .addReg(Register)
1012 .addReg(Register)
1013 .addImm(PageSizeShift));
1014 }
1016 void generateGetInstructionPointer(unsigned int ResultRegister,
1017 std::vector<MCInst> &GeneratedCode) {
1018 // Use a load effective address to get the current instruction pointer and put
1019 // it into the result register.
1020 GeneratedCode.push_back(MCInstBuilder(X86::LEA64r)
1021 .addReg(ResultRegister)
1022 .addReg(X86::RIP)
1023 .addImm(1)
1024 .addReg(0)
1025 .addImm(0)
1026 .addReg(0));
1027 }
1029 void ExegesisX86Target::generateLowerMunmap(
1030 std::vector<MCInst> &GeneratedCode) const {
1031 // Unmap starting at address zero
1032 GeneratedCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, 0)));
1033 // Get the current instruction pointer so we know where to unmap up to.
1034 generateGetInstructionPointer(X86::RSI, GeneratedCode);
1035 generateRoundToNearestPage(X86::RSI, GeneratedCode);
1036 // Subtract a page from the end of the unmap so we don't unmap the currently
1037 // executing section.
1038 GeneratedCode.push_back(MCInstBuilder(X86::SUB64ri32)
1039 .addReg(X86::RSI)
1040 .addReg(X86::RSI)
1041 .addImm(getpagesize()));
1042 generateSyscall(SYS_munmap, GeneratedCode);
1043 }
1045 void ExegesisX86Target::generateUpperMunmap(
1046 std::vector<MCInst> &GeneratedCode) const {
1047 generateGetInstructionPointer(X86::R8, GeneratedCode);
1048 // Load the size of the snippet into RDI from the argument register.
1049 GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
1050 .addReg(X86::RDI)
1051 .addReg(ArgumentRegisters::CodeSize));
1052 // Add the length of the snippet (in %RDI) to the current instruction pointer
1053 // (%R8) to get the address where we should start unmapping.
1054 GeneratedCode.push_back(MCInstBuilder(X86::ADD64rr)
1055 .addReg(X86::RDI)
1056 .addReg(X86::RDI)
1057 .addReg(X86::R8));
1058 generateRoundToNearestPage(X86::RDI, GeneratedCode);
1059 // Add one page to the start address to ensure that we're above the snippet
1060 // since the above function rounds down.
1061 GeneratedCode.push_back(MCInstBuilder(X86::ADD64ri32)
1062 .addReg(X86::RDI)
1063 .addReg(X86::RDI)
1064 .addImm(getpagesize()));
1065 // Unmap to just one page under the ceiling of the address space.
1066 GeneratedCode.push_back(loadImmediate(
1067 X86::RSI, 64, APInt(64, VAddressSpaceCeiling - getpagesize())));
1068 GeneratedCode.push_back(MCInstBuilder(X86::SUB64rr)
1069 .addReg(X86::RSI)
1070 .addReg(X86::RSI)
1071 .addReg(X86::RDI));
1072 generateSyscall(SYS_munmap, GeneratedCode);
1073 }
1075 std::vector<MCInst>
1076 ExegesisX86Target::generateExitSyscall(unsigned ExitCode) const {
1077 std::vector<MCInst> ExitCallCode;
1078 ExitCallCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, ExitCode)));
1079 generateSyscall(SYS_exit, ExitCallCode);
1080 return ExitCallCode;
1081 }
1083 // Before kernel 4.17, Linux did not support MAP_FIXED_NOREPLACE, so if it is
1084 // not available, simply define it as MAP_FIXED, which performs the same
1085 // function but does not guarantee that existing mappings won't get clobbered.
1086 #ifndef MAP_FIXED_NOREPLACE
1087 #define MAP_FIXED_NOREPLACE MAP_FIXED
1088 #endif
1090 // Some 32-bit architectures don't have mmap and define mmap2 instead. The only
1091 // difference between the two syscalls is that mmap2's offset parameter is in
1092 // terms of 4096-byte offsets rather than individual bytes, so for our purposes
1093 // they are effectively the same, as all offsets here are set to 0.
1094 #if defined(SYS_mmap2) && !defined(SYS_mmap)
1095 #define SYS_mmap SYS_mmap2
1096 #endif
1098 std::vector<MCInst>
1099 ExegesisX86Target::generateMmap(intptr_t Address, size_t Length,
1100 intptr_t FileDescriptorAddress) const {
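// Arguments follow the syscall convention noted above: RDI = addr,
// RSI = length, RDX = prot, R10 = flags, R8 = fd, R9 = offset. The file
// descriptor itself lives in memory at FileDescriptorAddress and is
// dereferenced into R8D below.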
1101 std::vector<MCInst> MmapCode;
1102 MmapCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, Address)));
1103 MmapCode.push_back(loadImmediate(X86::RSI, 64, APInt(64, Length)));
1104 MmapCode.push_back(
1105 loadImmediate(X86::RDX, 64, APInt(64, PROT_READ | PROT_WRITE)));
1106 MmapCode.push_back(
1107 loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE)));
1108 // Copy file descriptor location from aux memory into R8
1109 MmapCode.push_back(
1110 loadImmediate(X86::R8, 64, APInt(64, FileDescriptorAddress)));
1111 // Dereference file descriptor into FD argument register
1112 MmapCode.push_back(MCInstBuilder(X86::MOV32rm)
1113 .addReg(X86::R8D)
1114 .addReg(X86::R8)
1115 .addImm(1)
1116 .addReg(0)
1117 .addImm(0)
1118 .addReg(0));
1119 MmapCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0)));
1120 generateSyscall(SYS_mmap, MmapCode);
1121 return MmapCode;
1122 }
1124 void ExegesisX86Target::generateMmapAuxMem(
1125 std::vector<MCInst> &GeneratedCode) const {
1126 GeneratedCode.push_back(
1127 loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
1128 GeneratedCode.push_back(loadImmediate(
1129 X86::RSI, 64, APInt(64, SubprocessMemory::AuxiliaryMemorySize)));
1130 GeneratedCode.push_back(
1131 loadImmediate(X86::RDX, 64, APInt(64, PROT_READ | PROT_WRITE)));
1132 GeneratedCode.push_back(
1133 loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE)));
1134 GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
1135 .addReg(X86::R8)
1136 .addReg(ArgumentRegisters::AuxiliaryMemoryFD));
1137 GeneratedCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0)));
1138 generateSyscall(SYS_mmap, GeneratedCode);
1139 }
1141 void ExegesisX86Target::moveArgumentRegisters(
1142 std::vector<MCInst> &GeneratedCode) const {
1143 GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
1144 .addReg(ArgumentRegisters::CodeSize)
1145 .addReg(X86::RDI));
1146 GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
1147 .addReg(ArgumentRegisters::AuxiliaryMemoryFD)
1148 .addReg(X86::RSI));
1149 }
1151 std::vector<MCInst> ExegesisX86Target::generateMemoryInitialSetup() const {
1152 std::vector<MCInst> MemoryInitialSetupCode;
1153 moveArgumentRegisters(MemoryInitialSetupCode);
1154 generateLowerMunmap(MemoryInitialSetupCode);
1155 generateUpperMunmap(MemoryInitialSetupCode);
1156 generateMmapAuxMem(MemoryInitialSetupCode);
1157 return MemoryInitialSetupCode;
1158 }
1160 std::vector<MCInst> ExegesisX86Target::setStackRegisterToAuxMem() const {
1161 // Moves %rsp to the end of the auxiliary memory
1162 return {MCInstBuilder(X86::MOV64ri)
1163 .addReg(X86::RSP)
1164 .addImm(getAuxiliaryMemoryStartAddress() +
1165 SubprocessMemory::AuxiliaryMemorySize)};
1166 }
1168 intptr_t ExegesisX86Target::getAuxiliaryMemoryStartAddress() const {
1169 // Return the second-to-last page in the virtual address space to try to
1170 // prevent interference with memory annotations in the snippet.
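// Assuming 4 KiB pages (and the non-ARM ceiling above), this evaluates to
// 0x0000800000000000 - 0x2000 = 0x00007fffffffe000.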
1171 return VAddressSpaceCeiling - 2 * getpagesize();
1172 }
1174 void generateRegisterStackPush(unsigned int Register,
1175 std::vector<MCInst> &GeneratedCode) {
1176 GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
1177 }
1179 void generateRegisterStackPop(unsigned int Register,
1180 std::vector<MCInst> &GeneratedCode) {
1181 GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
1182 }
1184 std::vector<MCInst>
1185 ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const {
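// Emits the equivalent of ioctl(fd, Request), where fd is the 32-bit value
// stored at the start of the auxiliary memory (presumably the perf event file
// descriptor placed there by the parent process).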
1186 std::vector<MCInst> ConfigurePerfCounterCode;
1187 if(SaveRegisters) {
1188 // Preserve RAX, RDI, and RSI by pushing them to the stack.
1189 generateRegisterStackPush(X86::RAX, ConfigurePerfCounterCode);
1190 generateRegisterStackPush(X86::RDI, ConfigurePerfCounterCode);
1191 generateRegisterStackPush(X86::RSI, ConfigurePerfCounterCode);
1192 // RCX and R11 will get clobbered by the syscall instruction, so save them
1193 // as well.
1194 generateRegisterStackPush(X86::RCX, ConfigurePerfCounterCode);
1195 generateRegisterStackPush(X86::R11, ConfigurePerfCounterCode);
1196 }
1197 ConfigurePerfCounterCode.push_back(
1198 loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
1199 ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::MOV32rm)
1200 .addReg(X86::EDI)
1201 .addReg(X86::RDI)
1202 .addImm(1)
1203 .addReg(0)
1204 .addImm(0)
1205 .addReg(0));
1206 ConfigurePerfCounterCode.push_back(
1207 loadImmediate(X86::RSI, 64, APInt(64, Request)));
1208 generateSyscall(SYS_ioctl, ConfigurePerfCounterCode);
1209 if(SaveRegisters) {
1210 // Restore R11 then RCX
1211 generateRegisterStackPop(X86::R11, ConfigurePerfCounterCode);
1212 generateRegisterStackPop(X86::RCX, ConfigurePerfCounterCode);
1213 // Restore RAX, RDI, and RSI, in reverse order.
1214 generateRegisterStackPop(X86::RSI, ConfigurePerfCounterCode);
1215 generateRegisterStackPop(X86::RDI, ConfigurePerfCounterCode);
1216 generateRegisterStackPop(X86::RAX, ConfigurePerfCounterCode);
1217 }
1218 return ConfigurePerfCounterCode;
1219 }
1221 std::vector<unsigned> ExegesisX86Target::getArgumentRegisters() const {
1222 return {X86::RDI, X86::RSI};
1223 }
1225 std::vector<unsigned> ExegesisX86Target::getRegistersNeedSaving() const {
1226 return {X86::RAX, X86::RDI, X86::RSI, X86::RCX, X86::R11};
1227 }
1229 #endif // __linux__
1231 // Instructions can have some variable operands, and we may want to see how
1232 // different operands affect performance. So for each operand position,
1233 // precompute all the possible choices we might care about,
1234 // and greedily generate all the possible combinations of choices.
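// For example, an opcode with a single OPERAND_COND_CODE variable (a CMOVcc or
// SETcc form, say) yields one variant per condition code from COND_O through
// LAST_VALID_COND, capped at MaxConfigsPerOpcode.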
1235 std::vector<InstructionTemplate> ExegesisX86Target::generateInstructionVariants(
1236 const Instruction &Instr, unsigned MaxConfigsPerOpcode) const {
1237 bool Exploration = false;
1238 SmallVector<SmallVector<MCOperand, 1>, 4> VariableChoices;
1239 VariableChoices.resize(Instr.Variables.size());
1240 for (auto I : llvm::zip(Instr.Variables, VariableChoices)) {
1241 const Variable &Var = std::get<0>(I);
1242 SmallVectorImpl<MCOperand> &Choices = std::get<1>(I);
1244 switch (Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType) {
1245 default:
1246 // We don't wish to explicitly explore this variable.
1247 Choices.emplace_back(); // But add invalid MCOperand to simplify logic.
1248 continue;
1249 case X86::OperandType::OPERAND_COND_CODE: {
1250 Exploration = true;
1251 auto CondCodes = enum_seq_inclusive(X86::CondCode::COND_O,
1252 X86::CondCode::LAST_VALID_COND,
1253 force_iteration_on_noniterable_enum);
1254 Choices.reserve(CondCodes.size());
1255 for (int CondCode : CondCodes)
1256 Choices.emplace_back(MCOperand::createImm(CondCode));
1257 break;
1258 }
1259 }
1260 }
1262 // If we don't wish to explore any variables, defer to the baseline method.
1263 if (!Exploration)
1264 return ExegesisTarget::generateInstructionVariants(Instr,
1265 MaxConfigsPerOpcode);
1267 std::vector<InstructionTemplate> Variants;
1268 size_t NumVariants;
1269 CombinationGenerator<MCOperand, decltype(VariableChoices)::value_type, 4> G(
1270 VariableChoices);
1272 // How many operand combinations can we produce, within the limit?
1273 NumVariants = std::min(G.numCombinations(), (size_t)MaxConfigsPerOpcode);
1274 // And actually produce all the wanted operand combinations.
1275 Variants.reserve(NumVariants);
1276 G.generate([&](ArrayRef<MCOperand> State) -> bool {
1277 Variants.emplace_back(&Instr);
1278 Variants.back().setVariableValues(State);
1279 // Did we run out of space for variants?
1280 return Variants.size() >= NumVariants;
1281 });
1283 assert(Variants.size() == NumVariants &&
1284 Variants.size() <= MaxConfigsPerOpcode &&
1285 "Should not produce too many variants");
1286 return Variants;
1287 }
1289 static ExegesisTarget *getTheExegesisX86Target() {
1290 static ExegesisX86Target Target;
1291 return &Target;
1292 }
1294 void InitializeX86ExegesisTarget() {
1295 ExegesisTarget::registerTarget(getTheExegesisX86Target());
1296 }
1298 } // namespace exegesis
1299 } // namespace llvm