[mlir][py] Enable loading only specified dialects during creation. (#121421)
[llvm-project.git] / lld / ELF / Arch / X86.cpp
blob0edcde950d850aca9975e6e8cff483877a61c981
//===- X86.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Support/Endian.h"
#include <cassert>
#include <cstring>

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
22 namespace {
23 class X86 : public TargetInfo {
24 public:
25 X86(Ctx &);
26 int getTlsGdRelaxSkip(RelType type) const override;
27 RelExpr getRelExpr(RelType type, const Symbol &s,
28 const uint8_t *loc) const override;
29 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30 void writeGotPltHeader(uint8_t *buf) const override;
31 RelType getDynRel(RelType type) const override;
32 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34 void writePltHeader(uint8_t *buf) const override;
35 void writePlt(uint8_t *buf, const Symbol &sym,
36 uint64_t pltEntryAddr) const override;
37 void relocate(uint8_t *loc, const Relocation &rel,
38 uint64_t val) const override;
40 RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
41 void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
43 private:
44 void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
45 void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
46 void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
47 void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
49 } // namespace
51 X86::X86(Ctx &ctx) : TargetInfo(ctx) {
52 copyRel = R_386_COPY;
53 gotRel = R_386_GLOB_DAT;
54 pltRel = R_386_JUMP_SLOT;
55 iRelativeRel = R_386_IRELATIVE;
56 relativeRel = R_386_RELATIVE;
57 symbolicRel = R_386_32;
58 tlsDescRel = R_386_TLS_DESC;
59 tlsGotRel = R_386_TLS_TPOFF;
60 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
61 tlsOffsetRel = R_386_TLS_DTPOFF32;
62 gotBaseSymInGotPlt = true;
63 pltHeaderSize = 16;
64 pltEntrySize = 16;
65 ipltEntrySize = 16;
66 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
68 // Align to the non-PAE large page size (known as a superpage or huge page).
69 // FreeBSD automatically promotes large, superpage-aligned allocations.
70 defaultImageBase = 0x400000;
73 int X86::getTlsGdRelaxSkip(RelType type) const {
74 // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
75 return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
78 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
79 const uint8_t *loc) const {
80 switch (type) {
81 case R_386_8:
82 case R_386_16:
83 case R_386_32:
84 return R_ABS;
85 case R_386_TLS_LDO_32:
86 return R_DTPREL;
87 case R_386_TLS_GD:
88 return R_TLSGD_GOTPLT;
89 case R_386_TLS_LDM:
90 return R_TLSLD_GOTPLT;
91 case R_386_PLT32:
92 return R_PLT_PC;
93 case R_386_PC8:
94 case R_386_PC16:
95 case R_386_PC32:
96 return R_PC;
97 case R_386_GOTPC:
98 return R_GOTPLTONLY_PC;
99 case R_386_TLS_IE:
100 return R_GOT;
101 case R_386_GOT32:
102 case R_386_GOT32X:
103 // These relocations are arguably mis-designed because their calculations
104 // depend on the instructions they are applied to. This is bad because we
105 // usually don't care about whether the target section contains valid
106 // machine instructions or not. But this is part of the documented ABI, so
107 // we had to implement as the standard requires.
109 // x86 does not support PC-relative data access. Therefore, in order to
110 // access GOT contents, a GOT address needs to be known at link-time
111 // (which means non-PIC) or compilers have to emit code to get a GOT
112 // address at runtime (which means code is position-independent but
113 // compilers need to emit extra code for each GOT access.) This decision
114 // is made at compile-time. In the latter case, compilers emit code to
115 // load a GOT address to a register, which is usually %ebx.
117 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
118 // foo@GOT(%ebx).
120 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
121 // find such relocation, we should report an error. foo@GOT is resolved to
122 // an *absolute* address of foo's GOT entry, because both GOT address and
123 // foo's offset are known. In other words, it's G + A.
125 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
126 // foo's GOT entry in the table, because GOT address is not known but foo's
127 // offset in the table is known. It's G + A - GOT.
129 // It's unfortunate that compilers emit the same relocation for these
130 // different use cases. In order to distinguish them, we have to read a
131 // machine instruction.
133 // The following code implements it. We assume that Loc[0] is the first byte
134 // of a displacement or an immediate field of a valid machine
135 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
136 // the byte, we can determine whether the instruction uses the operand as an
137 // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
138 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
139 case R_386_TLS_GOTDESC:
140 return R_TLSDESC_GOTPLT;
141 case R_386_TLS_DESC_CALL:
142 return R_TLSDESC_CALL;
143 case R_386_TLS_GOTIE:
144 return R_GOTPLT;
145 case R_386_GOTOFF:
146 return R_GOTPLTREL;
147 case R_386_TLS_LE:
148 return R_TPREL;
149 case R_386_TLS_LE_32:
150 return R_TPREL_NEG;
151 case R_386_NONE:
152 return R_NONE;
153 default:
154 Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
155 << ") against symbol " << &s;
156 return R_NONE;
160 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
161 switch (expr) {
162 default:
163 return expr;
164 case R_RELAX_TLS_GD_TO_IE:
165 return R_RELAX_TLS_GD_TO_IE_GOTPLT;
166 case R_RELAX_TLS_GD_TO_LE:
167 return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
168 : R_RELAX_TLS_GD_TO_LE;
172 void X86::writeGotPltHeader(uint8_t *buf) const {
173 write32le(buf, ctx.mainPart->dynamic->getVA());
176 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
177 // Entries in .got.plt initially points back to the corresponding
178 // PLT entries with a fixed offset to skip the first instruction.
179 write32le(buf, s.getPltVA(ctx) + 6);
182 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
183 // An x86 entry is the address of the ifunc resolver function.
184 write32le(buf, s.getVA(ctx));
187 RelType X86::getDynRel(RelType type) const {
188 if (type == R_386_TLS_LE)
189 return R_386_TLS_TPOFF;
190 if (type == R_386_TLS_LE_32)
191 return R_386_TLS_TPOFF32;
192 return type;
195 void X86::writePltHeader(uint8_t *buf) const {
196 if (ctx.arg.isPic) {
197 const uint8_t v[] = {
198 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
199 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
200 0x90, 0x90, 0x90, 0x90 // nop
202 memcpy(buf, v, sizeof(v));
203 return;
206 const uint8_t pltData[] = {
207 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
208 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
209 0x90, 0x90, 0x90, 0x90, // nop
211 memcpy(buf, pltData, sizeof(pltData));
212 uint32_t gotPlt = ctx.in.gotPlt->getVA();
213 write32le(buf + 2, gotPlt + 4);
214 write32le(buf + 8, gotPlt + 8);
217 void X86::writePlt(uint8_t *buf, const Symbol &sym,
218 uint64_t pltEntryAddr) const {
219 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
220 if (ctx.arg.isPic) {
221 const uint8_t inst[] = {
222 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
223 0x68, 0, 0, 0, 0, // pushl $reloc_offset
224 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
226 memcpy(buf, inst, sizeof(inst));
227 write32le(buf + 2, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
228 } else {
229 const uint8_t inst[] = {
230 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
231 0x68, 0, 0, 0, 0, // pushl $reloc_offset
232 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
234 memcpy(buf, inst, sizeof(inst));
235 write32le(buf + 2, sym.getGotPltVA(ctx));
238 write32le(buf + 7, relOff);
239 write32le(buf + 12, ctx.in.plt->getVA() - pltEntryAddr - 16);
242 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
243 switch (type) {
244 case R_386_8:
245 case R_386_PC8:
246 return SignExtend64<8>(*buf);
247 case R_386_16:
248 case R_386_PC16:
249 return SignExtend64<16>(read16le(buf));
250 case R_386_32:
251 case R_386_GLOB_DAT:
252 case R_386_GOT32:
253 case R_386_GOT32X:
254 case R_386_GOTOFF:
255 case R_386_GOTPC:
256 case R_386_IRELATIVE:
257 case R_386_PC32:
258 case R_386_PLT32:
259 case R_386_RELATIVE:
260 case R_386_TLS_GOTDESC:
261 case R_386_TLS_DESC_CALL:
262 case R_386_TLS_DTPMOD32:
263 case R_386_TLS_DTPOFF32:
264 case R_386_TLS_LDO_32:
265 case R_386_TLS_LDM:
266 case R_386_TLS_IE:
267 case R_386_TLS_IE_32:
268 case R_386_TLS_LE:
269 case R_386_TLS_LE_32:
270 case R_386_TLS_GD:
271 case R_386_TLS_GD_32:
272 case R_386_TLS_GOTIE:
273 case R_386_TLS_TPOFF:
274 case R_386_TLS_TPOFF32:
275 return SignExtend64<32>(read32le(buf));
276 case R_386_TLS_DESC:
277 return SignExtend64<32>(read32le(buf + 4));
278 case R_386_NONE:
279 case R_386_JUMP_SLOT:
280 // These relocations are defined as not having an implicit addend.
281 return 0;
282 default:
283 InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
284 return 0;
288 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
289 switch (rel.type) {
290 case R_386_8:
291 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
292 // being used for some 16-bit programs such as boot loaders, so
293 // we want to support them.
294 checkIntUInt(ctx, loc, val, 8, rel);
295 *loc = val;
296 break;
297 case R_386_PC8:
298 checkInt(ctx, loc, val, 8, rel);
299 *loc = val;
300 break;
301 case R_386_16:
302 checkIntUInt(ctx, loc, val, 16, rel);
303 write16le(loc, val);
304 break;
305 case R_386_PC16:
306 // R_386_PC16 is normally used with 16 bit code. In that situation
307 // the PC is 16 bits, just like the addend. This means that it can
308 // point from any 16 bit address to any other if the possibility
309 // of wrapping is included.
310 // The only restriction we have to check then is that the destination
311 // address fits in 16 bits. That is impossible to do here. The problem is
312 // that we are passed the final value, which already had the
313 // current location subtracted from it.
314 // We just check that Val fits in 17 bits. This misses some cases, but
315 // should have no false positives.
316 checkInt(ctx, loc, val, 17, rel);
317 write16le(loc, val);
318 break;
319 case R_386_32:
320 case R_386_GOT32:
321 case R_386_GOT32X:
322 case R_386_GOTOFF:
323 case R_386_GOTPC:
324 case R_386_PC32:
325 case R_386_PLT32:
326 case R_386_RELATIVE:
327 case R_386_TLS_GOTDESC:
328 case R_386_TLS_DESC_CALL:
329 case R_386_TLS_DTPMOD32:
330 case R_386_TLS_DTPOFF32:
331 case R_386_TLS_GD:
332 case R_386_TLS_GOTIE:
333 case R_386_TLS_IE:
334 case R_386_TLS_LDM:
335 case R_386_TLS_LDO_32:
336 case R_386_TLS_LE:
337 case R_386_TLS_LE_32:
338 case R_386_TLS_TPOFF:
339 case R_386_TLS_TPOFF32:
340 checkInt(ctx, loc, val, 32, rel);
341 write32le(loc, val);
342 break;
343 case R_386_TLS_DESC:
344 // The addend is stored in the second 32-bit word.
345 write32le(loc + 4, val);
346 break;
347 default:
348 llvm_unreachable("unknown relocation");
352 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
353 uint64_t val) const {
354 if (rel.type == R_386_TLS_GD) {
355 // Convert (loc[-2] == 0x04)
356 // leal x@tlsgd(, %ebx, 1), %eax
357 // call ___tls_get_addr@plt
358 // or
359 // leal x@tlsgd(%reg), %eax
360 // call *___tls_get_addr@got(%reg)
361 // to
362 const uint8_t inst[] = {
363 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
364 0x81, 0xe8, 0, 0, 0, 0, // subl x@ntpoff(%ebx), %eax
366 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
367 memcpy(w, inst, sizeof(inst));
368 write32le(w + 8, val);
369 } else if (rel.type == R_386_TLS_GOTDESC) {
370 // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
372 // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
373 if (memcmp(loc - 2, "\x8d\x83", 2)) {
374 ErrAlways(ctx)
375 << getErrorLoc(ctx, loc - 2)
376 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
377 return;
379 loc[-1] = 0x05;
380 write32le(loc, val);
381 } else {
382 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
383 assert(rel.type == R_386_TLS_DESC_CALL);
384 loc[0] = 0x66;
385 loc[1] = 0x90;
389 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
390 uint64_t val) const {
391 if (rel.type == R_386_TLS_GD) {
392 // Convert (loc[-2] == 0x04)
393 // leal x@tlsgd(, %ebx, 1), %eax
394 // call ___tls_get_addr@plt
395 // or
396 // leal x@tlsgd(%reg), %eax
397 // call *___tls_get_addr@got(%reg)
398 const uint8_t inst[] = {
399 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
400 0x03, 0x83, 0, 0, 0, 0, // addl x@gottpoff(%ebx), %eax
402 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
403 memcpy(w, inst, sizeof(inst));
404 write32le(w + 8, val);
405 } else if (rel.type == R_386_TLS_GOTDESC) {
406 // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
407 if (memcmp(loc - 2, "\x8d\x83", 2)) {
408 ErrAlways(ctx)
409 << getErrorLoc(ctx, loc - 2)
410 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
411 return;
413 loc[-2] = 0x8b;
414 write32le(loc, val);
415 } else {
416 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
417 assert(rel.type == R_386_TLS_DESC_CALL);
418 loc[0] = 0x66;
419 loc[1] = 0x90;
423 // In some conditions, relocations can be optimized to avoid using GOT.
424 // This function does that for Initial Exec to Local Exec case.
425 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
426 uint64_t val) const {
427 // Ulrich's document section 6.2 says that @gotntpoff can
428 // be used with MOVL or ADDL instructions.
429 // @indntpoff is similar to @gotntpoff, but for use in
430 // position dependent code.
431 uint8_t reg = (loc[-1] >> 3) & 7;
433 if (rel.type == R_386_TLS_IE) {
434 if (loc[-1] == 0xa1) {
435 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
436 // This case is different from the generic case below because
437 // this is a 5 byte instruction while below is 6 bytes.
438 loc[-1] = 0xb8;
439 } else if (loc[-2] == 0x8b) {
440 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
441 loc[-2] = 0xc7;
442 loc[-1] = 0xc0 | reg;
443 } else {
444 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
445 loc[-2] = 0x81;
446 loc[-1] = 0xc0 | reg;
448 } else {
449 assert(rel.type == R_386_TLS_GOTIE);
450 if (loc[-2] == 0x8b) {
451 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
452 loc[-2] = 0xc7;
453 loc[-1] = 0xc0 | reg;
454 } else {
455 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
456 loc[-2] = 0x8d;
457 loc[-1] = 0x80 | (reg << 3) | reg;
460 write32le(loc, val);
463 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
464 uint64_t val) const {
465 if (rel.type == R_386_TLS_LDO_32) {
466 write32le(loc, val);
467 return;
470 if (loc[4] == 0xe8) {
471 // Convert
472 // leal x(%reg),%eax
473 // call ___tls_get_addr@plt
474 // to
475 const uint8_t inst[] = {
476 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
477 0x90, // nop
478 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
480 memcpy(loc - 2, inst, sizeof(inst));
481 return;
484 // Convert
485 // leal x(%reg),%eax
486 // call *___tls_get_addr@got(%reg)
487 // to
488 const uint8_t inst[] = {
489 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
490 0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
492 memcpy(loc - 2, inst, sizeof(inst));
495 void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
496 uint64_t secAddr = sec.getOutputSection()->addr;
497 if (auto *s = dyn_cast<InputSection>(&sec))
498 secAddr += s->outSecOff;
499 for (const Relocation &rel : sec.relocs()) {
500 uint8_t *loc = buf + rel.offset;
501 const uint64_t val =
502 SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), 32);
503 switch (rel.expr) {
504 case R_RELAX_TLS_GD_TO_IE_GOTPLT:
505 relaxTlsGdToIe(loc, rel, val);
506 continue;
507 case R_RELAX_TLS_GD_TO_LE:
508 case R_RELAX_TLS_GD_TO_LE_NEG:
509 relaxTlsGdToLe(loc, rel, val);
510 continue;
511 case R_RELAX_TLS_LD_TO_LE:
512 relaxTlsLdToLe(loc, rel, val);
513 break;
514 case R_RELAX_TLS_IE_TO_LE:
515 relaxTlsIeToLe(loc, rel, val);
516 continue;
517 default:
518 relocate(loc, rel, val);
519 break;
524 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
525 // entries containing endbr32 instructions. A PLT entry will be split into two
526 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
527 namespace {
528 class IntelIBT : public X86 {
529 public:
530 IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; }
531 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
532 void writePlt(uint8_t *buf, const Symbol &sym,
533 uint64_t pltEntryAddr) const override;
534 void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
536 static const unsigned IBTPltHeaderSize = 16;
538 } // namespace
540 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
541 uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize +
542 s.getPltIdx(ctx) * pltEntrySize;
543 write32le(buf, va);
546 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
547 uint64_t /*pltEntryAddr*/) const {
548 if (ctx.arg.isPic) {
549 const uint8_t inst[] = {
550 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
551 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx)
552 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
554 memcpy(buf, inst, sizeof(inst));
555 write32le(buf + 6, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
556 return;
559 const uint8_t inst[] = {
560 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
561 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
562 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
564 memcpy(buf, inst, sizeof(inst));
565 write32le(buf + 6, sym.getGotPltVA(ctx));
568 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
569 writePltHeader(buf);
570 buf += IBTPltHeaderSize;
572 const uint8_t inst[] = {
573 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
574 0x68, 0, 0, 0, 0, // pushl $reloc_offset
575 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC
576 0x66, 0x90, // nop
579 for (size_t i = 0; i < numEntries; ++i) {
580 memcpy(buf, inst, sizeof(inst));
581 write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
582 write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
583 buf += sizeof(inst);
587 namespace {
588 class RetpolinePic : public X86 {
589 public:
590 RetpolinePic(Ctx &);
591 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
592 void writePltHeader(uint8_t *buf) const override;
593 void writePlt(uint8_t *buf, const Symbol &sym,
594 uint64_t pltEntryAddr) const override;
597 class RetpolineNoPic : public X86 {
598 public:
599 RetpolineNoPic(Ctx &);
600 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
601 void writePltHeader(uint8_t *buf) const override;
602 void writePlt(uint8_t *buf, const Symbol &sym,
603 uint64_t pltEntryAddr) const override;
605 } // namespace
607 RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) {
608 pltHeaderSize = 48;
609 pltEntrySize = 32;
610 ipltEntrySize = 32;
613 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
614 write32le(buf, s.getPltVA(ctx) + 17);
617 void RetpolinePic::writePltHeader(uint8_t *buf) const {
618 const uint8_t insn[] = {
619 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx)
620 0x50, // 6: pushl %eax
621 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax
622 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
623 0xf3, 0x90, // 12: loop: pause
624 0x0f, 0xae, 0xe8, // 14: lfence
625 0xeb, 0xf9, // 17: jmp loop
626 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
627 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
628 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
629 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
630 0x89, 0xc8, // 2b: mov %ecx, %eax
631 0x59, // 2d: pop %ecx
632 0xc3, // 2e: ret
633 0xcc, // 2f: int3; padding
635 memcpy(buf, insn, sizeof(insn));
638 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
639 uint64_t pltEntryAddr) const {
640 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
641 const uint8_t insn[] = {
642 0x50, // pushl %eax
643 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
644 0xe8, 0, 0, 0, 0, // call plt+0x20
645 0xe9, 0, 0, 0, 0, // jmp plt+0x12
646 0x68, 0, 0, 0, 0, // pushl $reloc_offset
647 0xe9, 0, 0, 0, 0, // jmp plt+0
648 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
650 memcpy(buf, insn, sizeof(insn));
652 uint32_t ebx = ctx.in.gotPlt->getVA();
653 unsigned off = pltEntryAddr - ctx.in.plt->getVA();
654 write32le(buf + 3, sym.getGotPltVA(ctx) - ebx);
655 write32le(buf + 8, -off - 12 + 32);
656 write32le(buf + 13, -off - 17 + 18);
657 write32le(buf + 18, relOff);
658 write32le(buf + 23, -off - 27);
661 RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) {
662 pltHeaderSize = 48;
663 pltEntrySize = 32;
664 ipltEntrySize = 32;
667 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
668 write32le(buf, s.getPltVA(ctx) + 16);
671 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
672 const uint8_t insn[] = {
673 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
674 0x50, // 6: pushl %eax
675 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
676 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
677 0xf3, 0x90, // 11: loop: pause
678 0x0f, 0xae, 0xe8, // 13: lfence
679 0xeb, 0xf9, // 16: jmp loop
680 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
681 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16
682 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
683 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
684 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
685 0x89, 0xc8, // 2b: mov %ecx, %eax
686 0x59, // 2d: pop %ecx
687 0xc3, // 2e: ret
688 0xcc, // 2f: int3; padding
690 memcpy(buf, insn, sizeof(insn));
692 uint32_t gotPlt = ctx.in.gotPlt->getVA();
693 write32le(buf + 2, gotPlt + 4);
694 write32le(buf + 8, gotPlt + 8);
697 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
698 uint64_t pltEntryAddr) const {
699 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
700 const uint8_t insn[] = {
701 0x50, // 0: pushl %eax
702 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
703 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
704 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
705 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
706 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
707 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
708 0xcc, // 1f: int3; padding
710 memcpy(buf, insn, sizeof(insn));
712 unsigned off = pltEntryAddr - ctx.in.plt->getVA();
713 write32le(buf + 2, sym.getGotPltVA(ctx));
714 write32le(buf + 7, -off - 11 + 32);
715 write32le(buf + 12, -off - 16 + 17);
716 write32le(buf + 17, relOff);
717 write32le(buf + 22, -off - 26);
720 void elf::setX86TargetInfo(Ctx &ctx) {
721 if (ctx.arg.zRetpolineplt) {
722 if (ctx.arg.isPic)
723 ctx.target.reset(new RetpolinePic(ctx));
724 else
725 ctx.target.reset(new RetpolineNoPic(ctx));
726 return;
729 if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)
730 ctx.target.reset(new IntelIBT(ctx));
731 else
732 ctx.target.reset(new X86(ctx));