Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / lld / ELF / Arch / X86.cpp
blob8d4f258e2cf24e5ee3d435cee32d99c8bcc55822
1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "OutputSections.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19 using namespace lld;
20 using namespace lld::elf;
22 namespace {
23 class X86 : public TargetInfo {
24 public:
25 X86();
26 int getTlsGdRelaxSkip(RelType type) const override;
27 RelExpr getRelExpr(RelType type, const Symbol &s,
28 const uint8_t *loc) const override;
29 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30 void writeGotPltHeader(uint8_t *buf) const override;
31 RelType getDynRel(RelType type) const override;
32 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34 void writePltHeader(uint8_t *buf) const override;
35 void writePlt(uint8_t *buf, const Symbol &sym,
36 uint64_t pltEntryAddr) const override;
37 void relocate(uint8_t *loc, const Relocation &rel,
38 uint64_t val) const override;
40 RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
41 void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
43 } // namespace
45 X86::X86() {
46 copyRel = R_386_COPY;
47 gotRel = R_386_GLOB_DAT;
48 pltRel = R_386_JUMP_SLOT;
49 iRelativeRel = R_386_IRELATIVE;
50 relativeRel = R_386_RELATIVE;
51 symbolicRel = R_386_32;
52 tlsDescRel = R_386_TLS_DESC;
53 tlsGotRel = R_386_TLS_TPOFF;
54 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
55 tlsOffsetRel = R_386_TLS_DTPOFF32;
56 gotBaseSymInGotPlt = true;
57 pltHeaderSize = 16;
58 pltEntrySize = 16;
59 ipltEntrySize = 16;
60 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
62 // Align to the non-PAE large page size (known as a superpage or huge page).
63 // FreeBSD automatically promotes large, superpage-aligned allocations.
64 defaultImageBase = 0x400000;
67 int X86::getTlsGdRelaxSkip(RelType type) const {
68 // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
69 return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
72 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
73 const uint8_t *loc) const {
74 switch (type) {
75 case R_386_8:
76 case R_386_16:
77 case R_386_32:
78 return R_ABS;
79 case R_386_TLS_LDO_32:
80 return R_DTPREL;
81 case R_386_TLS_GD:
82 return R_TLSGD_GOTPLT;
83 case R_386_TLS_LDM:
84 return R_TLSLD_GOTPLT;
85 case R_386_PLT32:
86 return R_PLT_PC;
87 case R_386_PC8:
88 case R_386_PC16:
89 case R_386_PC32:
90 return R_PC;
91 case R_386_GOTPC:
92 return R_GOTPLTONLY_PC;
93 case R_386_TLS_IE:
94 return R_GOT;
95 case R_386_GOT32:
96 case R_386_GOT32X:
97 // These relocations are arguably mis-designed because their calculations
98 // depend on the instructions they are applied to. This is bad because we
99 // usually don't care about whether the target section contains valid
100 // machine instructions or not. But this is part of the documented ABI, so
101 // we had to implement as the standard requires.
103 // x86 does not support PC-relative data access. Therefore, in order to
104 // access GOT contents, a GOT address needs to be known at link-time
105 // (which means non-PIC) or compilers have to emit code to get a GOT
106 // address at runtime (which means code is position-independent but
107 // compilers need to emit extra code for each GOT access.) This decision
108 // is made at compile-time. In the latter case, compilers emit code to
109 // load a GOT address to a register, which is usually %ebx.
111 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
112 // foo@GOT(%ebx).
114 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
115 // find such relocation, we should report an error. foo@GOT is resolved to
116 // an *absolute* address of foo's GOT entry, because both GOT address and
117 // foo's offset are known. In other words, it's G + A.
119 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
120 // foo's GOT entry in the table, because GOT address is not known but foo's
121 // offset in the table is known. It's G + A - GOT.
123 // It's unfortunate that compilers emit the same relocation for these
124 // different use cases. In order to distinguish them, we have to read a
125 // machine instruction.
127 // The following code implements it. We assume that Loc[0] is the first byte
128 // of a displacement or an immediate field of a valid machine
129 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
130 // the byte, we can determine whether the instruction uses the operand as an
131 // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
132 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
133 case R_386_TLS_GOTDESC:
134 return R_TLSDESC_GOTPLT;
135 case R_386_TLS_DESC_CALL:
136 return R_TLSDESC_CALL;
137 case R_386_TLS_GOTIE:
138 return R_GOTPLT;
139 case R_386_GOTOFF:
140 return R_GOTPLTREL;
141 case R_386_TLS_LE:
142 return R_TPREL;
143 case R_386_TLS_LE_32:
144 return R_TPREL_NEG;
145 case R_386_NONE:
146 return R_NONE;
147 default:
148 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
149 ") against symbol " + toString(s));
150 return R_NONE;
154 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
155 switch (expr) {
156 default:
157 return expr;
158 case R_RELAX_TLS_GD_TO_IE:
159 return R_RELAX_TLS_GD_TO_IE_GOTPLT;
160 case R_RELAX_TLS_GD_TO_LE:
161 return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
162 : R_RELAX_TLS_GD_TO_LE;
166 void X86::writeGotPltHeader(uint8_t *buf) const {
167 write32le(buf, mainPart->dynamic->getVA());
170 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
171 // Entries in .got.plt initially points back to the corresponding
172 // PLT entries with a fixed offset to skip the first instruction.
173 write32le(buf, s.getPltVA() + 6);
176 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
177 // An x86 entry is the address of the ifunc resolver function.
178 write32le(buf, s.getVA());
181 RelType X86::getDynRel(RelType type) const {
182 if (type == R_386_TLS_LE)
183 return R_386_TLS_TPOFF;
184 if (type == R_386_TLS_LE_32)
185 return R_386_TLS_TPOFF32;
186 return type;
189 void X86::writePltHeader(uint8_t *buf) const {
190 if (config->isPic) {
191 const uint8_t v[] = {
192 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
193 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
194 0x90, 0x90, 0x90, 0x90 // nop
196 memcpy(buf, v, sizeof(v));
197 return;
200 const uint8_t pltData[] = {
201 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
202 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
203 0x90, 0x90, 0x90, 0x90, // nop
205 memcpy(buf, pltData, sizeof(pltData));
206 uint32_t gotPlt = in.gotPlt->getVA();
207 write32le(buf + 2, gotPlt + 4);
208 write32le(buf + 8, gotPlt + 8);
211 void X86::writePlt(uint8_t *buf, const Symbol &sym,
212 uint64_t pltEntryAddr) const {
213 unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
214 if (config->isPic) {
215 const uint8_t inst[] = {
216 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
217 0x68, 0, 0, 0, 0, // pushl $reloc_offset
218 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
220 memcpy(buf, inst, sizeof(inst));
221 write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
222 } else {
223 const uint8_t inst[] = {
224 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
225 0x68, 0, 0, 0, 0, // pushl $reloc_offset
226 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
228 memcpy(buf, inst, sizeof(inst));
229 write32le(buf + 2, sym.getGotPltVA());
232 write32le(buf + 7, relOff);
233 write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
236 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
237 switch (type) {
238 case R_386_8:
239 case R_386_PC8:
240 return SignExtend64<8>(*buf);
241 case R_386_16:
242 case R_386_PC16:
243 return SignExtend64<16>(read16le(buf));
244 case R_386_32:
245 case R_386_GLOB_DAT:
246 case R_386_GOT32:
247 case R_386_GOT32X:
248 case R_386_GOTOFF:
249 case R_386_GOTPC:
250 case R_386_IRELATIVE:
251 case R_386_PC32:
252 case R_386_PLT32:
253 case R_386_RELATIVE:
254 case R_386_TLS_GOTDESC:
255 case R_386_TLS_DESC_CALL:
256 case R_386_TLS_DTPMOD32:
257 case R_386_TLS_DTPOFF32:
258 case R_386_TLS_LDO_32:
259 case R_386_TLS_LDM:
260 case R_386_TLS_IE:
261 case R_386_TLS_IE_32:
262 case R_386_TLS_LE:
263 case R_386_TLS_LE_32:
264 case R_386_TLS_GD:
265 case R_386_TLS_GD_32:
266 case R_386_TLS_GOTIE:
267 case R_386_TLS_TPOFF:
268 case R_386_TLS_TPOFF32:
269 return SignExtend64<32>(read32le(buf));
270 case R_386_TLS_DESC:
271 return SignExtend64<32>(read32le(buf + 4));
272 case R_386_NONE:
273 case R_386_JUMP_SLOT:
274 // These relocations are defined as not having an implicit addend.
275 return 0;
276 default:
277 internalLinkerError(getErrorLocation(buf),
278 "cannot read addend for relocation " + toString(type));
279 return 0;
283 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
284 switch (rel.type) {
285 case R_386_8:
286 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
287 // being used for some 16-bit programs such as boot loaders, so
288 // we want to support them.
289 checkIntUInt(loc, val, 8, rel);
290 *loc = val;
291 break;
292 case R_386_PC8:
293 checkInt(loc, val, 8, rel);
294 *loc = val;
295 break;
296 case R_386_16:
297 checkIntUInt(loc, val, 16, rel);
298 write16le(loc, val);
299 break;
300 case R_386_PC16:
301 // R_386_PC16 is normally used with 16 bit code. In that situation
302 // the PC is 16 bits, just like the addend. This means that it can
303 // point from any 16 bit address to any other if the possibility
304 // of wrapping is included.
305 // The only restriction we have to check then is that the destination
306 // address fits in 16 bits. That is impossible to do here. The problem is
307 // that we are passed the final value, which already had the
308 // current location subtracted from it.
309 // We just check that Val fits in 17 bits. This misses some cases, but
310 // should have no false positives.
311 checkInt(loc, val, 17, rel);
312 write16le(loc, val);
313 break;
314 case R_386_32:
315 case R_386_GOT32:
316 case R_386_GOT32X:
317 case R_386_GOTOFF:
318 case R_386_GOTPC:
319 case R_386_PC32:
320 case R_386_PLT32:
321 case R_386_RELATIVE:
322 case R_386_TLS_GOTDESC:
323 case R_386_TLS_DESC_CALL:
324 case R_386_TLS_DTPMOD32:
325 case R_386_TLS_DTPOFF32:
326 case R_386_TLS_GD:
327 case R_386_TLS_GOTIE:
328 case R_386_TLS_IE:
329 case R_386_TLS_LDM:
330 case R_386_TLS_LDO_32:
331 case R_386_TLS_LE:
332 case R_386_TLS_LE_32:
333 case R_386_TLS_TPOFF:
334 case R_386_TLS_TPOFF32:
335 checkInt(loc, val, 32, rel);
336 write32le(loc, val);
337 break;
338 case R_386_TLS_DESC:
339 // The addend is stored in the second 32-bit word.
340 write32le(loc + 4, val);
341 break;
342 default:
343 llvm_unreachable("unknown relocation");
347 static void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
348 if (rel.type == R_386_TLS_GD) {
349 // Convert (loc[-2] == 0x04)
350 // leal x@tlsgd(, %ebx, 1), %eax
351 // call ___tls_get_addr@plt
352 // or
353 // leal x@tlsgd(%reg), %eax
354 // call *___tls_get_addr@got(%reg)
355 // to
356 const uint8_t inst[] = {
357 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
358 0x81, 0xe8, 0, 0, 0, 0, // subl x@ntpoff(%ebx), %eax
360 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
361 memcpy(w, inst, sizeof(inst));
362 write32le(w + 8, val);
363 } else if (rel.type == R_386_TLS_GOTDESC) {
364 // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
366 // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
367 if (memcmp(loc - 2, "\x8d\x83", 2)) {
368 error(getErrorLocation(loc - 2) +
369 "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
370 return;
372 loc[-1] = 0x05;
373 write32le(loc, val);
374 } else {
375 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
376 assert(rel.type == R_386_TLS_DESC_CALL);
377 loc[0] = 0x66;
378 loc[1] = 0x90;
382 static void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) {
383 if (rel.type == R_386_TLS_GD) {
384 // Convert (loc[-2] == 0x04)
385 // leal x@tlsgd(, %ebx, 1), %eax
386 // call ___tls_get_addr@plt
387 // or
388 // leal x@tlsgd(%reg), %eax
389 // call *___tls_get_addr@got(%reg)
390 const uint8_t inst[] = {
391 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
392 0x03, 0x83, 0, 0, 0, 0, // addl x@gottpoff(%ebx), %eax
394 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
395 memcpy(w, inst, sizeof(inst));
396 write32le(w + 8, val);
397 } else if (rel.type == R_386_TLS_GOTDESC) {
398 // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
399 if (memcmp(loc - 2, "\x8d\x83", 2)) {
400 error(getErrorLocation(loc - 2) +
401 "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
402 return;
404 loc[-2] = 0x8b;
405 write32le(loc, val);
406 } else {
407 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
408 assert(rel.type == R_386_TLS_DESC_CALL);
409 loc[0] = 0x66;
410 loc[1] = 0x90;
414 // In some conditions, relocations can be optimized to avoid using GOT.
415 // This function does that for Initial Exec to Local Exec case.
416 static void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
417 // Ulrich's document section 6.2 says that @gotntpoff can
418 // be used with MOVL or ADDL instructions.
419 // @indntpoff is similar to @gotntpoff, but for use in
420 // position dependent code.
421 uint8_t reg = (loc[-1] >> 3) & 7;
423 if (rel.type == R_386_TLS_IE) {
424 if (loc[-1] == 0xa1) {
425 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
426 // This case is different from the generic case below because
427 // this is a 5 byte instruction while below is 6 bytes.
428 loc[-1] = 0xb8;
429 } else if (loc[-2] == 0x8b) {
430 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
431 loc[-2] = 0xc7;
432 loc[-1] = 0xc0 | reg;
433 } else {
434 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
435 loc[-2] = 0x81;
436 loc[-1] = 0xc0 | reg;
438 } else {
439 assert(rel.type == R_386_TLS_GOTIE);
440 if (loc[-2] == 0x8b) {
441 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
442 loc[-2] = 0xc7;
443 loc[-1] = 0xc0 | reg;
444 } else {
445 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
446 loc[-2] = 0x8d;
447 loc[-1] = 0x80 | (reg << 3) | reg;
450 write32le(loc, val);
453 static void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
454 if (rel.type == R_386_TLS_LDO_32) {
455 write32le(loc, val);
456 return;
459 if (loc[4] == 0xe8) {
460 // Convert
461 // leal x(%reg),%eax
462 // call ___tls_get_addr@plt
463 // to
464 const uint8_t inst[] = {
465 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
466 0x90, // nop
467 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
469 memcpy(loc - 2, inst, sizeof(inst));
470 return;
473 // Convert
474 // leal x(%reg),%eax
475 // call *___tls_get_addr@got(%reg)
476 // to
477 const uint8_t inst[] = {
478 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
479 0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
481 memcpy(loc - 2, inst, sizeof(inst));
484 void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
485 uint64_t secAddr = sec.getOutputSection()->addr;
486 if (auto *s = dyn_cast<InputSection>(&sec))
487 secAddr += s->outSecOff;
488 for (const Relocation &rel : sec.relocs()) {
489 uint8_t *loc = buf + rel.offset;
490 const uint64_t val = SignExtend64(
491 sec.getRelocTargetVA(sec.file, rel.type, rel.addend,
492 secAddr + rel.offset, *rel.sym, rel.expr),
493 32);
494 switch (rel.expr) {
495 case R_RELAX_TLS_GD_TO_IE_GOTPLT:
496 relaxTlsGdToIe(loc, rel, val);
497 continue;
498 case R_RELAX_TLS_GD_TO_LE:
499 case R_RELAX_TLS_GD_TO_LE_NEG:
500 relaxTlsGdToLe(loc, rel, val);
501 continue;
502 case R_RELAX_TLS_LD_TO_LE:
503 relaxTlsLdToLe(loc, rel, val);
504 break;
505 case R_RELAX_TLS_IE_TO_LE:
506 relaxTlsIeToLe(loc, rel, val);
507 continue;
508 default:
509 relocate(loc, rel, val);
510 break;
515 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
516 // entries containing endbr32 instructions. A PLT entry will be split into two
517 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
518 namespace {
519 class IntelIBT : public X86 {
520 public:
521 IntelIBT();
522 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
523 void writePlt(uint8_t *buf, const Symbol &sym,
524 uint64_t pltEntryAddr) const override;
525 void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
527 static const unsigned IBTPltHeaderSize = 16;
529 } // namespace
531 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
533 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
534 uint64_t va =
535 in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;
536 write32le(buf, va);
539 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
540 uint64_t /*pltEntryAddr*/) const {
541 if (config->isPic) {
542 const uint8_t inst[] = {
543 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
544 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx)
545 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
547 memcpy(buf, inst, sizeof(inst));
548 write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
549 return;
552 const uint8_t inst[] = {
553 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
554 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
555 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
557 memcpy(buf, inst, sizeof(inst));
558 write32le(buf + 6, sym.getGotPltVA());
561 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
562 writePltHeader(buf);
563 buf += IBTPltHeaderSize;
565 const uint8_t inst[] = {
566 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
567 0x68, 0, 0, 0, 0, // pushl $reloc_offset
568 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC
569 0x66, 0x90, // nop
572 for (size_t i = 0; i < numEntries; ++i) {
573 memcpy(buf, inst, sizeof(inst));
574 write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
575 write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
576 buf += sizeof(inst);
580 namespace {
581 class RetpolinePic : public X86 {
582 public:
583 RetpolinePic();
584 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
585 void writePltHeader(uint8_t *buf) const override;
586 void writePlt(uint8_t *buf, const Symbol &sym,
587 uint64_t pltEntryAddr) const override;
590 class RetpolineNoPic : public X86 {
591 public:
592 RetpolineNoPic();
593 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
594 void writePltHeader(uint8_t *buf) const override;
595 void writePlt(uint8_t *buf, const Symbol &sym,
596 uint64_t pltEntryAddr) const override;
598 } // namespace
600 RetpolinePic::RetpolinePic() {
601 pltHeaderSize = 48;
602 pltEntrySize = 32;
603 ipltEntrySize = 32;
606 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
607 write32le(buf, s.getPltVA() + 17);
610 void RetpolinePic::writePltHeader(uint8_t *buf) const {
611 const uint8_t insn[] = {
612 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx)
613 0x50, // 6: pushl %eax
614 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax
615 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
616 0xf3, 0x90, // 12: loop: pause
617 0x0f, 0xae, 0xe8, // 14: lfence
618 0xeb, 0xf9, // 17: jmp loop
619 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
620 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
621 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
622 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
623 0x89, 0xc8, // 2b: mov %ecx, %eax
624 0x59, // 2d: pop %ecx
625 0xc3, // 2e: ret
626 0xcc, // 2f: int3; padding
628 memcpy(buf, insn, sizeof(insn));
631 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
632 uint64_t pltEntryAddr) const {
633 unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
634 const uint8_t insn[] = {
635 0x50, // pushl %eax
636 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
637 0xe8, 0, 0, 0, 0, // call plt+0x20
638 0xe9, 0, 0, 0, 0, // jmp plt+0x12
639 0x68, 0, 0, 0, 0, // pushl $reloc_offset
640 0xe9, 0, 0, 0, 0, // jmp plt+0
641 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
643 memcpy(buf, insn, sizeof(insn));
645 uint32_t ebx = in.gotPlt->getVA();
646 unsigned off = pltEntryAddr - in.plt->getVA();
647 write32le(buf + 3, sym.getGotPltVA() - ebx);
648 write32le(buf + 8, -off - 12 + 32);
649 write32le(buf + 13, -off - 17 + 18);
650 write32le(buf + 18, relOff);
651 write32le(buf + 23, -off - 27);
654 RetpolineNoPic::RetpolineNoPic() {
655 pltHeaderSize = 48;
656 pltEntrySize = 32;
657 ipltEntrySize = 32;
660 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
661 write32le(buf, s.getPltVA() + 16);
664 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
665 const uint8_t insn[] = {
666 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
667 0x50, // 6: pushl %eax
668 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
669 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
670 0xf3, 0x90, // 11: loop: pause
671 0x0f, 0xae, 0xe8, // 13: lfence
672 0xeb, 0xf9, // 16: jmp loop
673 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
674 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16
675 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
676 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
677 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
678 0x89, 0xc8, // 2b: mov %ecx, %eax
679 0x59, // 2d: pop %ecx
680 0xc3, // 2e: ret
681 0xcc, // 2f: int3; padding
683 memcpy(buf, insn, sizeof(insn));
685 uint32_t gotPlt = in.gotPlt->getVA();
686 write32le(buf + 2, gotPlt + 4);
687 write32le(buf + 8, gotPlt + 8);
690 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
691 uint64_t pltEntryAddr) const {
692 unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
693 const uint8_t insn[] = {
694 0x50, // 0: pushl %eax
695 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
696 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
697 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
698 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
699 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
700 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
701 0xcc, // 1f: int3; padding
703 memcpy(buf, insn, sizeof(insn));
705 unsigned off = pltEntryAddr - in.plt->getVA();
706 write32le(buf + 2, sym.getGotPltVA());
707 write32le(buf + 7, -off - 11 + 32);
708 write32le(buf + 12, -off - 16 + 17);
709 write32le(buf + 17, relOff);
710 write32le(buf + 22, -off - 26);
713 TargetInfo *elf::getX86TargetInfo() {
714 if (config->zRetpolineplt) {
715 if (config->isPic) {
716 static RetpolinePic t;
717 return &t;
719 static RetpolineNoPic t;
720 return &t;
723 if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
724 static IntelIBT t;
725 return &t;
728 static X86 t;
729 return &t;