lld/ELF/Arch/X86.cpp

   1 //===- X86.cpp ------------------------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "OutputSections.h"
  10 #include "Symbols.h"
  11 #include "SyntheticSections.h"
  12 #include "Target.h"
  13 #include "lld/Common/ErrorHandler.h"
  14 #include "llvm/Support/Endian.h"
  15
  16 using namespace llvm;
  17 using namespace llvm::support::endian;
  18 using namespace llvm::ELF;
  19 using namespace lld;
  20 using namespace lld::elf;
  21
  22 namespace {
  23 class X86 : public TargetInfo {
  24 public:
  25   X86(Ctx &);
  26   int getTlsGdRelaxSkip(RelType type) const override;
  27   RelExpr getRelExpr(RelType type, const Symbol &s,
  28                      const uint8_t *loc) const override;
  29   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  30   void writeGotPltHeader(uint8_t *buf) const override;
  31   RelType getDynRel(RelType type) const override;
  32   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  33   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  34   void writePltHeader(uint8_t *buf) const override;
  35   void writePlt(uint8_t *buf, const Symbol &sym,
  36                 uint64_t pltEntryAddr) const override;
  37   void relocate(uint8_t *loc, const Relocation &rel,
  38                 uint64_t val) const override;
  39
  40   RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
  41   void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
  42
  43 private:
  44   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  45   void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  46   void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  47   void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  48 };
  49 } // namespace
  50
  51 X86::X86(Ctx &ctx) : TargetInfo(ctx) {
  52   copyRel = R_386_COPY;
  53   gotRel = R_386_GLOB_DAT;
  54   pltRel = R_386_JUMP_SLOT;
  55   iRelativeRel = R_386_IRELATIVE;
  56   relativeRel = R_386_RELATIVE;
  57   symbolicRel = R_386_32;
  58   tlsDescRel = R_386_TLS_DESC;
  59   tlsGotRel = R_386_TLS_TPOFF;
  60   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
  61   tlsOffsetRel = R_386_TLS_DTPOFF32;
  62   gotBaseSymInGotPlt = true;
  63   pltHeaderSize = 16;
  64   pltEntrySize = 16;
  65   ipltEntrySize = 16;
  66   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
  67
  68   // Align to the non-PAE large page size (known as a superpage or huge page).
  69   // FreeBSD automatically promotes large, superpage-aligned allocations.
  70   defaultImageBase = 0x400000;
  71 }
  72
  73 int X86::getTlsGdRelaxSkip(RelType type) const {
  74   // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
  75   return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
  76 }
  77
  78 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
  79                         const uint8_t *loc) const {
  80   switch (type) {
  81   case R_386_8:
  82   case R_386_16:
  83   case R_386_32:
  84     return R_ABS;
  85   case R_386_TLS_LDO_32:
  86     return R_DTPREL;
  87   case R_386_TLS_GD:
  88     return R_TLSGD_GOTPLT;
  89   case R_386_TLS_LDM:
  90     return R_TLSLD_GOTPLT;
  91   case R_386_PLT32:
  92     return R_PLT_PC;
  93   case R_386_PC8:
  94   case R_386_PC16:
  95   case R_386_PC32:
  96     return R_PC;
  97   case R_386_GOTPC:
  98     return R_GOTPLTONLY_PC;
  99   case R_386_TLS_IE:
 100     return R_GOT;
 101   case R_386_GOT32:
 102   case R_386_GOT32X:
 103     // These relocations are arguably mis-designed because their calculations
 104     // depend on the instructions they are applied to. This is bad because we
 105     // usually don't care about whether the target section contains valid
 106     // machine instructions or not. But this is part of the documented ABI, so
 107     // we had to implement as the standard requires.
 108     //
 109     // x86 does not support PC-relative data access. Therefore, in order to
 110     // access GOT contents, a GOT address needs to be known at link-time
 111     // (which means non-PIC) or compilers have to emit code to get a GOT
 112     // address at runtime (which means code is position-independent but
 113     // compilers need to emit extra code for each GOT access.) This decision
 114     // is made at compile-time. In the latter case, compilers emit code to
 115     // load a GOT address to a register, which is usually %ebx.
 116     //
 117     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
 118     // foo@GOT(%ebx).
 119     //
 120     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
 121     // find such relocation, we should report an error. foo@GOT is resolved to
 122     // an *absolute* address of foo's GOT entry, because both GOT address and
 123     // foo's offset are known. In other words, it's G + A.
 124     //
 125     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
 126     // foo's GOT entry in the table, because GOT address is not known but foo's
 127     // offset in the table is known. It's G + A - GOT.
 128     //
 129     // It's unfortunate that compilers emit the same relocation for these
 130     // different use cases. In order to distinguish them, we have to read a
 131     // machine instruction.
 132     //
 133     // The following code implements it. We assume that Loc[0] is the first byte
 134     // of a displacement or an immediate field of a valid machine
 135     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
 136     // the byte, we can determine whether the instruction uses the operand as an
 137     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
 138     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
 139   case R_386_TLS_GOTDESC:
 140     return R_TLSDESC_GOTPLT;
 141   case R_386_TLS_DESC_CALL:
 142     return R_TLSDESC_CALL;
 143   case R_386_TLS_GOTIE:
 144     return R_GOTPLT;
 145   case R_386_GOTOFF:
 146     return R_GOTPLTREL;
 147   case R_386_TLS_LE:
 148     return R_TPREL;
 149   case R_386_TLS_LE_32:
 150     return R_TPREL_NEG;
 151   case R_386_NONE:
 152     return R_NONE;
 153   default:
 154     Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
 155              << ") against symbol " << &s;
 156     return R_NONE;
 157   }
 158 }
 159
 160 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
 161   switch (expr) {
 162   default:
 163     return expr;
 164   case R_RELAX_TLS_GD_TO_IE:
 165     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
 166   case R_RELAX_TLS_GD_TO_LE:
 167     return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
 168                                 : R_RELAX_TLS_GD_TO_LE;
 169   }
 170 }
 171
 172 void X86::writeGotPltHeader(uint8_t *buf) const {
 173   write32le(buf, ctx.mainPart->dynamic->getVA());
 174 }
 175
 176 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
 177   // Entries in .got.plt initially points back to the corresponding
 178   // PLT entries with a fixed offset to skip the first instruction.
 179   write32le(buf, s.getPltVA(ctx) + 6);
 180 }
 181
 182 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
 183   // An x86 entry is the address of the ifunc resolver function.
 184   write32le(buf, s.getVA(ctx));
 185 }
 186
 187 RelType X86::getDynRel(RelType type) const {
 188   if (type == R_386_TLS_LE)
 189     return R_386_TLS_TPOFF;
 190   if (type == R_386_TLS_LE_32)
 191     return R_386_TLS_TPOFF32;
 192   return type;
 193 }
 194
 195 void X86::writePltHeader(uint8_t *buf) const {
 196   if (ctx.arg.isPic) {
 197     const uint8_t v[] = {
 198         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
 199         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
 200         0x90, 0x90, 0x90, 0x90              // nop
 201     };
 202     memcpy(buf, v, sizeof(v));
 203     return;
 204   }
 205
 206   const uint8_t pltData[] = {
 207       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
 208       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
 209       0x90, 0x90, 0x90, 0x90, // nop
 210   };
 211   memcpy(buf, pltData, sizeof(pltData));
 212   uint32_t gotPlt = ctx.in.gotPlt->getVA();
 213   write32le(buf + 2, gotPlt + 4);
 214   write32le(buf + 8, gotPlt + 8);
 215 }
 216
 217 void X86::writePlt(uint8_t *buf, const Symbol &sym,
 218                    uint64_t pltEntryAddr) const {
 219   unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
 220   if (ctx.arg.isPic) {
 221     const uint8_t inst[] = {
 222         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
 223         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
 224         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
 225     };
 226     memcpy(buf, inst, sizeof(inst));
 227     write32le(buf + 2, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
 228   } else {
 229     const uint8_t inst[] = {
 230         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
 231         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
 232         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
 233     };
 234     memcpy(buf, inst, sizeof(inst));
 235     write32le(buf + 2, sym.getGotPltVA(ctx));
 236   }
 237
 238   write32le(buf + 7, relOff);
 239   write32le(buf + 12, ctx.in.plt->getVA() - pltEntryAddr - 16);
 240 }
 241
 242 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
 243   switch (type) {
 244   case R_386_8:
 245   case R_386_PC8:
 246     return SignExtend64<8>(*buf);
 247   case R_386_16:
 248   case R_386_PC16:
 249     return SignExtend64<16>(read16le(buf));
 250   case R_386_32:
 251   case R_386_GLOB_DAT:
 252   case R_386_GOT32:
 253   case R_386_GOT32X:
 254   case R_386_GOTOFF:
 255   case R_386_GOTPC:
 256   case R_386_IRELATIVE:
 257   case R_386_PC32:
 258   case R_386_PLT32:
 259   case R_386_RELATIVE:
 260   case R_386_TLS_GOTDESC:
 261   case R_386_TLS_DESC_CALL:
 262   case R_386_TLS_DTPMOD32:
 263   case R_386_TLS_DTPOFF32:
 264   case R_386_TLS_LDO_32:
 265   case R_386_TLS_LDM:
 266   case R_386_TLS_IE:
 267   case R_386_TLS_IE_32:
 268   case R_386_TLS_LE:
 269   case R_386_TLS_LE_32:
 270   case R_386_TLS_GD:
 271   case R_386_TLS_GD_32:
 272   case R_386_TLS_GOTIE:
 273   case R_386_TLS_TPOFF:
 274   case R_386_TLS_TPOFF32:
 275     return SignExtend64<32>(read32le(buf));
 276   case R_386_TLS_DESC:
 277     return SignExtend64<32>(read32le(buf + 4));
 278   case R_386_NONE:
 279   case R_386_JUMP_SLOT:
 280     // These relocations are defined as not having an implicit addend.
 281     return 0;
 282   default:
 283     InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
 284     return 0;
 285   }
 286 }
 287
 288 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
 289   switch (rel.type) {
 290   case R_386_8:
 291     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
 292     // being used for some 16-bit programs such as boot loaders, so
 293     // we want to support them.
 294     checkIntUInt(ctx, loc, val, 8, rel);
 295     *loc = val;
 296     break;
 297   case R_386_PC8:
 298     checkInt(ctx, loc, val, 8, rel);
 299     *loc = val;
 300     break;
 301   case R_386_16:
 302     checkIntUInt(ctx, loc, val, 16, rel);
 303     write16le(loc, val);
 304     break;
 305   case R_386_PC16:
 306     // R_386_PC16 is normally used with 16 bit code. In that situation
 307     // the PC is 16 bits, just like the addend. This means that it can
 308     // point from any 16 bit address to any other if the possibility
 309     // of wrapping is included.
 310     // The only restriction we have to check then is that the destination
 311     // address fits in 16 bits. That is impossible to do here. The problem is
 312     // that we are passed the final value, which already had the
 313     // current location subtracted from it.
 314     // We just check that Val fits in 17 bits. This misses some cases, but
 315     // should have no false positives.
 316     checkInt(ctx, loc, val, 17, rel);
 317     write16le(loc, val);
 318     break;
 319   case R_386_32:
 320   case R_386_GOT32:
 321   case R_386_GOT32X:
 322   case R_386_GOTOFF:
 323   case R_386_GOTPC:
 324   case R_386_PC32:
 325   case R_386_PLT32:
 326   case R_386_RELATIVE:
 327   case R_386_TLS_GOTDESC:
 328   case R_386_TLS_DESC_CALL:
 329   case R_386_TLS_DTPMOD32:
 330   case R_386_TLS_DTPOFF32:
 331   case R_386_TLS_GD:
 332   case R_386_TLS_GOTIE:
 333   case R_386_TLS_IE:
 334   case R_386_TLS_LDM:
 335   case R_386_TLS_LDO_32:
 336   case R_386_TLS_LE:
 337   case R_386_TLS_LE_32:
 338   case R_386_TLS_TPOFF:
 339   case R_386_TLS_TPOFF32:
 340     checkInt(ctx, loc, val, 32, rel);
 341     write32le(loc, val);
 342     break;
 343   case R_386_TLS_DESC:
 344     // The addend is stored in the second 32-bit word.
 345     write32le(loc + 4, val);
 346     break;
 347   default:
 348     llvm_unreachable("unknown relocation");
 349   }
 350 }
 351
 352 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
 353                          uint64_t val) const {
 354   if (rel.type == R_386_TLS_GD) {
 355     // Convert (loc[-2] == 0x04)
 356     //   leal x@tlsgd(, %ebx, 1), %eax
 357     //   call ___tls_get_addr@plt
 358     // or
 359     //   leal x@tlsgd(%reg), %eax
 360     //   call *___tls_get_addr@got(%reg)
 361     // to
 362     const uint8_t inst[] = {
 363         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
 364         0x81, 0xe8, 0,    0,    0,    0,    // subl x@ntpoff(%ebx), %eax
 365     };
 366     uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
 367     memcpy(w, inst, sizeof(inst));
 368     write32le(w + 8, val);
 369   } else if (rel.type == R_386_TLS_GOTDESC) {
 370     // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
 371     //
 372     // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
 373     if (memcmp(loc - 2, "\x8d\x83", 2)) {
 374       ErrAlways(ctx)
 375           << getErrorLoc(ctx, loc - 2)
 376           << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
 377       return;
 378     }
 379     loc[-1] = 0x05;
 380     write32le(loc, val);
 381   } else {
 382     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
 383     assert(rel.type == R_386_TLS_DESC_CALL);
 384     loc[0] = 0x66;
 385     loc[1] = 0x90;
 386   }
 387 }
 388
 389 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
 390                          uint64_t val) const {
 391   if (rel.type == R_386_TLS_GD) {
 392     // Convert (loc[-2] == 0x04)
 393     //   leal x@tlsgd(, %ebx, 1), %eax
 394     //   call ___tls_get_addr@plt
 395     // or
 396     //   leal x@tlsgd(%reg), %eax
 397     //   call *___tls_get_addr@got(%reg)
 398     const uint8_t inst[] = {
 399         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
 400         0x03, 0x83, 0,    0,    0,    0,    // addl x@gottpoff(%ebx), %eax
 401     };
 402     uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
 403     memcpy(w, inst, sizeof(inst));
 404     write32le(w + 8, val);
 405   } else if (rel.type == R_386_TLS_GOTDESC) {
 406     // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
 407     if (memcmp(loc - 2, "\x8d\x83", 2)) {
 408       ErrAlways(ctx)
 409           << getErrorLoc(ctx, loc - 2)
 410           << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
 411       return;
 412     }
 413     loc[-2] = 0x8b;
 414     write32le(loc, val);
 415   } else {
 416     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
 417     assert(rel.type == R_386_TLS_DESC_CALL);
 418     loc[0] = 0x66;
 419     loc[1] = 0x90;
 420   }
 421 }
 422
 423 // In some conditions, relocations can be optimized to avoid using GOT.
 424 // This function does that for Initial Exec to Local Exec case.
 425 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
 426                          uint64_t val) const {
 427   // Ulrich's document section 6.2 says that @gotntpoff can
 428   // be used with MOVL or ADDL instructions.
 429   // @indntpoff is similar to @gotntpoff, but for use in
 430   // position dependent code.
 431   uint8_t reg = (loc[-1] >> 3) & 7;
 432
 433   if (rel.type == R_386_TLS_IE) {
 434     if (loc[-1] == 0xa1) {
 435       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
 436       // This case is different from the generic case below because
 437       // this is a 5 byte instruction while below is 6 bytes.
 438       loc[-1] = 0xb8;
 439     } else if (loc[-2] == 0x8b) {
 440       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
 441       loc[-2] = 0xc7;
 442       loc[-1] = 0xc0 | reg;
 443     } else {
 444       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
 445       loc[-2] = 0x81;
 446       loc[-1] = 0xc0 | reg;
 447     }
 448   } else {
 449     assert(rel.type == R_386_TLS_GOTIE);
 450     if (loc[-2] == 0x8b) {
 451       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
 452       loc[-2] = 0xc7;
 453       loc[-1] = 0xc0 | reg;
 454     } else {
 455       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
 456       loc[-2] = 0x8d;
 457       loc[-1] = 0x80 | (reg << 3) | reg;
 458     }
 459   }
 460   write32le(loc, val);
 461 }
 462
 463 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
 464                          uint64_t val) const {
 465   if (rel.type == R_386_TLS_LDO_32) {
 466     write32le(loc, val);
 467     return;
 468   }
 469
 470   if (loc[4] == 0xe8) {
 471     // Convert
 472     //   leal x(%reg),%eax
 473     //   call ___tls_get_addr@plt
 474     // to
 475     const uint8_t inst[] = {
 476         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
 477         0x90,                               // nop
 478         0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
 479     };
 480     memcpy(loc - 2, inst, sizeof(inst));
 481     return;
 482   }
 483
 484   // Convert
 485   //   leal x(%reg),%eax
 486   //   call *___tls_get_addr@got(%reg)
 487   // to
 488   const uint8_t inst[] = {
 489       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
 490       0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
 491   };
 492   memcpy(loc - 2, inst, sizeof(inst));
 493 }
 494
 495 void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
 496   uint64_t secAddr = sec.getOutputSection()->addr;
 497   if (auto *s = dyn_cast<InputSection>(&sec))
 498     secAddr += s->outSecOff;
 499   for (const Relocation &rel : sec.relocs()) {
 500     uint8_t *loc = buf + rel.offset;
 501     const uint64_t val =
 502         SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), 32);
 503     switch (rel.expr) {
 504     case R_RELAX_TLS_GD_TO_IE_GOTPLT:
 505       relaxTlsGdToIe(loc, rel, val);
 506       continue;
 507     case R_RELAX_TLS_GD_TO_LE:
 508     case R_RELAX_TLS_GD_TO_LE_NEG:
 509       relaxTlsGdToLe(loc, rel, val);
 510       continue;
 511     case R_RELAX_TLS_LD_TO_LE:
 512       relaxTlsLdToLe(loc, rel, val);
 513       break;
 514     case R_RELAX_TLS_IE_TO_LE:
 515       relaxTlsIeToLe(loc, rel, val);
 516       continue;
 517     default:
 518       relocate(loc, rel, val);
 519       break;
 520     }
 521   }
 522 }
 523
 524 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
 525 // entries containing endbr32 instructions. A PLT entry will be split into two
 526 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
 527 namespace {
 528 class IntelIBT : public X86 {
 529 public:
 530   IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; }
 531   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
 532   void writePlt(uint8_t *buf, const Symbol &sym,
 533                 uint64_t pltEntryAddr) const override;
 534   void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
 535
 536   static const unsigned IBTPltHeaderSize = 16;
 537 };
 538 } // namespace
 539
 540 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
 541   uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize +
 542                 s.getPltIdx(ctx) * pltEntrySize;
 543   write32le(buf, va);
 544 }
 545
 546 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
 547                         uint64_t /*pltEntryAddr*/) const {
 548   if (ctx.arg.isPic) {
 549     const uint8_t inst[] = {
 550         0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
 551         0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
 552         0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
 553     };
 554     memcpy(buf, inst, sizeof(inst));
 555     write32le(buf + 6, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
 556     return;
 557   }
 558
 559   const uint8_t inst[] = {
 560       0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
 561       0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
 562       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
 563   };
 564   memcpy(buf, inst, sizeof(inst));
 565   write32le(buf + 6, sym.getGotPltVA(ctx));
 566 }
 567
 568 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
 569   writePltHeader(buf);
 570   buf += IBTPltHeaderSize;
 571
 572   const uint8_t inst[] = {
 573       0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
 574       0x68, 0,    0,    0,    0, // pushl $reloc_offset
 575       0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
 576       0x66, 0x90,                // nop
 577   };
 578
 579   for (size_t i = 0; i < numEntries; ++i) {
 580     memcpy(buf, inst, sizeof(inst));
 581     write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
 582     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
 583     buf += sizeof(inst);
 584   }
 585 }
 586
 587 namespace {
 588 class RetpolinePic : public X86 {
 589 public:
 590   RetpolinePic(Ctx &);
 591   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
 592   void writePltHeader(uint8_t *buf) const override;
 593   void writePlt(uint8_t *buf, const Symbol &sym,
 594                 uint64_t pltEntryAddr) const override;
 595 };
 596
 597 class RetpolineNoPic : public X86 {
 598 public:
 599   RetpolineNoPic(Ctx &);
 600   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
 601   void writePltHeader(uint8_t *buf) const override;
 602   void writePlt(uint8_t *buf, const Symbol &sym,
 603                 uint64_t pltEntryAddr) const override;
 604 };
 605 } // namespace
 606
 607 RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) {
 608   pltHeaderSize = 48;
 609   pltEntrySize = 32;
 610   ipltEntrySize = 32;
 611 }
 612
 613 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
 614   write32le(buf, s.getPltVA(ctx) + 17);
 615 }
 616
 617 void RetpolinePic::writePltHeader(uint8_t *buf) const {
 618   const uint8_t insn[] = {
 619       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
 620       0x50,                                     // 6:    pushl %eax
 621       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
 622       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
 623       0xf3, 0x90,                               // 12: loop: pause
 624       0x0f, 0xae, 0xe8,                         // 14:   lfence
 625       0xeb, 0xf9,                               // 17:   jmp loop
 626       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
 627       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
 628       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
 629       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
 630       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
 631       0x59,                                     // 2d:   pop %ecx
 632       0xc3,                                     // 2e:   ret
 633       0xcc,                                     // 2f:   int3; padding
 634   };
 635   memcpy(buf, insn, sizeof(insn));
 636 }
 637
 638 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
 639                             uint64_t pltEntryAddr) const {
 640   unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
 641   const uint8_t insn[] = {
 642       0x50,                            // pushl %eax
 643       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
 644       0xe8, 0,    0,    0,    0,       // call plt+0x20
 645       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
 646       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
 647       0xe9, 0,    0,    0,    0,       // jmp plt+0
 648       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
 649   };
 650   memcpy(buf, insn, sizeof(insn));
 651
 652   uint32_t ebx = ctx.in.gotPlt->getVA();
 653   unsigned off = pltEntryAddr - ctx.in.plt->getVA();
 654   write32le(buf + 3, sym.getGotPltVA(ctx) - ebx);
 655   write32le(buf + 8, -off - 12 + 32);
 656   write32le(buf + 13, -off - 17 + 18);
 657   write32le(buf + 18, relOff);
 658   write32le(buf + 23, -off - 27);
 659 }
 660
 661 RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) {
 662   pltHeaderSize = 48;
 663   pltEntrySize = 32;
 664   ipltEntrySize = 32;
 665 }
 666
 667 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
 668   write32le(buf, s.getPltVA(ctx) + 16);
 669 }
 670
 671 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
 672   const uint8_t insn[] = {
 673       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
 674       0x50,                            // 6:    pushl %eax
 675       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
 676       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
 677       0xf3, 0x90,                      // 11: loop: pause
 678       0x0f, 0xae, 0xe8,                // 13:   lfence
 679       0xeb, 0xf9,                      // 16:   jmp loop
 680       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
 681       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
 682       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
 683       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
 684       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
 685       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
 686       0x59,                            // 2d:   pop %ecx
 687       0xc3,                            // 2e:   ret
 688       0xcc,                            // 2f:   int3; padding
 689   };
 690   memcpy(buf, insn, sizeof(insn));
 691
 692   uint32_t gotPlt = ctx.in.gotPlt->getVA();
 693   write32le(buf + 2, gotPlt + 4);
 694   write32le(buf + 8, gotPlt + 8);
 695 }
 696
 697 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
 698                               uint64_t pltEntryAddr) const {
 699   unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
 700   const uint8_t insn[] = {
 701       0x50,                         // 0:  pushl %eax
 702       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
 703       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
 704       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
 705       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
 706       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
 707       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
 708       0xcc,                         // 1f: int3; padding
 709   };
 710   memcpy(buf, insn, sizeof(insn));
 711
 712   unsigned off = pltEntryAddr - ctx.in.plt->getVA();
 713   write32le(buf + 2, sym.getGotPltVA(ctx));
 714   write32le(buf + 7, -off - 11 + 32);
 715   write32le(buf + 12, -off - 16 + 17);
 716   write32le(buf + 17, relOff);
 717   write32le(buf + 22, -off - 26);
 718 }
 719
 720 void elf::setX86TargetInfo(Ctx &ctx) {
 721   if (ctx.arg.zRetpolineplt) {
 722     if (ctx.arg.isPic)
 723       ctx.target.reset(new RetpolinePic(ctx));
 724     else
 725       ctx.target.reset(new RetpolineNoPic(ctx));
 726     return;
 727   }
 728
 729   if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)
 730     ctx.target.reset(new IntelIBT(ctx));
 731   else
 732     ctx.target.reset(new X86(ctx));
 733 }