//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect.(plus separated)\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc      indicates conditional jumps"
        "\nfused    indicates fused conditional jumps"
        "\njmp      indicates direct unconditional jumps"
        "\ncall     indicates direct and indirect calls"
        "\nret      indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

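// A typical use of the two options above (illustrative command line, not part
// of this file):
//   llvm-mc -filetype=obj -x86-align-branch-boundary=32 \
//           -x86-align-branch=fused+jcc+jmp test.s
// pads so that no fused cmp/jcc pair, conditional jump, or direct
// unconditional jump crosses or ends against a 32-byte boundary.
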
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool CanPadInst = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by main flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

static bool isRelaxableBranch(unsigned Opcode) {
  return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
}

static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
                                       bool Is16BitMode = false) {
  switch (Opcode) {
  default:
    llvm_unreachable("invalid opcode for branch");
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

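// For example, a short `jne` encoded as JCC_1 (0x75 rel8, two bytes) relaxes
// to the near form JCC_4 (0x0F 0x85 rel32) in 32/64-bit mode, or to JCC_2
// (rel16) in 16-bit mode.
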
static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
                                   : X86::getOpcodeForLongImmediateForm(Opcode);
}

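// Non-branch instructions are relaxed from their sign-extended 8-bit
// immediate form to the full-width immediate form; that mapping is provided
// by X86::getOpcodeForLongImmediateForm in X86EncodingOptimization.
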
static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}

/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
/// get a better performance in some cases. Here, we determine which prefix is
/// the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use the CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}

/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

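// For example, `cmp %rsi, %rdi` immediately followed by `je .L` is a fusible
// pair on most recent Intel/AMD cores, while a RIP-relative
// `cmp foo(%rip), %rax; je .L` is rejected by isFirstMacroFusibleInst above.
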
/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS. Return true if the
/// given instruction has such an interrupt delay slot.
static bool hasInterruptDelaySlot(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    if (Inst.getOperand(0).getReg() == X86::SS)
      return true;
    break;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data being
  // added into the previous fragment; we need to skip them since they
  // have no contents.
  for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
    if (cast<MCDataFragment>(F)->getContents().size() != 0)
      break;

  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF != PrevInstPosition.first ||
           DF->getContents().size() != PrevInstPosition.second;

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // Linker may rewrite the instruction with a variant symbol operand (e.g.
    // TLSCALL).
    return false;

  if (hasInterruptDelaySlot(PrevInst))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear instruction
    // boundary; inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->getKind().isText())
    return false;

  // TODO: Currently we don't deal with bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen, so clear the pending fragment.
    PendingBA = nullptr;

  if (!CanPadInst)
    return;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax, %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // we will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible
    // pair, insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}

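// The names accepted above come from the `.reloc` directive, e.g.
// (illustrative):
//   .reloc ., R_X86_64_NONE, payload
//   .reloc ., BFD_RELOC_32, sym
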
const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup,
                                          const MCValue &) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
          MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that the upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}

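// Note that the write above is little-endian: a 4-byte fixup with
// Value == 0x11223344 lands in Data as the byte sequence 44 33 22 11.
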
bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
                                      const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ||
         (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
          MI.getOperand(MI.getNumOperands() - 1).isExpr());
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value,
                                         const MCRelaxableFragment *DF,
                                         const MCAsmLayout &Layout) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    Emitter.emitPrefix(RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it is safe to add.
    // Various targets (older chips mostly, but also Atom family) encounter
    // decoder stalls with too many prefixes. For testing purposes, we set the
    // value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.getKind().isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated. Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive. Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}

unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15-bytes is the longest single NOP instruction, but 10-bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}

/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {
  static const char Nops32Bit[10][11] = {
      // nop
      "\x90",
      // xchg %ax,%ax
      "\x66\x90",
      // nopl (%[re]ax)
      "\x0f\x1f\x00",
      // nopl 0(%[re]ax)
      "\x0f\x1f\x40\x00",
      // nopl 0(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x44\x00\x00",
      // nopw 0(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x44\x00\x00",
      // nopl 0L(%[re]ax)
      "\x0f\x1f\x80\x00\x00\x00\x00",
      // nopl 0L(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw 0L(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw %cs:0L(%[re]ax,%[re]ax,1)
      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
  };

  // 16-bit mode uses different nop patterns than 32-bit.
  static const char Nops16Bit[4][11] = {
      // nop
      "\x90",
      // xchg %eax,%eax
      "\x66\x90",
      // lea 0(%si),%si
      "\x8d\x74\x00",
      // lea 0w(%si),%si
      "\x8d\xb4\x00\x00",
  };

  const char(*Nops)[11] =
      STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;

  uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);

  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
  // length.
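  // For example, with MaxNopLength == 15, a request for Count == 12 emits two
  // 0x66 prefixes followed by the 10-byte NOP, i.e. a single 12-byte
  // instruction.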
  do {
    const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
    const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
    for (uint8_t i = 0; i < Prefixes; i++)
      OS << '\x66';
    const uint8_t Rest = ThisNopLength - Prefixes;
    if (Rest != 0)
      OS.write(Nops[Rest - 1], Rest);
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}

/* *** */

namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
    : X86AsmBackend(T, STI)
    , Is64Bit(is64Bit) {
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<std::optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};

namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Mask for encoding the frame registers.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Mask for encoding the frameless registers.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;                   ///< Offset of a "push" instruction.
  unsigned MoveInstrSize;                ///< Size of a "move" instruction.
  unsigned StackDivide;                  ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
    case X86::EBX:
    case X86::ECX:
    case X86::EDX:
    case X86::EDI:
    case X86::ESI:
    case X86::EBP:
    case X86::RBX:
    case X86::RBP:
      return 1;
    case X86::R12:
    case X86::R13:
    case X86::R14:
    case X86::R15:
      // Pushes of R12-R15 need a REX.B prefix, so they are two bytes long.
      return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
        X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
        X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS;
         ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
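    // The multipliers below (120 = 5!, 24 = 4!, 6 = 3!, 2 = 2!, ...) treat
    // the renumbered registers as digits of a variable-base (factorial-like)
    // number, so each distinct save order maps to a unique value that fits in
    // the 10-bit UNWIND_FRAMELESS_STACK_REG_PERMUTATION field.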
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] + 2 * RenumRegs[3]
                             + RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] + 2 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 4:
      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] + RenumRegs[5];
      break;
    case 3:
      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 2:
      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
      break;
    case 1:
      permutationEncoding |= RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                         const MCContext *Ctxt) const override {
    ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
    if (Instrs.empty()) return 0;
    if (!isDarwinCanonicalPersonality(FI->Personality) &&
        !Ctxt->emitCompactUnwindNonCanonical())
      return CU::UNWIND_MODE_DWARF;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint32_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    unsigned StackSize = 0;
    int MinAbsOffset = std::numeric_limits<int>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved
      // registers aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
          RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  if (TheTriple.isUEFI()) {
    assert(TheTriple.isOSBinFormatCOFF() &&
           "Only COFF format is supported in UEFI environment.");
    return new WindowsX86AsmBackend(T, true, STI);
  }

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}