//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect.(plus separated)\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc      indicates conditional jumps"
        "\nfused    indicates fused conditional jumps"
        "\njmp      indicates direct unconditional jumps"
        "\ncall     indicates direct and indirect calls"
        "\nret      indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

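// A typical use of the two options above (illustrative command line, not part
// of this file):
//   llvm-mc -filetype=obj -x86-align-branch-boundary=32 \
//           -x86-align-branch=fused+jcc+jmp test.s
// pads so that no fused cmp/jcc pair, conditional jump, or direct
// unconditional jump crosses or ends against a 32-byte boundary.
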
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool CanPadInst = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by main flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

static bool isRelaxableBranch(unsigned Opcode) {
  return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
}

static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
                                       bool Is16BitMode = false) {
  switch (Opcode) {
  default:
    llvm_unreachable("invalid opcode for branch");
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

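// For example, a short `jne` encoded as JCC_1 (0x75 rel8, two bytes) relaxes
// to the near form JCC_4 (0x0F 0x85 rel32) in 32/64-bit mode, or to JCC_2
// (rel16) in 16-bit mode.
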
static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
                                   : X86::getOpcodeForLongImmediateForm(Opcode);
}

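// Non-branch instructions are relaxed from their sign-extended 8-bit
// immediate form to the full-width immediate form; that mapping is provided
// by X86::getOpcodeForLongImmediateForm in X86EncodingOptimization.
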
static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}

/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
/// get a better performance in some cases. Here, we determine which prefix is
/// the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use the CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}

/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

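// For example, `cmp %rsi, %rdi` immediately followed by `je .L` is a fusible
// pair on most recent Intel/AMD cores, while a RIP-relative
// `cmp foo(%rip), %rax; je .L` is rejected by isFirstMacroFusibleInst above.
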
/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS. Return true if the
/// given instruction has such an interrupt delay slot.
static bool hasInterruptDelaySlot(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    if (Inst.getOperand(0).getReg() == X86::SS)
      return true;
    break;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data being
  // added into the previous fragment; we need to skip them since they
  // have no contents.
  for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
    if (cast<MCDataFragment>(F)->getContents().size() != 0)
      break;

  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF != PrevInstPosition.first ||
           DF->getContents().size() != PrevInstPosition.second;

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // Linker may rewrite the instruction with a variant symbol operand (e.g.
    // TLSCALL).
    return false;

  if (hasInterruptDelaySlot(PrevInst))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear instruction
    // boundary; inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->getKind().isText())
    return false;

  // TODO: Currently we don't deal with bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen, so clear the pending fragment.
    PendingBA = nullptr;

  if (!CanPadInst)
    return;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax, %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // we will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible
    // pair, insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}

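// The names accepted above come from the `.reloc` directive, e.g.
// (illustrative):
//   .reloc ., R_X86_64_NONE, payload
//   .reloc ., BFD_RELOC_32, sym
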
const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup,
                                          const MCValue &) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
          MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that the upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}

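// Note that the write above is little-endian: a 4-byte fixup with
// Value == 0x11223344 lands in Data as the byte sequence 44 33 22 11.
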
bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
                                      const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ||
         (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
          MI.getOperand(MI.getNumOperands() - 1).isExpr());
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value,
                                         const MCRelaxableFragment *DF,
                                         const MCAsmLayout &Layout) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    Emitter.emitPrefix(RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it is safe to add.
    // Various targets (older chips mostly, but also Atom family) encounter
    // decoder stalls with too many prefixes. For testing purposes, we set the
    // value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.getKind().isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated. Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive. Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}

unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15-bytes is the longest single NOP instruction, but 10-bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}

/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {
  static const char Nops32Bit[10][11] = {
      // nop
      "\x90",
      // xchg %ax,%ax
      "\x66\x90",
      // nopl (%[re]ax)
      "\x0f\x1f\x00",
      // nopl 0(%[re]ax)
      "\x0f\x1f\x40\x00",
      // nopl 0(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x44\x00\x00",
      // nopw 0(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x44\x00\x00",
      // nopl 0L(%[re]ax)
      "\x0f\x1f\x80\x00\x00\x00\x00",
      // nopl 0L(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw 0L(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw %cs:0L(%[re]ax,%[re]ax,1)
      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
  };

  // 16-bit mode uses different nop patterns than 32-bit.
  static const char Nops16Bit[4][11] = {
      // nop
      "\x90",
      // xchg %eax,%eax
      "\x66\x90",
      // lea 0(%si),%si
      "\x8d\x74\x00",
      // lea 0w(%si),%si
      "\x8d\xb4\x00\x00",
  };

  const char(*Nops)[11] =
      STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;

  uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);

  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
  // length.
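  // For example, with MaxNopLength == 15, a request for Count == 12 emits two
  // 0x66 prefixes followed by the 10-byte NOP, i.e. a single 12-byte
  // instruction.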
  do {
    const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
    const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
    for (uint8_t i = 0; i < Prefixes; i++)
      OS << '\x66';
    const uint8_t Rest = ThisNopLength - Prefixes;
    if (Rest != 0)
      OS.write(Nops[Rest - 1], Rest);
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}

/* *** */

namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
    : X86AsmBackend(T, STI)
    , Is64Bit(is64Bit) {
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<std::optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};

namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Mask for encoding the frame registers.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Mask for encoding the frameless registers.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;                   ///< Offset of a "push" instruction.
  unsigned MoveInstrSize;                ///< Size of a "move" instruction.
  unsigned StackDivide;                  ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
    case X86::EBX:
    case X86::ECX:
    case X86::EDX:
    case X86::EDI:
    case X86::ESI:
    case X86::EBP:
    case X86::RBX:
    case X86::RBP:
      return 1;
    case X86::R12:
    case X86::R13:
    case X86::R14:
    case X86::R15:
      // Pushes of R12-R15 need a REX.B prefix, so they are two bytes long.
      return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
        X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
        X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS;
         ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
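    // The multipliers below (120 = 5!, 24 = 4!, 6 = 3!, 2 = 2!, ...) treat
    // the renumbered registers as digits of a variable-base (factorial-like)
    // number, so each distinct save order maps to a unique value that fits in
    // the 10-bit UNWIND_FRAMELESS_STACK_REG_PERMUTATION field.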
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] + 2 * RenumRegs[3]
                             + RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] + 2 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 4:
      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] + RenumRegs[5];
      break;
    case 3:
      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 2:
      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
      break;
    case 1:
      permutationEncoding |= RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                         const MCContext *Ctxt) const override {
    ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
    if (Instrs.empty()) return 0;
    if (!isDarwinCanonicalPersonality(FI->Personality) &&
        !Ctxt->emitCompactUnwindNonCanonical())
      return CU::UNWIND_MODE_DWARF;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint32_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    unsigned StackSize = 0;
    int MinAbsOffset = std::numeric_limits<int>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved
      // registers aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
          RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  if (TheTriple.isUEFI()) {
    assert(TheTriple.isOSBinFormatCOFF() &&
           "Only COFF format is supported in UEFI environment.");
    return new WindowsX86AsmBackend(T, true, STI);
  }

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}