//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect.(plus separated)\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

X86AlignBranchKind X86AlignBranchKindLoc;
cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc      indicates conditional jumps"
        "\nfused    indicates fused conditional jumps"
        "\njmp      indicates direct unconditional jumps"
        "\ncall     indicates direct and indirect calls"
        "\nret      indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));
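// Illustrative note (not part of the original file): with the options above,
// an invocation such as
//   -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp
// asks the backend to keep fused pairs, conditional jumps and unconditional
// jumps from crossing or ending against a 32-byte boundary. Only the option
// names and descriptions above are authoritative; the combination shown here
// is just an assumed example.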
class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool CanPadInst = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding the defaults set by the main flag.
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace
static bool isRelaxableBranch(unsigned Opcode) {
  return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
}

static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
                                       bool Is16BitMode = false) {
  switch (Opcode) {
  default:
    llvm_unreachable("invalid opcode for branch");
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}
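// For illustration (not in the original source): relaxation rewrites the
// short rel8 branch forms into their wider forms, e.g. JCC_1 ("je .L" with an
// 8-bit displacement) becomes JCC_4 (32-bit displacement), or JCC_2 in 16-bit
// mode. Non-branch instructions are handled through
// X86::getOpcodeForLongImmediateForm() instead.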
static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
                                   : X86::getOpcodeForLongImmediateForm(Opcode);
}
static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}
/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}
/// Check if the instruction is a prefix.
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
}
/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}
/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
/// get better performance in some cases. Here, we determine which prefix is
/// the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use the CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}
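// Illustrative example (assumption, not from the original file): in 64-bit
// mode an instruction with no explicit segment override is padded with CS
// prefixes (0x2e), while in 32-bit mode a load such as "movl 4(%ebp), %eax"
// would be padded with SS prefixes because its base register is EBP.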
/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}
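// Illustrative example (not in the original source): a flag-setting
// instruction such as "cmp %rsi, %rdi" immediately followed by "je .Ltarget"
// forms a macro-fusible pair on CPUs that fuse CMP/JCC, so the two
// instructions are kept together when branch alignment padding is inserted.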
/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}
bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}
/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS. Return true if the
/// given instruction has such an interrupt delay slot.
static bool hasInterruptDelaySlot(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    if (Inst.getOperand(0).getReg() == X86::SS)
      return true;
    break;
  }
  return false;
}
/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data being
  // added into the previous fragment, we need to skip them since they
  // have no contents.
  for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
    if (cast<MCDataFragment>(F)->getContents().size() != 0)
      break;

  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into it,
  //       returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF != PrevInstPosition.first ||
           DF->getContents().size() != PrevInstPosition.second;

  return false;
}
/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}
/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // Linker may rewrite the instruction with variant symbol operand (e.g.
    // TLSCALL).
    return false;

  if (hasInterruptDelaySlot(PrevInst))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantic.
    return false;

  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantic.
    return false;

  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear
    // instruction boundary, inserting a nop/prefix would change semantic.
    return false;

  return true;
}
bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in text section.
  if (!OS.getCurrentSectionOnly()->getKind().isText())
    return false;

  // To be Done: Currently don't deal with Bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}
/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}
/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't happen indeed, clear the pending.
    PendingBA = nullptr;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // We will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible pair,
    // insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
  }
}
/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}
std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}
const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}
bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup,
                                          const MCValue &) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}
static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case FK_SecRel_4:
  case FK_Data_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}
void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
          MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
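// Worked example (illustration only, not from the original source): for a
// 4-byte fixup at offset 2 with Value == 0x11223344, the loop above stores
// the bytes 0x44, 0x33, 0x22, 0x11 at Data[2..5], i.e. the value is patched
// into the instruction stream in little-endian order.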
bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
                                      const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ||
         (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
          MI.getOperand(MI.getNumOperands() - 1).isExpr());
}
bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                                         const MCRelaxableFragment *DF,
                                         const MCAsmLayout &Layout) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}
bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    Emitter.emitPrefix(RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine number of prefixes it's safe to add. Various
    // targets (older chips mostly, but also Atom family) encounter decoder
    // stalls with too many prefixes. For testing purposes, we set the value
    // externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}
bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;

  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}
bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}
void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.getKind().isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore.
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated. Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive. Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}
unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15-bytes is the longest single NOP instruction, but 10-bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}
/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {
  static const char Nops32Bit[10][11] = {
      // nop
      "\x90",
      // xchg %ax,%ax
      "\x66\x90",
      // nopl (%[re]ax)
      "\x0f\x1f\x00",
      // nopl 0(%[re]ax)
      "\x0f\x1f\x40\x00",
      // nopl 0(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x44\x00\x00",
      // nopw 0(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x44\x00\x00",
      // nopl 0L(%[re]ax)
      "\x0f\x1f\x80\x00\x00\x00\x00",
      // nopl 0L(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw 0L(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw %cs:0L(%[re]ax,%[re]ax,1)
      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
  };

  // 16-bit mode uses different nop patterns than 32-bit.
  static const char Nops16Bit[4][11] = {
      // nop
      "\x90",
      // xchg %eax,%eax
      "\x66\x90",
      // lea 0(%si),%si
      "\x8d\x74\x00",
      // lea 0w(%si),%si
      "\x8d\xb4\x00\x00",
  };

  const char(*Nops)[11] =
      STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;

  uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);

  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
  // length.
  do {
    const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
    const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
    for (uint8_t i = 0; i < Prefixes; i++)
      OS << '\x66';
    const uint8_t Rest = ThisNopLength - Prefixes;
    if (Rest != 0)
      OS.write(Nops[Rest - 1], Rest);
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}
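// Worked example (illustration only): with MaxNopLength == 15, a request for
// 17 bytes of padding is emitted as one 15-byte NOP (five 0x66 prefixes on
// the 10-byte form) followed by a 2-byte "xchg %ax,%ax"; Count then reaches 0
// and the loop terminates.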
namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};
: public ELFX86AsmBackend
{
1077 ELFX86_X32AsmBackend(const Target
&T
, uint8_t OSABI
,
1078 const MCSubtargetInfo
&STI
)
1079 : ELFX86AsmBackend(T
, OSABI
, STI
) {}
1081 std::unique_ptr
<MCObjectTargetWriter
>
1082 createObjectTargetWriter() const override
{
1083 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI
,
1088 class ELFX86_IAMCUAsmBackend
: public ELFX86AsmBackend
{
1090 ELFX86_IAMCUAsmBackend(const Target
&T
, uint8_t OSABI
,
1091 const MCSubtargetInfo
&STI
)
1092 : ELFX86AsmBackend(T
, OSABI
, STI
) {}
1094 std::unique_ptr
<MCObjectTargetWriter
>
1095 createObjectTargetWriter() const override
{
1096 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI
,
1101 class ELFX86_64AsmBackend
: public ELFX86AsmBackend
{
1103 ELFX86_64AsmBackend(const Target
&T
, uint8_t OSABI
,
1104 const MCSubtargetInfo
&STI
)
1105 : ELFX86AsmBackend(T
, OSABI
, STI
) {}
1107 std::unique_ptr
<MCObjectTargetWriter
>
1108 createObjectTargetWriter() const override
{
1109 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI
, ELF::EM_X86_64
);
1113 class WindowsX86AsmBackend
: public X86AsmBackend
{
1117 WindowsX86AsmBackend(const Target
&T
, bool is64Bit
,
1118 const MCSubtargetInfo
&STI
)
1119 : X86AsmBackend(T
, STI
)
1120 , Is64Bit(is64Bit
) {
1123 std::optional
<MCFixupKind
> getFixupKind(StringRef Name
) const override
{
1124 return StringSwitch
<std::optional
<MCFixupKind
>>(Name
)
1125 .Case("dir32", FK_Data_4
)
1126 .Case("secrel32", FK_SecRel_4
)
1127 .Case("secidx", FK_SecRel_2
)
1128 .Default(MCAsmBackend::getFixupKind(Name
));
1131 std::unique_ptr
<MCObjectTargetWriter
>
1132 createObjectTargetWriter() const override
{
1133 return createX86WinCOFFObjectWriter(Is64Bit
);
1139 /// Compact unwind encoding values.
1140 enum CompactUnwindEncodings
{
1141 /// [RE]BP based frame where [RE]BP is pused on the stack immediately after
1142 /// the return address, then [RE]SP is moved to [RE]BP.
1143 UNWIND_MODE_BP_FRAME
= 0x01000000,
1145 /// A frameless function with a small constant stack size.
1146 UNWIND_MODE_STACK_IMMD
= 0x02000000,
1148 /// A frameless function with a large constant stack size.
1149 UNWIND_MODE_STACK_IND
= 0x03000000,
1151 /// No compact unwind encoding is available.
1152 UNWIND_MODE_DWARF
= 0x04000000,
1154 /// Mask for encoding the frame registers.
1155 UNWIND_BP_FRAME_REGISTERS
= 0x00007FFF,
1157 /// Mask for encoding the frameless registers.
1158 UNWIND_FRAMELESS_STACK_REG_PERMUTATION
= 0x000003FF
1163 class DarwinX86AsmBackend
: public X86AsmBackend
{
1164 const MCRegisterInfo
&MRI
;
1166 /// Number of registers that can be saved in a compact unwind encoding.
1167 enum { CU_NUM_SAVED_REGS
= 6 };
1169 mutable unsigned SavedRegs
[CU_NUM_SAVED_REGS
];
1173 unsigned OffsetSize
; ///< Offset of a "push" instruction.
1174 unsigned MoveInstrSize
; ///< Size of a "move" instruction.
1175 unsigned StackDivide
; ///< Amount to adjust stack size by.
1177 /// Size of a "push" instruction for the given register.
1178 unsigned PushInstrSize(unsigned Reg
) const {
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
        X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0};
    static const MCPhysReg CU64BitRegs[] = {
        X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0};
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }
  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each pushed register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }
  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] + 2 * RenumRegs[3]
                             + RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] + 2 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 4:
      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] + RenumRegs[5];
      break;
    case 3:
      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 2:
      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
      break;
    case 1:
      permutationEncoding |= RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }
public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }
  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                         const MCContext *Ctxt) const override {
    ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
    if (Instrs.empty()) return 0;
    if (!isDarwinCanonicalPersonality(FI->Personality) &&
        !Ctxt->emitCompactUnwindNonCanonical())
      return CU::UNWIND_MODE_DWARF;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint32_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    unsigned StackSize = 0;
    int MinAbsOffset = std::numeric_limits<int>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved
      // registers aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
          RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};
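// Worked example (illustration only, derived from the code above): for a
// frame-based function the resulting 32-bit encoding is
//   UNWIND_MODE_BP_FRAME | (StackAdjust & 0xFF) << 16 | (RegEnc & 0x7FFF),
// so StackAdjust == 2 with no saved registers yields 0x01020000.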
} // end anonymous namespace
MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}
MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  if (TheTriple.isUEFI()) {
    assert(TheTriple.isOSBinFormatCOFF() &&
           "Only COFF format is supported in UEFI environment.");
    return new WindowsX86AsmBackend(T, true, STI);
  }

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}