1 //===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MCTargetDesc/X86FixupKinds.h"
10 #include "MCTargetDesc/X86MCTargetDesc.h"
11 #include "llvm/ADT/Twine.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCMachObjectWriter.h"
18 #include "llvm/MC/MCSectionMachO.h"
19 #include "llvm/MC/MCValue.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/Format.h"
// Mach-O object target writer for X86, derived from MCMachObjectTargetWriter.
// It declares the relocation-recording helpers defined later in this file and
// overrides recordRelocation().
// NOTE(review): this listing carries embedded line numbers and appears to
// have dropped lines (some parameters, `else`, call tails, closing braces);
// confirm every signature against the upstream source before relying on it.
26 class X86MachObjectWriter
: public MCMachObjectTargetWriter
{
// Scattered-relocation helper. Its bool result is tested by the caller in
// RecordX86Relocation().
27 bool recordScatteredRelocation(MachObjectWriter
*Writer
,
28 const MCAssembler
&Asm
,
29 const MCAsmLayout
&Layout
,
30 const MCFragment
*Fragment
,
34 uint64_t &FixedValue
);
// Helper invoked from RecordX86Relocation() when the target symbol carries
// the VK_TLVP modifier.
35 void recordTLVPRelocation(MachObjectWriter
*Writer
,
36 const MCAssembler
&Asm
,
37 const MCAsmLayout
&Layout
,
38 const MCFragment
*Fragment
,
41 uint64_t &FixedValue
);
// Relocation recorder that recordRelocation() calls besides the 64-bit one.
43 void RecordX86Relocation(MachObjectWriter
*Writer
,
44 const MCAssembler
&Asm
,
45 const MCAsmLayout
&Layout
,
46 const MCFragment
*Fragment
,
49 uint64_t &FixedValue
);
// Relocation recorder called when Writer->is64Bit() is true.
50 void RecordX86_64Relocation(MachObjectWriter
*Writer
, MCAssembler
&Asm
,
51 const MCAsmLayout
&Layout
,
52 const MCFragment
*Fragment
, const MCFixup
&Fixup
,
53 MCValue Target
, uint64_t &FixedValue
);
// Forwards Is64Bit, CPUType and CPUSubtype unchanged to the base-class
// constructor; the constructor body is empty.
56 X86MachObjectWriter(bool Is64Bit
, uint32_t CPUType
, uint32_t CPUSubtype
)
57 : MCMachObjectTargetWriter(Is64Bit
, CPUType
, CPUSubtype
) {}
// Override of the base-class hook: checks Writer->is64Bit() and hands the
// fixup to RecordX86_64Relocation(); RecordX86Relocation() is the other call.
// NOTE(review): the `else` and the tails of both calls are not visible in
// this listing; presumably the second call is the 32-bit branch. Confirm.
59 void recordRelocation(MachObjectWriter
*Writer
, MCAssembler
&Asm
,
60 const MCAsmLayout
&Layout
, const MCFragment
*Fragment
,
61 const MCFixup
&Fixup
, MCValue Target
,
62 uint64_t &FixedValue
) override
{
63 if (Writer
->is64Bit())
64 RecordX86_64Relocation(Writer
, Asm
, Layout
, Fragment
, Fixup
, Target
,
67 RecordX86Relocation(Writer
, Asm
, Layout
, Fragment
, Fixup
, Target
,
73 static bool isFixupKindRIPRel(unsigned Kind
) {
74 return Kind
== X86::reloc_riprel_4byte
||
75 Kind
== X86::reloc_riprel_4byte_movq_load
||
76 Kind
== X86::reloc_riprel_4byte_relax
||
77 Kind
== X86::reloc_riprel_4byte_relax_rex
;
// Maps a fixup kind to the log2 of its size, as the function name states.
// Visible cases: FK_Data_1 -> 0, FK_Data_2 -> 1, FK_Data_4 and the listed
// X86 4-byte reloc kinds -> 2, FK_Data_8 -> 3. An llvm_unreachable with
// "invalid fixup kind!" is also present.
// NOTE(review): the `switch` header, the label guarding llvm_unreachable, and
// the closing braces are not visible here, and the embedded numbering skips
// lines between the cases, so further case labels may be missing. Confirm
// against the upstream source.
80 static unsigned getFixupKindLog2Size(unsigned Kind
) {
83 llvm_unreachable("invalid fixup kind!");
85 case FK_Data_1
: return 0;
87 case FK_Data_2
: return 1;
89 // FIXME: Remove these!!!
// All of the following X86-specific kinds share the FK_Data_4 result (2).
90 case X86::reloc_riprel_4byte
:
91 case X86::reloc_riprel_4byte_relax
:
92 case X86::reloc_riprel_4byte_relax_rex
:
93 case X86::reloc_riprel_4byte_movq_load
:
94 case X86::reloc_signed_4byte
:
95 case X86::reloc_signed_4byte_relax
:
96 case X86::reloc_branch_4byte_pcrel
:
97 case FK_Data_4
: return 2;
98 case FK_Data_8
: return 3;
// Records the Mach-O relocation entry (or entries) for one fixup on x86_64.
// Derives the PC-relative / RIP-relative flags and log2 size from the fixup
// kind, selects a MachO::X86_64_RELOC_* type from the shape of Target
// (absolute constant, A - B difference, or single symbol with an optional
// modifier), reports unsupported forms through
// Asm.getContext().reportError(), and hands the packed entry to
// Writer->addRelocation().
// NOTE(review): this listing has dropped lines (declarations of Value, Type
// and Index, several `if`/`else` headers, `return` statements and closing
// braces are not visible), so the exact control flow cannot be read from
// here. Confirm against the upstream source.
102 void X86MachObjectWriter::RecordX86_64Relocation(
103 MachObjectWriter
*Writer
, MCAssembler
&Asm
, const MCAsmLayout
&Layout
,
104 const MCFragment
*Fragment
, const MCFixup
&Fixup
, MCValue Target
,
105 uint64_t &FixedValue
) {
106 unsigned IsPCRel
= Writer
->isFixupKindPCRel(Asm
, Fixup
.getKind());
107 unsigned IsRIPRel
= isFixupKindRIPRel(Fixup
.getKind());
108 unsigned Log2Size
= getFixupKindLog2Size(Fixup
.getKind());
// FixupOffset: the fragment's layout offset plus the fixup's offset inside
// the fragment. FixupAddress: the same, but based on the fragment's address.
111 uint32_t FixupOffset
=
112 Layout
.getFragmentOffset(Fragment
) + Fixup
.getOffset();
113 uint32_t FixupAddress
=
114 Writer
->getFragmentAddress(Fragment
, Layout
) + Fixup
.getOffset();
117 unsigned IsExtern
= 0;
119 const MCSymbol
*RelSymbol
= nullptr;
// Start from the expression's constant addend.
121 Value
= Target
.getConstant();
124 // Compensate for the relocation offset, Darwin x86_64 relocations only have
125 // the addend and appear to have attempted to define it to be the actual
126 // expression addend without the PCrel bias. However, instructions with data
127 // following the relocation are not accommodated for (see comment below
128 // regarding SIGNED{1,2,4}), so it isn't exactly that either.
129 Value
+= 1LL << Log2Size
;
132 if (Target
.isAbsolute()) { // constant
133 // SymbolNum of 0 indicates the absolute section.
134 Type
= MachO::X86_64_RELOC_UNSIGNED
;
136 // FIXME: I believe this is broken, I don't think the linker can understand
137 // it. I think it would require a local relocation, but I'm not sure if that
138 // would work either. The official way to get an absolute PCrel relocation
139 // is to use an absolute symbol (which we don't support yet).
142 Type
= MachO::X86_64_RELOC_BRANCH
;
144 } else if (Target
.getSymB()) { // A - B + constant
// Resolve each operand: temporary symbols are replaced by the symbol they
// alias, then Asm.getAtom() supplies the base (atom) symbol, which may be
// null (see the !A_Base / !B_Base checks below).
145 const MCSymbol
*A
= &Target
.getSymA()->getSymbol();
146 if (A
->isTemporary())
147 A
= &Writer
->findAliasedSymbol(*A
);
148 const MCSymbol
*A_Base
= Asm
.getAtom(*A
);
150 const MCSymbol
*B
= &Target
.getSymB()->getSymbol();
151 if (B
->isTemporary())
152 B
= &Writer
->findAliasedSymbol(*B
);
153 const MCSymbol
*B_Base
= Asm
.getAtom(*B
);
155 // Neither symbol can be modified.
156 if (Target
.getSymA()->getKind() != MCSymbolRefExpr::VK_None
) {
157 Asm
.getContext().reportError(Fixup
.getLoc(),
158 "unsupported relocation of modified symbol");
162 // We don't support PCrel relocations of differences. Darwin 'as' doesn't
163 // implement most of these correctly.
165 Asm
.getContext().reportError(
166 Fixup
.getLoc(), "unsupported pc-relative relocation of difference");
170 // The support for the situation where one or both of the symbols would
171 // require a local relocation is handled just like if the symbols were
172 // external. This is certainly used in the case of debug sections where the
173 // section has only temporary symbols and thus the symbols don't have base
174 // symbols. This is encoded using the section ordinal and non-extern
175 // relocation entries.
177 // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
178 // single SIGNED relocation); reject it for now. Except the case where both
179 // symbols don't have a base, equal but both NULL.
180 if (A_Base
== B_Base
&& A_Base
) {
181 Asm
.getContext().reportError(
182 Fixup
.getLoc(), "unsupported relocation with identical base");
186 // A subtraction expression where either symbol is undefined is a
187 // non-relocatable expression.
188 if (A
->isUndefined() || B
->isUndefined()) {
189 StringRef Name
= A
->isUndefined() ? A
->getName() : B
->getName();
190 Asm
.getContext().reportError(Fixup
.getLoc(),
191 "unsupported relocation with subtraction expression, symbol '" +
192 Name
+ "' can not be undefined in a subtraction expression");
// Fold in each symbol's address relative to its base symbol (relative to 0
// when the symbol has no base): add it for A, subtract it for B.
196 Value
+= Writer
->getSymbolAddress(*A
, Layout
) -
197 (!A_Base
? 0 : Writer
->getSymbolAddress(*A_Base
, Layout
));
198 Value
-= Writer
->getSymbolAddress(*B
, Layout
) -
199 (!B_Base
? 0 : Writer
->getSymbolAddress(*B_Base
, Layout
));
// Entry for A: the index is A's section ordinal + 1, the type is UNSIGNED,
// and it is added immediately against A_Base.
202 Index
= A
->getFragment()->getParent()->getOrdinal() + 1;
203 Type
= MachO::X86_64_RELOC_UNSIGNED
;
205 MachO::any_relocation_info MRE
;
206 MRE
.r_word0
= FixupOffset
;
208 (Index
<< 0) | (IsPCRel
<< 24) | (Log2Size
<< 25) | (Type
<< 28);
209 Writer
->addRelocation(A_Base
, Fragment
->getParent(), MRE
);
// Index and Type are then set up for B's SUBTRACTOR entry.
214 Index
= B
->getFragment()->getParent()->getOrdinal() + 1;
215 Type
= MachO::X86_64_RELOC_SUBTRACTOR
;
// Single-symbol case.
217 const MCSymbol
*Symbol
= &Target
.getSymA()->getSymbol();
218 if (Symbol
->isTemporary() && Value
) {
219 const MCSection
&Sec
= Symbol
->getSection();
220 if (!Asm
.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec
))
221 Symbol
->setUsedInReloc();
223 RelSymbol
= Asm
.getAtom(*Symbol
);
225 // Relocations inside debug sections always use local relocations when
226 // possible. This seems to be done because the debugger doesn't fully
227 // understand x86_64 relocation entries, and expects to find values that
228 // have already been fixed up.
229 if (Symbol
->isInSection()) {
230 const MCSectionMachO
&Section
=
231 static_cast<const MCSectionMachO
&>(*Fragment
->getParent());
232 if (Section
.hasAttribute(MachO::S_ATTR_DEBUG
))
236 // x86_64 almost always uses external relocations, except when there is no
237 // symbol to use as a base address (a local symbol with no preceding
238 // non-local symbol).
240 // Add the local offset, if needed.
241 if (RelSymbol
!= Symbol
)
242 Value
+= Layout
.getSymbolOffset(*Symbol
) -
243 Layout
.getSymbolOffset(*RelSymbol
);
244 } else if (Symbol
->isInSection() && !Symbol
->isVariable()) {
245 // The index is the section ordinal (1-based).
246 Index
= Symbol
->getFragment()->getParent()->getOrdinal() + 1;
247 Value
+= Writer
->getSymbolAddress(*Symbol
, Layout
);
250 Value
-= FixupAddress
+ (1 << Log2Size
);
251 } else if (Symbol
->isVariable()) {
// This local MCExpr pointer named Value shadows the outer addend variable
// within this branch.
252 const MCExpr
*Value
= Symbol
->getVariableValue();
254 bool isAbs
= Value
->evaluateAsAbsolute(Res
, Layout
,
255 Writer
->getSectionAddressMap());
260 Asm
.getContext().reportError(Fixup
.getLoc(),
261 "unsupported relocation of variable '" +
262 Symbol
->getName() + "'");
266 Asm
.getContext().reportError(
267 Fixup
.getLoc(), "unsupported relocation of undefined symbol '" +
268 Symbol
->getName() + "'");
// Choose the relocation type from the symbol's modifier.
272 MCSymbolRefExpr::VariantKind Modifier
= Target
.getSymA()->getKind();
275 if (Modifier
== MCSymbolRefExpr::VK_GOTPCREL
) {
276 // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
277 // rewrite the movq to an leaq at link time if the symbol ends up in
278 // the same linkage unit.
279 if (unsigned(Fixup
.getKind()) == X86::reloc_riprel_4byte_movq_load
)
280 Type
= MachO::X86_64_RELOC_GOT_LOAD
;
282 Type
= MachO::X86_64_RELOC_GOT
;
283 } else if (Modifier
== MCSymbolRefExpr::VK_TLVP
) {
284 Type
= MachO::X86_64_RELOC_TLV
;
285 } else if (Modifier
!= MCSymbolRefExpr::VK_None
) {
286 Asm
.getContext().reportError(
287 Fixup
.getLoc(), "unsupported symbol modifier in relocation");
290 Type
= MachO::X86_64_RELOC_SIGNED
;
292 // The Darwin x86_64 relocation format has a problem where it cannot
293 // encode an address (L<foo> + <constant>) which is outside the atom
294 // containing L<foo>. Generally, this shouldn't occur but it does
295 // happen when we have a RIPrel instruction with data following the
296 // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
297 // adjustment Darwin x86_64 uses, the offset is still negative and the
298 // linker has no way to recognize this.
300 // To work around this, Darwin uses several special relocation types
301 // to indicate the offsets. However, the specification or
302 // implementation of these seems to also be incomplete; they should
303 // adjust the addend as well based on the actual encoded instruction
304 // (the additional bias), but instead appear to just look at the final
// The switch value is the negated sum of the expression constant and the
// fixup size; values 1, 2 and 4 select SIGNED_1, SIGNED_2 and SIGNED_4.
306 switch (-(Target
.getConstant() + (1LL << Log2Size
))) {
307 case 1: Type
= MachO::X86_64_RELOC_SIGNED_1
; break;
308 case 2: Type
= MachO::X86_64_RELOC_SIGNED_2
; break;
309 case 4: Type
= MachO::X86_64_RELOC_SIGNED_4
; break;
313 if (Modifier
!= MCSymbolRefExpr::VK_None
) {
314 Asm
.getContext().reportError(
316 "unsupported symbol modifier in branch relocation");
320 Type
= MachO::X86_64_RELOC_BRANCH
;
323 if (Modifier
== MCSymbolRefExpr::VK_GOT
) {
324 Type
= MachO::X86_64_RELOC_GOT
;
325 } else if (Modifier
== MCSymbolRefExpr::VK_GOTPCREL
) {
326 // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
327 // case all we do is set the PCrel bit in the relocation entry; this is
328 // used with exception handling, for example. The source is required to
329 // include any necessary offset directly.
330 Type
= MachO::X86_64_RELOC_GOT
;
332 } else if (Modifier
== MCSymbolRefExpr::VK_TLVP
) {
333 Asm
.getContext().reportError(
334 Fixup
.getLoc(), "TLVP symbol modifier should have been rip-rel");
336 } else if (Modifier
!= MCSymbolRefExpr::VK_None
) {
337 Asm
.getContext().reportError(
338 Fixup
.getLoc(), "unsupported symbol modifier in relocation");
341 Type
= MachO::X86_64_RELOC_UNSIGNED
;
342 unsigned Kind
= Fixup
.getKind();
343 if (Kind
== X86::reloc_signed_4byte
) {
344 Asm
.getContext().reportError(
346 "32-bit absolute addressing is not supported in 64-bit mode");
353 // x86_64 always writes custom values into the fixups.
356 // struct relocation_info (8 bytes)
// r_word0 holds the fixup offset. r_word1 packs Index at bit 0, IsPCRel at
// bit 24, Log2Size at bit 25, IsExtern at bit 27 and Type at bit 28.
357 MachO::any_relocation_info MRE
;
358 MRE
.r_word0
= FixupOffset
;
359 MRE
.r_word1
= (Index
<< 0) | (IsPCRel
<< 24) | (Log2Size
<< 25) |
360 (IsExtern
<< 27) | (Type
<< 28);
361 Writer
->addRelocation(RelSymbol
, Fragment
->getParent(), MRE
);
// Records a scattered relocation for a fixup and returns a bool that
// RecordX86Relocation() tests. FixedValue is adjusted by the section
// addresses of the symbols involved. When the target has a second symbol
// (SymB), a SECTDIFF / LOCAL_SECTDIFF type is selected and a
// GENERIC_RELOC_PAIR entry is added first.
// The incoming FixedValue is saved so it can be restored when FixupOffset
// does not fit in 24 bits (> 0xffffff).
// NOTE(review): this listing has dropped lines (remaining parameters, the
// declarations of Value2 and Buffer, `return` statements, parts of the
// r_word0 expressions and closing braces), so the exact return values cannot
// be read from here. Confirm against the upstream source.
364 bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter
*Writer
,
365 const MCAssembler
&Asm
,
366 const MCAsmLayout
&Layout
,
367 const MCFragment
*Fragment
,
368 const MCFixup
&Fixup
,
371 uint64_t &FixedValue
) {
372 uint64_t OriginalFixedValue
= FixedValue
;
373 uint32_t FixupOffset
= Layout
.getFragmentOffset(Fragment
)+Fixup
.getOffset();
374 unsigned IsPCRel
= Writer
->isFixupKindPCRel(Asm
, Fixup
.getKind());
375 unsigned Type
= MachO::GENERIC_RELOC_VANILLA
;
378 const MCSymbol
*A
= &Target
.getSymA()->getSymbol();
// A symbol with no fragment is reported as undefined.
380 if (!A
->getFragment()) {
381 Asm
.getContext().reportError(
383 "symbol '" + A
->getName() +
384 "' can not be undefined in a subtraction expression");
// Value is A's address; FixedValue gains the address of A's section.
388 uint32_t Value
= Writer
->getSymbolAddress(*A
, Layout
);
389 uint64_t SecAddr
= Writer
->getSectionAddress(A
->getFragment()->getParent());
390 FixedValue
+= SecAddr
;
393 if (const MCSymbolRefExpr
*B
= Target
.getSymB()) {
394 const MCSymbol
*SB
= &B
->getSymbol();
396 if (!SB
->getFragment()) {
397 Asm
.getContext().reportError(
399 "symbol '" + SB
->getName() +
400 "' can not be undefined in a subtraction expression");
404 // Select the appropriate difference relocation type.
406 // Note that there is no longer any semantic difference between these two
407 // relocation types from the linkers point of view, this is done solely for
408 // pedantic compatibility with 'as'.
409 Type
= A
->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF
410 : (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF
;
// Value2 is SB's address; FixedValue loses the address of SB's section.
411 Value2
= Writer
->getSymbolAddress(*SB
, Layout
);
412 FixedValue
-= Writer
->getSectionAddress(SB
->getFragment()->getParent());
415 // Relocations are written out in reverse order, so the PAIR comes first.
416 if (Type
== MachO::GENERIC_RELOC_SECTDIFF
||
417 Type
== MachO::GENERIC_RELOC_LOCAL_SECTDIFF
) {
418 // If the offset is too large to fit in a scattered relocation,
419 // we're hosed. It's an unfortunate limitation of the MachO format.
420 if (FixupOffset
> 0xffffff) {
422 format("0x%x", FixupOffset
).print(Buffer
, sizeof(Buffer
));
423 Asm
.getContext().reportError(Fixup
.getLoc(),
424 Twine("Section too large, can't encode "
425 "r_address (") + Buffer
+
426 ") into 24 bits of scattered "
427 "relocation entry.");
// PAIR entry: r_address is 0, the type is GENERIC_RELOC_PAIR at bit 24, and
// r_word1 carries the second symbol's address (Value2).
431 MachO::any_relocation_info MRE
;
432 MRE
.r_word0
= ((0 << 0) | // r_address
433 (MachO::GENERIC_RELOC_PAIR
<< 24) | // r_type
437 MRE
.r_word1
= Value2
;
438 Writer
->addRelocation(nullptr, Fragment
->getParent(), MRE
);
440 // If the offset is more than 24-bits, it won't fit in a scattered
441 // relocation offset field, so we fall back to using a non-scattered
442 // relocation. This is a bit risky, as if the offset reaches out of
443 // the block and the linker is doing scattered loading on this
444 // symbol, things can go badly.
446 // Required for 'as' compatibility.
447 if (FixupOffset
> 0xffffff) {
// Undo the section-address adjustments made above.
448 FixedValue
= OriginalFixedValue
;
// Main entry: FixupOffset occupies the low bits of r_word0.
453 MachO::any_relocation_info MRE
;
454 MRE
.r_word0
= ((FixupOffset
<< 0) |
460 Writer
->addRelocation(nullptr, Fragment
->getParent(), MRE
);
// Records a MachO::GENERIC_RELOC_TLV relocation against SymA's symbol.
// Asserts that SymA's kind is VK_TLVP and that the writer is not 64-bit.
// When the target has a second symbol (SymB), FixedValue is recomputed from
// the fixup address, SymB's address and the expression constant.
// NOTE(review): this listing has dropped lines (remaining parameters, the
// statement that marks the entry PC-relative, the r_word0 / r_word1
// assignments and closing braces). Confirm against the upstream source.
464 void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter
*Writer
,
465 const MCAssembler
&Asm
,
466 const MCAsmLayout
&Layout
,
467 const MCFragment
*Fragment
,
468 const MCFixup
&Fixup
,
470 uint64_t &FixedValue
) {
471 const MCSymbolRefExpr
*SymA
= Target
.getSymA();
472 assert(SymA
->getKind() == MCSymbolRefExpr::VK_TLVP
&& !is64Bit() &&
473 "Should only be called with a 32-bit TLVP relocation!");
475 unsigned Log2Size
= getFixupKindLog2Size(Fixup
.getKind());
// Here Value is the fixup's offset: fragment layout offset + fixup offset.
476 uint32_t Value
= Layout
.getFragmentOffset(Fragment
)+Fixup
.getOffset();
477 unsigned IsPCRel
= 0;
479 // We're only going to have a second symbol in pic mode and it'll be a
480 // subtraction from the picbase. For 32-bit pic the addend is the difference
481 // between the picbase and the next address. For 32-bit static the addend is
483 if (auto *SymB
= Target
.getSymB()) {
484 // If this is a subtraction then we're pcrel.
485 uint32_t FixupAddress
=
486 Writer
->getFragmentAddress(Fragment
, Layout
) + Fixup
.getOffset();
// FixedValue = FixupAddress - address(SymB) + constant, then biased by the
// fixup size (1 << Log2Size).
488 FixedValue
= FixupAddress
-
489 Writer
->getSymbolAddress(SymB
->getSymbol(), Layout
) +
490 Target
.getConstant();
491 FixedValue
+= 1ULL << Log2Size
;
496 // struct relocation_info (8 bytes)
497 MachO::any_relocation_info MRE
;
// Visible part of the packing: IsPCRel at bit 24, Log2Size at bit 25 and the
// TLV relocation type at bit 28.
500 (IsPCRel
<< 24) | (Log2Size
<< 25) | (MachO::GENERIC_RELOC_TLV
<< 28);
501 Writer
->addRelocation(&SymA
->getSymbol(), Fragment
->getParent(), MRE
);
// Records the relocation for one fixup on the non-64-bit path. TLVP-modified
// symbols go to recordTLVPRelocation(), and differences (SymB present) go to
// recordScatteredRelocation(). A symbol with a non-zero offset that does not
// require an external relocation also tries recordScatteredRelocation().
// Otherwise a GENERIC_RELOC_VANILLA relocation_info entry is built and passed
// to Writer->addRelocation().
// NOTE(review): this listing has dropped lines (remaining parameters, the
// declarations of Index, Type and Res, `return` / `else` lines, the r_word1
// assignment head and closing braces). Confirm against the upstream source.
504 void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter
*Writer
,
505 const MCAssembler
&Asm
,
506 const MCAsmLayout
&Layout
,
507 const MCFragment
*Fragment
,
508 const MCFixup
&Fixup
,
510 uint64_t &FixedValue
) {
511 unsigned IsPCRel
= Writer
->isFixupKindPCRel(Asm
, Fixup
.getKind());
512 unsigned Log2Size
= getFixupKindLog2Size(Fixup
.getKind());
514 // If this is a 32-bit TLVP reloc it's handled a bit differently.
515 if (Target
.getSymA() &&
516 Target
.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP
) {
517 recordTLVPRelocation(Writer
, Asm
, Layout
, Fragment
, Fixup
, Target
,
522 // If this is a difference or a defined symbol plus an offset, then we need a
523 // scattered relocation entry. Differences always require scattered
525 if (Target
.getSymB()) {
// The bool result of the scattered helper is not used on this path.
526 recordScatteredRelocation(Writer
, Asm
, Layout
, Fragment
, Fixup
,
527 Target
, Log2Size
, FixedValue
);
531 // Get the symbol data, if any.
532 const MCSymbol
*A
= nullptr;
533 if (Target
.getSymA())
534 A
= &Target
.getSymA()->getSymbol();
536 // If this is an internal relocation with an offset, it also needs a scattered
538 uint32_t Offset
= Target
.getConstant();
// NOTE(review): the condition guarding this size bias is not visible here;
// presumably it applies only to PC-relative fixups. Confirm.
540 Offset
+= 1 << Log2Size
;
541 // Try to record the scattered relocation if needed. Fall back to non
542 // scattered if necessary (see comments in recordScatteredRelocation()
544 if (Offset
&& A
&& !Writer
->doesSymbolRequireExternRelocation(*A
) &&
545 recordScatteredRelocation(Writer
, Asm
, Layout
, Fragment
, Fixup
, Target
,
546 Log2Size
, FixedValue
))
// Non-scattered entry: fragment layout offset + fixup offset.
550 uint32_t FixupOffset
= Layout
.getFragmentOffset(Fragment
)+Fixup
.getOffset();
553 const MCSymbol
*RelSymbol
= nullptr;
555 if (Target
.isAbsolute()) { // constant
556 // SymbolNum of 0 indicates the absolute section.
558 // FIXME: Currently, these are never generated (see code below). I cannot
559 // find a case where they are actually emitted.
560 Type
= MachO::GENERIC_RELOC_VANILLA
;
562 // Resolve constant variables.
563 if (A
->isVariable()) {
565 if (A
->getVariableValue()->evaluateAsAbsolute(
566 Res
, Layout
, Writer
->getSectionAddressMap())) {
572 // Check whether we need an external or internal relocation.
573 if (Writer
->doesSymbolRequireExternRelocation(*A
)) {
575 // For external relocations, make sure to offset the fixup value to
576 // compensate for the addend of the symbol address, if it was
577 // undefined. This occurs with weak definitions, for example.
578 if (!A
->isUndefined())
579 FixedValue
-= Layout
.getSymbolOffset(*A
);
581 // The index is the section ordinal (1-based).
582 const MCSection
&Sec
= A
->getSection();
583 Index
= Sec
.getOrdinal() + 1;
584 FixedValue
+= Writer
->getSectionAddress(&Sec
);
// NOTE(review): the guard for this subtraction is not visible here;
// presumably it applies to PC-relative fixups only. Confirm.
587 FixedValue
-= Writer
->getSectionAddress(Fragment
->getParent());
589 Type
= MachO::GENERIC_RELOC_VANILLA
;
592 // struct relocation_info (8 bytes)
// r_word0 holds the fixup offset. The visible packing places Index at bit 0,
// IsPCRel at bit 24, Log2Size at bit 25 and Type at bit 28.
593 MachO::any_relocation_info MRE
;
594 MRE
.r_word0
= FixupOffset
;
596 (Index
<< 0) | (IsPCRel
<< 24) | (Log2Size
<< 25) | (Type
<< 28);
597 Writer
->addRelocation(RelSymbol
, Fragment
->getParent(), MRE
);
// Factory function: builds an X86MachObjectWriter from Is64Bit, CPUType and
// CPUSubtype with llvm::make_unique and returns it through the declared
// std::unique_ptr<MCObjectTargetWriter> return type.
// NOTE(review): the closing brace is not visible in this listing.
600 std::unique_ptr
<MCObjectTargetWriter
>
601 llvm::createX86MachObjectWriter(bool Is64Bit
, uint32_t CPUType
,
602 uint32_t CPUSubtype
) {
603 return llvm::make_unique
<X86MachObjectWriter
>(Is64Bit
, CPUType
, CPUSubtype
);