1 //===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MCTargetDesc/X86FixupKinds.h"
10 #include "MCTargetDesc/X86MCTargetDesc.h"
11 #include "llvm/ADT/Twine.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCMachObjectWriter.h"
18 #include "llvm/MC/MCSectionMachO.h"
19 #include "llvm/MC/MCValue.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/Format.h"
// Mach-O target object writer for X86, derived from MCMachObjectTargetWriter.
// It declares one helper per relocation flavor (scattered, TLVP, 32-bit x86,
// x86_64) and overrides recordRelocation, which picks a helper based on
// Writer->is64Bit().
// NOTE(review): the embedded source numbering skips inside this class (for
// example 30 -> 34 and 64 -> 67), so parts of the parameter lists and the
// tail of the recordRelocation body are not visible in this listing.
26 class X86MachObjectWriter
: public MCMachObjectTargetWriter
{
// Helper for scattered relocation entries; declared to return bool.
27 bool recordScatteredRelocation(MachObjectWriter
*Writer
,
28 const MCAssembler
&Asm
,
29 const MCAsmLayout
&Layout
,
30 const MCFragment
*Fragment
,
34 uint64_t &FixedValue
);
// Helper for TLVP relocations.
35 void recordTLVPRelocation(MachObjectWriter
*Writer
,
36 const MCAssembler
&Asm
,
37 const MCAsmLayout
&Layout
,
38 const MCFragment
*Fragment
,
41 uint64_t &FixedValue
);
// Helper used by recordRelocation for the non-64-bit case.
43 void RecordX86Relocation(MachObjectWriter
*Writer
,
44 const MCAssembler
&Asm
,
45 const MCAsmLayout
&Layout
,
46 const MCFragment
*Fragment
,
49 uint64_t &FixedValue
);
// Helper used by recordRelocation when Writer->is64Bit() is true. Unlike the
// helpers above it takes a non-const MCAssembler reference.
50 void RecordX86_64Relocation(MachObjectWriter
*Writer
, MCAssembler
&Asm
,
51 const MCAsmLayout
&Layout
,
52 const MCFragment
*Fragment
, const MCFixup
&Fixup
,
53 MCValue Target
, uint64_t &FixedValue
);
// Constructor: forwards all three arguments to the base class.
56 X86MachObjectWriter(bool Is64Bit
, uint32_t CPUType
, uint32_t CPUSubtype
)
57 : MCMachObjectTargetWriter(Is64Bit
, CPUType
, CPUSubtype
) {}
// Override of the base-class hook. Calls RecordX86_64Relocation when
// Writer->is64Bit() holds; the RecordX86Relocation call that follows is
// presumably the else branch (the else line itself is not visible here).
59 void recordRelocation(MachObjectWriter
*Writer
, MCAssembler
&Asm
,
60 const MCAsmLayout
&Layout
, const MCFragment
*Fragment
,
61 const MCFixup
&Fixup
, MCValue Target
,
62 uint64_t &FixedValue
) override
{
63 if (Writer
->is64Bit())
64 RecordX86_64Relocation(Writer
, Asm
, Layout
, Fragment
, Fixup
, Target
,
67 RecordX86Relocation(Writer
, Asm
, Layout
, Fragment
, Fixup
, Target
,
// Returns true when Kind equals one of the four X86 reloc_riprel_4byte*
// fixup kinds compared below (plain, movq_load, relax, relax_rex).
// File-local (static) predicate; it reads nothing but its argument.
73 static bool isFixupKindRIPRel(unsigned Kind
) {
74 return Kind
== X86::reloc_riprel_4byte
||
75 Kind
== X86::reloc_riprel_4byte_movq_load
||
76 Kind
== X86::reloc_riprel_4byte_relax
||
77 Kind
== X86::reloc_riprel_4byte_relax_rex
;
// Maps a fixup kind to the log2-size value stored in relocation entries:
//   FK_Data_1 -> 0, FK_Data_2 -> 1, FK_Data_4 and the X86 4-byte kinds -> 2,
//   FK_Data_8 -> 3.
// NOTE(review): the embedded source numbering skips from 80 to 83 and again
// around the case labels, so the switch header and possibly further labels
// are not visible. The llvm_unreachable call is presumably the default arm
// for kinds that are not listed - confirm against the full file.
80 static unsigned getFixupKindLog2Size(unsigned Kind
) {
83 llvm_unreachable("invalid fixup kind!");
85 case FK_Data_1
: return 0;
87 case FK_Data_2
: return 1;
89 // FIXME: Remove these!!!
90 case X86::reloc_riprel_4byte
:
91 case X86::reloc_riprel_4byte_relax
:
92 case X86::reloc_riprel_4byte_relax_rex
:
93 case X86::reloc_riprel_4byte_movq_load
:
94 case X86::reloc_signed_4byte
:
95 case X86::reloc_signed_4byte_relax
:
96 case X86::reloc_branch_4byte_pcrel
:
97 case FK_Data_4
: return 2;
98 case FK_Data_8
: return 3;
// Builds the Mach-O x86_64 relocation entry (or entries) for one fixup and
// hands them to Writer->addRelocation.
//
// The visible code distinguishes three target shapes:
//   1. Target.isAbsolute(): a plain constant.
//   2. Target.getSymB() is set: an A - B + constant difference. An UNSIGNED
//      entry is added for A, then Index/Type are set up for B with the
//      SUBTRACTOR type.
//   3. Otherwise: a single symbol, where the relocation type is chosen from
//      the symbol modifier (GOTPCREL, TLVP, GOT, none) and from the
//      PC-relative / RIP-relative classification of the fixup.
// Unsupported combinations are reported through
// Asm.getContext().reportError at the fixup location.
//
// NOTE(review): this listing drops many source lines (the declarations of
// Value, Index and Type, several if/else lines, and whatever follows each
// reportError call), so the exact control flow between the visible
// statements cannot be confirmed from here.
102 void X86MachObjectWriter::RecordX86_64Relocation(
103 MachObjectWriter
*Writer
, MCAssembler
&Asm
, const MCAsmLayout
&Layout
,
104 const MCFragment
*Fragment
, const MCFixup
&Fixup
, MCValue Target
,
105 uint64_t &FixedValue
) {
// Classify the fixup kind: PC-relative, RIP-relative, and its log2 size.
106 unsigned IsPCRel
= Writer
->isFixupKindPCRel(Asm
, Fixup
.getKind());
107 unsigned IsRIPRel
= isFixupKindRIPRel(Fixup
.getKind());
108 unsigned Log2Size
= getFixupKindLog2Size(Fixup
.getKind());
// FixupOffset is built from the fragment offset in the layout, FixupAddress
// from the fragment address reported by the writer; both add the offset of
// the fixup within the fragment.
111 uint32_t FixupOffset
=
112 Layout
.getFragmentOffset(Fragment
) + Fixup
.getOffset();
113 uint32_t FixupAddress
=
114 Writer
->getFragmentAddress(Fragment
, Layout
) + Fixup
.getOffset();
117 unsigned IsExtern
= 0;
119 const MCSymbol
*RelSymbol
= nullptr;
121 Value
= Target
.getConstant();
124 // Compensate for the relocation offset, Darwin x86_64 relocations only have
125 // the addend and appear to have attempted to define it to be the actual
126 // expression addend without the PCrel bias. However, instructions with data
127 // following the relocation are not accommodated for (see comment below
128 // regarding SIGNED{1,2,4}), so it isn't exactly that either.
129 Value
+= 1LL << Log2Size
;
// Shape 1: the target is a plain constant.
132 if (Target
.isAbsolute()) { // constant
133 // SymbolNum of 0 indicates the absolute section.
134 Type
= MachO::X86_64_RELOC_UNSIGNED
;
136 // FIXME: I believe this is broken, I don't think the linker can understand
137 // it. I think it would require a local relocation, but I'm not sure if that
138 // would work either. The official way to get an absolute PCrel relocation
139 // is to use an absolute symbol (which we don't support yet).
142 Type
= MachO::X86_64_RELOC_BRANCH
;
// Shape 2: difference of two symbols plus a constant.
144 } else if (Target
.getSymB()) { // A - B + constant
// Temporary symbols are replaced by the symbol they alias before the
// enclosing atom is looked up.
145 const MCSymbol
*A
= &Target
.getSymA()->getSymbol();
146 if (A
->isTemporary())
147 A
= &Writer
->findAliasedSymbol(*A
);
148 const MCSymbol
*A_Base
= Asm
.getAtom(*A
);
150 const MCSymbol
*B
= &Target
.getSymB()->getSymbol();
151 if (B
->isTemporary())
152 B
= &Writer
->findAliasedSymbol(*B
);
153 const MCSymbol
*B_Base
= Asm
.getAtom(*B
);
155 // Neither symbol can be modified.
156 if (Target
.getSymA()->getKind() != MCSymbolRefExpr::VK_None
) {
157 Asm
.getContext().reportError(Fixup
.getLoc(),
158 "unsupported relocation of modified symbol");
162 // We don't support PCrel relocations of differences. Darwin 'as' doesn't
163 // implement most of these correctly.
165 Asm
.getContext().reportError(
166 Fixup
.getLoc(), "unsupported pc-relative relocation of difference");
170 // The support for the situation where one or both of the symbols would
171 // require a local relocation is handled just like if the symbols were
172 // external. This is certainly used in the case of debug sections where the
173 // section has only temporary symbols and thus the symbols don't have base
174 // symbols. This is encoded using the section ordinal and non-extern
175 // relocation entries.
177 // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
178 // single SIGNED relocation); reject it for now. Except the case where both
179 // symbols don't have a base, equal but both NULL.
180 if (A_Base
== B_Base
&& A_Base
) {
181 Asm
.getContext().reportError(
182 Fixup
.getLoc(), "unsupported relocation with identical base");
186 // A subtraction expression where either symbol is undefined is a
187 // non-relocatable expression.
188 if (A
->isUndefined() || B
->isUndefined()) {
189 StringRef Name
= A
->isUndefined() ? A
->getName() : B
->getName();
190 Asm
.getContext().reportError(Fixup
.getLoc(),
191 "unsupported relocation with subtraction expression, symbol '" +
192 Name
+ "' can not be undefined in a subtraction expression");
// Fold each symbol address into Value relative to its base symbol; a missing
// base contributes 0.
196 Value
+= Writer
->getSymbolAddress(*A
, Layout
) -
197 (!A_Base
? 0 : Writer
->getSymbolAddress(*A_Base
, Layout
));
198 Value
-= Writer
->getSymbolAddress(*B
, Layout
) -
199 (!B_Base
? 0 : Writer
->getSymbolAddress(*B_Base
, Layout
));
202 Index
= A
->getFragment()->getParent()->getOrdinal() + 1;
203 Type
= MachO::X86_64_RELOC_UNSIGNED
;
// Entry for A: added right away, keyed on A_Base.
205 MachO::any_relocation_info MRE
;
206 MRE
.r_word0
= FixupOffset
;
208 (Index
<< 0) | (IsPCRel
<< 24) | (Log2Size
<< 25) | (Type
<< 28);
209 Writer
->addRelocation(A_Base
, Fragment
->getParent(), MRE
);
// Index and Type are then set up for B using the SUBTRACTOR type.
214 Index
= B
->getFragment()->getParent()->getOrdinal() + 1;
215 Type
= MachO::X86_64_RELOC_SUBTRACTOR
;
// Shape 3: single-symbol handling starts here (the branch line that
// introduces it is not visible in this listing).
217 const MCSymbol
*Symbol
= &Target
.getSymA()->getSymbol();
218 if (Symbol
->isTemporary() && Value
) {
219 const MCSection
&Sec
= Symbol
->getSection();
220 if (!Asm
.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec
))
221 Symbol
->setUsedInReloc();
223 RelSymbol
= Asm
.getAtom(*Symbol
);
225 // Relocations inside debug sections always use local relocations when
226 // possible. This seems to be done because the debugger doesn't fully
227 // understand x86_64 relocation entries, and expects to find values that
228 // have already been fixed up.
229 if (Symbol
->isInSection()) {
230 const MCSectionMachO
&Section
=
231 static_cast<const MCSectionMachO
&>(*Fragment
->getParent());
232 if (Section
.hasAttribute(MachO::S_ATTR_DEBUG
))
236 // x86_64 almost always uses external relocations, except when there is no
237 // symbol to use as a base address (a local symbol with no preceding
238 // non-local symbol).
240 // Add the local offset, if needed.
241 if (RelSymbol
!= Symbol
)
242 Value
+= Layout
.getSymbolOffset(*Symbol
) -
243 Layout
.getSymbolOffset(*RelSymbol
);
244 } else if (Symbol
->isInSection() && !Symbol
->isVariable()) {
245 // The index is the section ordinal (1-based).
246 Index
= Symbol
->getFragment()->getParent()->getOrdinal() + 1;
247 Value
+= Writer
->getSymbolAddress(*Symbol
, Layout
);
250 Value
-= FixupAddress
+ (1 << Log2Size
);
251 } else if (Symbol
->isVariable()) {
// Note: this local named Value is an MCExpr pointer and shadows the outer
// Value within this branch.
252 const MCExpr
*Value
= Symbol
->getVariableValue();
254 bool isAbs
= Value
->evaluateAsAbsolute(Res
, Layout
,
255 Writer
->getSectionAddressMap());
260 Asm
.getContext().reportError(Fixup
.getLoc(),
261 "unsupported relocation of variable '" +
262 Symbol
->getName() + "'");
266 Asm
.getContext().reportError(
267 Fixup
.getLoc(), "unsupported relocation of undefined symbol '" +
268 Symbol
->getName() + "'");
// Choose the relocation type from the symbol modifier.
272 MCSymbolRefExpr::VariantKind Modifier
= Target
.getSymA()->getKind();
275 if (Modifier
== MCSymbolRefExpr::VK_GOTPCREL
) {
276 // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
277 // rewrite the movq to an leaq at link time if the symbol ends up in
278 // the same linkage unit.
279 if (Fixup
.getTargetKind() == X86::reloc_riprel_4byte_movq_load
)
280 Type
= MachO::X86_64_RELOC_GOT_LOAD
;
282 Type
= MachO::X86_64_RELOC_GOT
;
283 } else if (Modifier
== MCSymbolRefExpr::VK_TLVP
) {
284 Type
= MachO::X86_64_RELOC_TLV
;
285 } else if (Modifier
!= MCSymbolRefExpr::VK_None
) {
286 Asm
.getContext().reportError(
287 Fixup
.getLoc(), "unsupported symbol modifier in relocation");
290 Type
= MachO::X86_64_RELOC_SIGNED
;
292 // The Darwin x86_64 relocation format has a problem where it cannot
293 // encode an address (L<foo> + <constant>) which is outside the atom
294 // containing L<foo>. Generally, this shouldn't occur but it does
295 // happen when we have a RIPrel instruction with data following the
296 // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
297 // adjustment Darwin x86_64 uses, the offset is still negative and the
298 // linker has no way to recognize this.
300 // To work around this, Darwin uses several special relocation types
301 // to indicate the offsets. However, the specification or
302 // implementation of these seems to also be incomplete; they should
303 // adjust the addend as well based on the actual encoded instruction
304 // (the additional bias), but instead appear to just look at the final
// The switch keys on the negated sum of the target constant and
// (1 << Log2Size); values 1, 2 and 4 select SIGNED_1, SIGNED_2 and SIGNED_4.
306 switch (-(Target
.getConstant() + (1LL << Log2Size
))) {
307 case 1: Type
= MachO::X86_64_RELOC_SIGNED_1
; break;
308 case 2: Type
= MachO::X86_64_RELOC_SIGNED_2
; break;
309 case 4: Type
= MachO::X86_64_RELOC_SIGNED_4
; break;
313 if (Modifier
!= MCSymbolRefExpr::VK_None
) {
314 Asm
.getContext().reportError(
316 "unsupported symbol modifier in branch relocation");
320 Type
= MachO::X86_64_RELOC_BRANCH
;
323 if (Modifier
== MCSymbolRefExpr::VK_GOT
) {
324 Type
= MachO::X86_64_RELOC_GOT
;
325 } else if (Modifier
== MCSymbolRefExpr::VK_GOTPCREL
) {
326 // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
327 // case all we do is set the PCrel bit in the relocation entry; this is
328 // used with exception handling, for example. The source is required to
329 // include any necessary offset directly.
330 Type
= MachO::X86_64_RELOC_GOT
;
332 } else if (Modifier
== MCSymbolRefExpr::VK_TLVP
) {
333 Asm
.getContext().reportError(
334 Fixup
.getLoc(), "TLVP symbol modifier should have been rip-rel");
336 } else if (Modifier
!= MCSymbolRefExpr::VK_None
) {
337 Asm
.getContext().reportError(
338 Fixup
.getLoc(), "unsupported symbol modifier in relocation");
341 Type
= MachO::X86_64_RELOC_UNSIGNED
;
342 if (Fixup
.getTargetKind() == X86::reloc_signed_4byte
) {
343 Asm
.getContext().reportError(
345 "32-bit absolute addressing is not supported in 64-bit mode");
352 // x86_64 always writes custom values into the fixups.
// Final entry: r_word0 holds the fixup offset; r_word1 packs Index (bit 0
// and up), IsPCRel (bit 24), Log2Size (from bit 25), IsExtern (bit 27) and
// Type (from bit 28).
355 // struct relocation_info (8 bytes)
356 MachO::any_relocation_info MRE
;
357 MRE
.r_word0
= FixupOffset
;
358 MRE
.r_word1
= (Index
<< 0) | (IsPCRel
<< 24) | (Log2Size
<< 25) |
359 (IsExtern
<< 27) | (Type
<< 28);
360 Writer
->addRelocation(RelSymbol
, Fragment
->getParent(), MRE
);
// Records a scattered relocation for a fixup whose target is a symbol A,
// optionally minus a second symbol B.
//
// Visible behavior:
//   - The incoming FixedValue is saved so it can be restored when the fixup
//     offset does not fit in 24 bits on the non-difference path.
//   - FixedValue has the section address of A added and, when B is present,
//     the section address of B subtracted.
//   - For the two SECTDIFF types a GENERIC_RELOC_PAIR entry carrying Value2
//     (the address of B) is added first.
//   - An undefined A or B (no fragment) is reported through reportError.
//
// Declared to return bool.
// NOTE(review): the return statements and parts of the r_word0/r_word1
// packing are not visible in this listing, so what each return value means
// should be confirmed against the full file.
363 bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter
*Writer
,
364 const MCAssembler
&Asm
,
365 const MCAsmLayout
&Layout
,
366 const MCFragment
*Fragment
,
367 const MCFixup
&Fixup
,
370 uint64_t &FixedValue
) {
// Keep the caller-supplied value so the large-offset fallback can undo the
// adjustments made below.
371 uint64_t OriginalFixedValue
= FixedValue
;
372 uint32_t FixupOffset
= Layout
.getFragmentOffset(Fragment
)+Fixup
.getOffset();
373 unsigned IsPCRel
= Writer
->isFixupKindPCRel(Asm
, Fixup
.getKind());
374 unsigned Type
= MachO::GENERIC_RELOC_VANILLA
;
377 const MCSymbol
*A
= &Target
.getSymA()->getSymbol();
// A symbol without a fragment is treated as undefined and reported.
379 if (!A
->getFragment()) {
380 Asm
.getContext().reportError(
382 "symbol '" + A
->getName() +
383 "' can not be undefined in a subtraction expression");
387 uint32_t Value
= Writer
->getSymbolAddress(*A
, Layout
);
388 uint64_t SecAddr
= Writer
->getSectionAddress(A
->getFragment()->getParent());
389 FixedValue
+= SecAddr
;
// Difference case: a second symbol is present.
392 if (const MCSymbolRefExpr
*B
= Target
.getSymB()) {
393 const MCSymbol
*SB
= &B
->getSymbol();
395 if (!SB
->getFragment()) {
396 Asm
.getContext().reportError(
398 "symbol '" + SB
->getName() +
399 "' can not be undefined in a subtraction expression");
403 // Select the appropriate difference relocation type.
405 // Note that there is no longer any semantic difference between these two
406 // relocation types from the linkers point of view, this is done solely for
407 // pedantic compatibility with 'as'.
408 Type
= A
->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF
409 : (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF
;
410 Value2
= Writer
->getSymbolAddress(*SB
, Layout
);
411 FixedValue
-= Writer
->getSectionAddress(SB
->getFragment()->getParent());
414 // Relocations are written out in reverse order, so the PAIR comes first.
415 if (Type
== MachO::GENERIC_RELOC_SECTDIFF
||
416 Type
== MachO::GENERIC_RELOC_LOCAL_SECTDIFF
) {
417 // If the offset is too large to fit in a scattered relocation,
418 // we're hosed. It's an unfortunate limitation of the MachO format.
419 if (FixupOffset
> 0xffffff) {
421 format("0x%x", FixupOffset
).print(Buffer
, sizeof(Buffer
));
422 Asm
.getContext().reportError(Fixup
.getLoc(),
423 Twine("Section too large, can't encode "
424 "r_address (") + Buffer
+
425 ") into 24 bits of scattered "
426 "relocation entry.");
// PAIR entry: r_address is 0 and r_word1 carries the address of B.
430 MachO::any_relocation_info MRE
;
431 MRE
.r_word0
= ((0 << 0) | // r_address
432 (MachO::GENERIC_RELOC_PAIR
<< 24) | // r_type
436 MRE
.r_word1
= Value2
;
437 Writer
->addRelocation(nullptr, Fragment
->getParent(), MRE
);
439 // If the offset is more than 24-bits, it won't fit in a scattered
440 // relocation offset field, so we fall back to using a non-scattered
441 // relocation. This is a bit risky, as if the offset reaches out of
442 // the block and the linker is doing scattered loading on this
443 // symbol, things can go badly.
445 // Required for 'as' compatibility.
446 if (FixupOffset
> 0xffffff) {
447 FixedValue
= OriginalFixedValue
;
// Main entry for the fixup itself.
452 MachO::any_relocation_info MRE
;
453 MRE
.r_word0
= ((FixupOffset
<< 0) |
459 Writer
->addRelocation(nullptr, Fragment
->getParent(), MRE
);
// Records a GENERIC_RELOC_TLV relocation against the symbol of
// Target.getSymA() for a TLVP reference.
//
// The assert requires that SymA has kind VK_TLVP and that the writer is not
// 64-bit. When a second symbol is present, FixedValue is recomputed as
//   FixupAddress - address(SymB) + Target.getConstant() + (1 << Log2Size).
//
// NOTE(review): the r_word0/r_word1 assignment lines and any update of
// IsPCRel inside the SymB branch are not visible in this listing.
463 void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter
*Writer
,
464 const MCAssembler
&Asm
,
465 const MCAsmLayout
&Layout
,
466 const MCFragment
*Fragment
,
467 const MCFixup
&Fixup
,
469 uint64_t &FixedValue
) {
470 const MCSymbolRefExpr
*SymA
= Target
.getSymA();
471 assert(SymA
->getKind() == MCSymbolRefExpr::VK_TLVP
&& !is64Bit() &&
472 "Should only be called with a 32-bit TLVP relocation!");
// Value here is the offset of the fixup: fragment offset plus the offset of
// the fixup inside the fragment.
474 unsigned Log2Size
= getFixupKindLog2Size(Fixup
.getKind());
475 uint32_t Value
= Layout
.getFragmentOffset(Fragment
)+Fixup
.getOffset();
476 unsigned IsPCRel
= 0;
478 // We're only going to have a second symbol in pic mode and it'll be a
479 // subtraction from the picbase. For 32-bit pic the addend is the difference
480 // between the picbase and the next address. For 32-bit static the addend is
482 if (auto *SymB
= Target
.getSymB()) {
483 // If this is a subtraction then we're pcrel.
484 uint32_t FixupAddress
=
485 Writer
->getFragmentAddress(Fragment
, Layout
) + Fixup
.getOffset();
487 FixedValue
= FixupAddress
-
488 Writer
->getSymbolAddress(SymB
->getSymbol(), Layout
) +
489 Target
.getConstant();
490 FixedValue
+= 1ULL << Log2Size
;
// The entry is keyed on the SymA symbol; IsPCRel, Log2Size and the TLV type
// are packed at bits 24, 25 and 28.
495 // struct relocation_info (8 bytes)
496 MachO::any_relocation_info MRE
;
499 (IsPCRel
<< 24) | (Log2Size
<< 25) | (MachO::GENERIC_RELOC_TLV
<< 28);
500 Writer
->addRelocation(&SymA
->getSymbol(), Fragment
->getParent(), MRE
);
// Records the relocation for one fixup on the non-64-bit path.
//
// Visible order of handling:
//   1. A target whose SymA has kind VK_TLVP goes to recordTLVPRelocation.
//   2. A target with a second symbol goes to recordScatteredRelocation.
//   3. A symbol that does not require an extern relocation and has a nonzero
//      offset also tries recordScatteredRelocation.
//   4. Otherwise a plain entry of type GENERIC_RELOC_VANILLA is built and
//      passed to Writer->addRelocation.
//
// NOTE(review): the returns after steps 1-3, several else lines, and the
// declarations of Index, Type and Res are not visible in this listing.
503 void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter
*Writer
,
504 const MCAssembler
&Asm
,
505 const MCAsmLayout
&Layout
,
506 const MCFragment
*Fragment
,
507 const MCFixup
&Fixup
,
509 uint64_t &FixedValue
) {
510 unsigned IsPCRel
= Writer
->isFixupKindPCRel(Asm
, Fixup
.getKind());
511 unsigned Log2Size
= getFixupKindLog2Size(Fixup
.getKind());
513 // If this is a 32-bit TLVP reloc it's handled a bit differently.
514 if (Target
.getSymA() &&
515 Target
.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP
) {
516 recordTLVPRelocation(Writer
, Asm
, Layout
, Fragment
, Fixup
, Target
,
521 // If this is a difference or a defined symbol plus an offset, then we need a
522 // scattered relocation entry. Differences always require scattered
524 if (Target
.getSymB()) {
525 recordScatteredRelocation(Writer
, Asm
, Layout
, Fragment
, Fixup
,
526 Target
, Log2Size
, FixedValue
);
530 // Get the symbol data, if any.
531 const MCSymbol
*A
= nullptr;
532 if (Target
.getSymA())
533 A
= &Target
.getSymA()->getSymbol();
535 // If this is an internal relocation with an offset, it also needs a scattered
537 uint32_t Offset
= Target
.getConstant();
// NOTE(review): the condition guarding this adjustment is not visible here;
// presumably it applies to PC-relative fixups only - confirm.
539 Offset
+= 1 << Log2Size
;
540 // Try to record the scattered relocation if needed. Fall back to non
541 // scattered if necessary (see comments in recordScatteredRelocation()
543 if (Offset
&& A
&& !Writer
->doesSymbolRequireExternRelocation(*A
) &&
544 recordScatteredRelocation(Writer
, Asm
, Layout
, Fragment
, Fixup
, Target
,
545 Log2Size
, FixedValue
))
// Non-scattered path from here on.
549 uint32_t FixupOffset
= Layout
.getFragmentOffset(Fragment
)+Fixup
.getOffset();
552 const MCSymbol
*RelSymbol
= nullptr;
554 if (Target
.isAbsolute()) { // constant
555 // SymbolNum of 0 indicates the absolute section.
557 // FIXME: Currently, these are never generated (see code below). I cannot
558 // find a case where they are actually emitted.
559 Type
= MachO::GENERIC_RELOC_VANILLA
;
561 // Resolve constant variables.
562 if (A
->isVariable()) {
564 if (A
->getVariableValue()->evaluateAsAbsolute(
565 Res
, Layout
, Writer
->getSectionAddressMap())) {
571 // Check whether we need an external or internal relocation.
572 if (Writer
->doesSymbolRequireExternRelocation(*A
)) {
574 // For external relocations, make sure to offset the fixup value to
575 // compensate for the addend of the symbol address, if it was
576 // undefined. This occurs with weak definitions, for example.
577 if (!A
->isUndefined())
578 FixedValue
-= Layout
.getSymbolOffset(*A
);
580 // The index is the section ordinal (1-based).
581 const MCSection
&Sec
= A
->getSection();
582 Index
= Sec
.getOrdinal() + 1;
583 FixedValue
+= Writer
->getSectionAddress(&Sec
);
586 FixedValue
-= Writer
->getSectionAddress(Fragment
->getParent());
588 Type
= MachO::GENERIC_RELOC_VANILLA
;
// Final entry: r_word0 holds the fixup offset; the second word packs Index,
// IsPCRel (bit 24), Log2Size (from bit 25) and Type (from bit 28).
591 // struct relocation_info (8 bytes)
592 MachO::any_relocation_info MRE
;
593 MRE
.r_word0
= FixupOffset
;
595 (Index
<< 0) | (IsPCRel
<< 24) | (Log2Size
<< 25) | (Type
<< 28);
596 Writer
->addRelocation(RelSymbol
, Fragment
->getParent(), MRE
);
// Factory function: constructs an X86MachObjectWriter from Is64Bit, CPUType
// and CPUSubtype via std::make_unique and returns it as a
// std::unique_ptr<MCObjectTargetWriter>, so ownership passes to the caller.
599 std::unique_ptr
<MCObjectTargetWriter
>
600 llvm::createX86MachObjectWriter(bool Is64Bit
, uint32_t CPUType
,
601 uint32_t CPUSubtype
) {
602 return std::make_unique
<X86MachObjectWriter
>(Is64Bit
, CPUType
, CPUSubtype
);