1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDirectives.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCFixupKindInfo.h"
20 #include "llvm/MC/MCFragment.h"
21 #include "llvm/MC/MCMachObjectWriter.h"
22 #include "llvm/MC/MCObjectWriter.h"
23 #include "llvm/MC/MCSection.h"
24 #include "llvm/MC/MCSectionMachO.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCSymbolMachO.h"
27 #include "llvm/MC/MCValue.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Support/raw_ostream.h"
42 #define DEBUG_TYPE "mc"
44 void MachObjectWriter::reset() {
46 IndirectSymBase
.clear();
48 LocalSymbolData
.clear();
49 ExternalSymbolData
.clear();
50 UndefinedSymbolData
.clear();
51 MCObjectWriter::reset();
54 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol
&S
) {
55 // Undefined symbols are always extern.
59 // References to weak definitions require external relocation entries; the
60 // definition may not always be the one in the same object file.
61 if (cast
<MCSymbolMachO
>(S
).isWeakDefinition())
64 // Otherwise, we can use an internal relocation.
68 bool MachObjectWriter::
69 MachSymbolData::operator<(const MachSymbolData
&RHS
) const {
70 return Symbol
->getName() < RHS
.Symbol
->getName();
73 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler
&Asm
, unsigned Kind
) {
74 const MCFixupKindInfo
&FKI
= Asm
.getBackend().getFixupKindInfo(
77 return FKI
.Flags
& MCFixupKindInfo::FKF_IsPCRel
;
80 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment
*Fragment
,
81 const MCAsmLayout
&Layout
) const {
82 return getSectionAddress(Fragment
->getParent()) +
83 Layout
.getFragmentOffset(Fragment
);
86 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol
&S
,
87 const MCAsmLayout
&Layout
) const {
88 // If this is a variable, then recursively evaluate now.
90 if (const MCConstantExpr
*C
=
91 dyn_cast
<const MCConstantExpr
>(S
.getVariableValue()))
95 if (!S
.getVariableValue()->evaluateAsRelocatable(Target
, &Layout
, nullptr))
96 report_fatal_error("unable to evaluate offset for variable '" +
99 // Verify that any used symbols are defined.
100 if (Target
.getSymA() && Target
.getSymA()->getSymbol().isUndefined())
101 report_fatal_error("unable to evaluate offset to undefined symbol '" +
102 Target
.getSymA()->getSymbol().getName() + "'");
103 if (Target
.getSymB() && Target
.getSymB()->getSymbol().isUndefined())
104 report_fatal_error("unable to evaluate offset to undefined symbol '" +
105 Target
.getSymB()->getSymbol().getName() + "'");
107 uint64_t Address
= Target
.getConstant();
108 if (Target
.getSymA())
109 Address
+= getSymbolAddress(Target
.getSymA()->getSymbol(), Layout
);
110 if (Target
.getSymB())
111 Address
+= getSymbolAddress(Target
.getSymB()->getSymbol(), Layout
);
115 return getSectionAddress(S
.getFragment()->getParent()) +
116 Layout
.getSymbolOffset(S
);
119 uint64_t MachObjectWriter::getPaddingSize(const MCSection
*Sec
,
120 const MCAsmLayout
&Layout
) const {
121 uint64_t EndAddr
= getSectionAddress(Sec
) + Layout
.getSectionAddressSize(Sec
);
122 unsigned Next
= Sec
->getLayoutOrder() + 1;
123 if (Next
>= Layout
.getSectionOrder().size())
126 const MCSection
&NextSec
= *Layout
.getSectionOrder()[Next
];
127 if (NextSec
.isVirtualSection())
129 return OffsetToAlignment(EndAddr
, NextSec
.getAlignment());
132 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type
,
133 unsigned NumLoadCommands
,
134 unsigned LoadCommandsSize
,
135 bool SubsectionsViaSymbols
) {
138 if (SubsectionsViaSymbols
)
139 Flags
|= MachO::MH_SUBSECTIONS_VIA_SYMBOLS
;
141 // struct mach_header (28 bytes) or
142 // struct mach_header_64 (32 bytes)
144 uint64_t Start
= W
.OS
.tell();
147 W
.write
<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64
: MachO::MH_MAGIC
);
149 W
.write
<uint32_t>(TargetObjectWriter
->getCPUType());
150 W
.write
<uint32_t>(TargetObjectWriter
->getCPUSubtype());
152 W
.write
<uint32_t>(Type
);
153 W
.write
<uint32_t>(NumLoadCommands
);
154 W
.write
<uint32_t>(LoadCommandsSize
);
155 W
.write
<uint32_t>(Flags
);
157 W
.write
<uint32_t>(0); // reserved
159 assert(W
.OS
.tell() - Start
== (is64Bit() ? sizeof(MachO::mach_header_64
)
160 : sizeof(MachO::mach_header
)));
163 void MachObjectWriter::writeWithPadding(StringRef Str
, uint64_t Size
) {
164 assert(Size
>= Str
.size());
166 W
.OS
.write_zeros(Size
- Str
.size());
169 /// writeSegmentLoadCommand - Write a segment load command.
171 /// \param NumSections The number of sections in this segment.
172 /// \param SectionDataSize The total size of the sections.
173 void MachObjectWriter::writeSegmentLoadCommand(
174 StringRef Name
, unsigned NumSections
, uint64_t VMAddr
, uint64_t VMSize
,
175 uint64_t SectionDataStartOffset
, uint64_t SectionDataSize
, uint32_t MaxProt
,
177 // struct segment_command (56 bytes) or
178 // struct segment_command_64 (72 bytes)
180 uint64_t Start
= W
.OS
.tell();
183 unsigned SegmentLoadCommandSize
=
184 is64Bit() ? sizeof(MachO::segment_command_64
):
185 sizeof(MachO::segment_command
);
186 W
.write
<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64
: MachO::LC_SEGMENT
);
187 W
.write
<uint32_t>(SegmentLoadCommandSize
+
188 NumSections
* (is64Bit() ? sizeof(MachO::section_64
) :
189 sizeof(MachO::section
)));
191 writeWithPadding(Name
, 16);
193 W
.write
<uint64_t>(VMAddr
); // vmaddr
194 W
.write
<uint64_t>(VMSize
); // vmsize
195 W
.write
<uint64_t>(SectionDataStartOffset
); // file offset
196 W
.write
<uint64_t>(SectionDataSize
); // file size
198 W
.write
<uint32_t>(VMAddr
); // vmaddr
199 W
.write
<uint32_t>(VMSize
); // vmsize
200 W
.write
<uint32_t>(SectionDataStartOffset
); // file offset
201 W
.write
<uint32_t>(SectionDataSize
); // file size
204 W
.write
<uint32_t>(MaxProt
);
206 W
.write
<uint32_t>(InitProt
);
207 W
.write
<uint32_t>(NumSections
);
208 W
.write
<uint32_t>(0); // flags
210 assert(W
.OS
.tell() - Start
== SegmentLoadCommandSize
);
213 void MachObjectWriter::writeSection(const MCAsmLayout
&Layout
,
214 const MCSection
&Sec
, uint64_t VMAddr
,
215 uint64_t FileOffset
, unsigned Flags
,
216 uint64_t RelocationsStart
,
217 unsigned NumRelocations
) {
218 uint64_t SectionSize
= Layout
.getSectionAddressSize(&Sec
);
219 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(Sec
);
221 // The offset is unused for virtual sections.
222 if (Section
.isVirtualSection()) {
223 assert(Layout
.getSectionFileSize(&Sec
) == 0 && "Invalid file size!");
227 // struct section (68 bytes) or
228 // struct section_64 (80 bytes)
230 uint64_t Start
= W
.OS
.tell();
233 writeWithPadding(Section
.getSectionName(), 16);
234 writeWithPadding(Section
.getSegmentName(), 16);
236 W
.write
<uint64_t>(VMAddr
); // address
237 W
.write
<uint64_t>(SectionSize
); // size
239 W
.write
<uint32_t>(VMAddr
); // address
240 W
.write
<uint32_t>(SectionSize
); // size
242 W
.write
<uint32_t>(FileOffset
);
244 assert(isPowerOf2_32(Section
.getAlignment()) && "Invalid alignment!");
245 W
.write
<uint32_t>(Log2_32(Section
.getAlignment()));
246 W
.write
<uint32_t>(NumRelocations
? RelocationsStart
: 0);
247 W
.write
<uint32_t>(NumRelocations
);
248 W
.write
<uint32_t>(Flags
);
249 W
.write
<uint32_t>(IndirectSymBase
.lookup(&Sec
)); // reserved1
250 W
.write
<uint32_t>(Section
.getStubSize()); // reserved2
252 W
.write
<uint32_t>(0); // reserved3
254 assert(W
.OS
.tell() - Start
==
255 (is64Bit() ? sizeof(MachO::section_64
) : sizeof(MachO::section
)));
258 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset
,
260 uint32_t StringTableOffset
,
261 uint32_t StringTableSize
) {
262 // struct symtab_command (24 bytes)
264 uint64_t Start
= W
.OS
.tell();
267 W
.write
<uint32_t>(MachO::LC_SYMTAB
);
268 W
.write
<uint32_t>(sizeof(MachO::symtab_command
));
269 W
.write
<uint32_t>(SymbolOffset
);
270 W
.write
<uint32_t>(NumSymbols
);
271 W
.write
<uint32_t>(StringTableOffset
);
272 W
.write
<uint32_t>(StringTableSize
);
274 assert(W
.OS
.tell() - Start
== sizeof(MachO::symtab_command
));
277 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol
,
278 uint32_t NumLocalSymbols
,
279 uint32_t FirstExternalSymbol
,
280 uint32_t NumExternalSymbols
,
281 uint32_t FirstUndefinedSymbol
,
282 uint32_t NumUndefinedSymbols
,
283 uint32_t IndirectSymbolOffset
,
284 uint32_t NumIndirectSymbols
) {
285 // struct dysymtab_command (80 bytes)
287 uint64_t Start
= W
.OS
.tell();
290 W
.write
<uint32_t>(MachO::LC_DYSYMTAB
);
291 W
.write
<uint32_t>(sizeof(MachO::dysymtab_command
));
292 W
.write
<uint32_t>(FirstLocalSymbol
);
293 W
.write
<uint32_t>(NumLocalSymbols
);
294 W
.write
<uint32_t>(FirstExternalSymbol
);
295 W
.write
<uint32_t>(NumExternalSymbols
);
296 W
.write
<uint32_t>(FirstUndefinedSymbol
);
297 W
.write
<uint32_t>(NumUndefinedSymbols
);
298 W
.write
<uint32_t>(0); // tocoff
299 W
.write
<uint32_t>(0); // ntoc
300 W
.write
<uint32_t>(0); // modtaboff
301 W
.write
<uint32_t>(0); // nmodtab
302 W
.write
<uint32_t>(0); // extrefsymoff
303 W
.write
<uint32_t>(0); // nextrefsyms
304 W
.write
<uint32_t>(IndirectSymbolOffset
);
305 W
.write
<uint32_t>(NumIndirectSymbols
);
306 W
.write
<uint32_t>(0); // extreloff
307 W
.write
<uint32_t>(0); // nextrel
308 W
.write
<uint32_t>(0); // locreloff
309 W
.write
<uint32_t>(0); // nlocrel
311 assert(W
.OS
.tell() - Start
== sizeof(MachO::dysymtab_command
));
314 MachObjectWriter::MachSymbolData
*
315 MachObjectWriter::findSymbolData(const MCSymbol
&Sym
) {
316 for (auto *SymbolData
:
317 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
318 for (MachSymbolData
&Entry
: *SymbolData
)
319 if (Entry
.Symbol
== &Sym
)
325 const MCSymbol
&MachObjectWriter::findAliasedSymbol(const MCSymbol
&Sym
) const {
326 const MCSymbol
*S
= &Sym
;
327 while (S
->isVariable()) {
328 const MCExpr
*Value
= S
->getVariableValue();
329 const auto *Ref
= dyn_cast
<MCSymbolRefExpr
>(Value
);
332 S
= &Ref
->getSymbol();
337 void MachObjectWriter::writeNlist(MachSymbolData
&MSD
,
338 const MCAsmLayout
&Layout
) {
339 const MCSymbol
*Symbol
= MSD
.Symbol
;
340 const MCSymbol
&Data
= *Symbol
;
341 const MCSymbol
*AliasedSymbol
= &findAliasedSymbol(*Symbol
);
342 uint8_t SectionIndex
= MSD
.SectionIndex
;
344 uint64_t Address
= 0;
345 bool IsAlias
= Symbol
!= AliasedSymbol
;
347 const MCSymbol
&OrigSymbol
= *Symbol
;
348 MachSymbolData
*AliaseeInfo
;
350 AliaseeInfo
= findSymbolData(*AliasedSymbol
);
352 SectionIndex
= AliaseeInfo
->SectionIndex
;
353 Symbol
= AliasedSymbol
;
354 // FIXME: Should this update Data as well?
357 // Set the N_TYPE bits. See <mach-o/nlist.h>.
359 // FIXME: Are the prebound or indirect fields possible here?
360 if (IsAlias
&& Symbol
->isUndefined())
361 Type
= MachO::N_INDR
;
362 else if (Symbol
->isUndefined())
363 Type
= MachO::N_UNDF
;
364 else if (Symbol
->isAbsolute())
367 Type
= MachO::N_SECT
;
369 // FIXME: Set STAB bits.
371 if (Data
.isPrivateExtern())
372 Type
|= MachO::N_PEXT
;
375 if (Data
.isExternal() || (!IsAlias
&& Symbol
->isUndefined()))
376 Type
|= MachO::N_EXT
;
378 // Compute the symbol address.
379 if (IsAlias
&& Symbol
->isUndefined())
380 Address
= AliaseeInfo
->StringIndex
;
381 else if (Symbol
->isDefined())
382 Address
= getSymbolAddress(OrigSymbol
, Layout
);
383 else if (Symbol
->isCommon()) {
384 // Common symbols are encoded with the size in the address
385 // field, and their alignment in the flags.
386 Address
= Symbol
->getCommonSize();
389 // struct nlist (12 bytes)
391 W
.write
<uint32_t>(MSD
.StringIndex
);
393 W
.OS
<< char(SectionIndex
);
395 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
397 bool EncodeAsAltEntry
=
398 IsAlias
&& cast
<MCSymbolMachO
>(OrigSymbol
).isAltEntry();
399 W
.write
<uint16_t>(cast
<MCSymbolMachO
>(Symbol
)->getEncodedFlags(EncodeAsAltEntry
));
401 W
.write
<uint64_t>(Address
);
403 W
.write
<uint32_t>(Address
);
406 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type
,
409 uint64_t Start
= W
.OS
.tell();
412 W
.write
<uint32_t>(Type
);
413 W
.write
<uint32_t>(sizeof(MachO::linkedit_data_command
));
414 W
.write
<uint32_t>(DataOffset
);
415 W
.write
<uint32_t>(DataSize
);
417 assert(W
.OS
.tell() - Start
== sizeof(MachO::linkedit_data_command
));
420 static unsigned ComputeLinkerOptionsLoadCommandSize(
421 const std::vector
<std::string
> &Options
, bool is64Bit
)
423 unsigned Size
= sizeof(MachO::linker_option_command
);
424 for (const std::string
&Option
: Options
)
425 Size
+= Option
.size() + 1;
426 return alignTo(Size
, is64Bit
? 8 : 4);
429 void MachObjectWriter::writeLinkerOptionsLoadCommand(
430 const std::vector
<std::string
> &Options
)
432 unsigned Size
= ComputeLinkerOptionsLoadCommandSize(Options
, is64Bit());
433 uint64_t Start
= W
.OS
.tell();
436 W
.write
<uint32_t>(MachO::LC_LINKER_OPTION
);
437 W
.write
<uint32_t>(Size
);
438 W
.write
<uint32_t>(Options
.size());
439 uint64_t BytesWritten
= sizeof(MachO::linker_option_command
);
440 for (const std::string
&Option
: Options
) {
441 // Write each string, including the null byte.
442 W
.OS
<< Option
<< '\0';
443 BytesWritten
+= Option
.size() + 1;
446 // Pad to a multiple of the pointer size.
447 W
.OS
.write_zeros(OffsetToAlignment(BytesWritten
, is64Bit() ? 8 : 4));
449 assert(W
.OS
.tell() - Start
== Size
);
452 static bool isFixupTargetValid(const MCValue
&Target
) {
453 // Target is (LHS - RHS + cst).
454 // We don't support the form where LHS is null: -RHS + cst
455 if (!Target
.getSymA() && Target
.getSymB())
460 void MachObjectWriter::recordRelocation(MCAssembler
&Asm
,
461 const MCAsmLayout
&Layout
,
462 const MCFragment
*Fragment
,
463 const MCFixup
&Fixup
, MCValue Target
,
464 uint64_t &FixedValue
) {
465 if (!isFixupTargetValid(Target
)) {
466 Asm
.getContext().reportError(Fixup
.getLoc(),
467 "unsupported relocation expression");
471 TargetObjectWriter
->recordRelocation(this, Asm
, Layout
, Fragment
, Fixup
,
475 void MachObjectWriter::bindIndirectSymbols(MCAssembler
&Asm
) {
476 // This is the point where 'as' creates actual symbols for indirect symbols
477 // (in the following two passes). It would be easier for us to do this sooner
478 // when we see the attribute, but that makes getting the order in the symbol
479 // table much more complicated than it is worth.
481 // FIXME: Revisit this when the dust settles.
483 // Report errors for use of .indirect_symbol not in a symbol pointer section
485 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
486 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
) {
487 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
489 if (Section
.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS
&&
490 Section
.getType() != MachO::S_LAZY_SYMBOL_POINTERS
&&
491 Section
.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS
&&
492 Section
.getType() != MachO::S_SYMBOL_STUBS
) {
493 MCSymbol
&Symbol
= *it
->Symbol
;
494 report_fatal_error("indirect symbol '" + Symbol
.getName() +
495 "' not in a symbol pointer or stub section");
499 // Bind non-lazy symbol pointers first.
500 unsigned IndirectIndex
= 0;
501 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
502 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
, ++IndirectIndex
) {
503 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
505 if (Section
.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS
&&
506 Section
.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS
)
509 // Initialize the section indirect symbol base, if necessary.
510 IndirectSymBase
.insert(std::make_pair(it
->Section
, IndirectIndex
));
512 Asm
.registerSymbol(*it
->Symbol
);
515 // Then lazy symbol pointers and symbol stubs.
517 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
518 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
, ++IndirectIndex
) {
519 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
521 if (Section
.getType() != MachO::S_LAZY_SYMBOL_POINTERS
&&
522 Section
.getType() != MachO::S_SYMBOL_STUBS
)
525 // Initialize the section indirect symbol base, if necessary.
526 IndirectSymBase
.insert(std::make_pair(it
->Section
, IndirectIndex
));
528 // Set the symbol type to undefined lazy, but only on construction.
530 // FIXME: Do not hardcode.
532 Asm
.registerSymbol(*it
->Symbol
, &Created
);
534 cast
<MCSymbolMachO
>(it
->Symbol
)->setReferenceTypeUndefinedLazy(true);
538 /// computeSymbolTable - Compute the symbol table data
539 void MachObjectWriter::computeSymbolTable(
540 MCAssembler
&Asm
, std::vector
<MachSymbolData
> &LocalSymbolData
,
541 std::vector
<MachSymbolData
> &ExternalSymbolData
,
542 std::vector
<MachSymbolData
> &UndefinedSymbolData
) {
543 // Build section lookup table.
544 DenseMap
<const MCSection
*, uint8_t> SectionIndexMap
;
546 for (MCAssembler::iterator it
= Asm
.begin(),
547 ie
= Asm
.end(); it
!= ie
; ++it
, ++Index
)
548 SectionIndexMap
[&*it
] = Index
;
549 assert(Index
<= 256 && "Too many sections!");
551 // Build the string table.
552 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
553 if (!Asm
.isSymbolLinkerVisible(Symbol
))
556 StringTable
.add(Symbol
.getName());
558 StringTable
.finalize();
560 // Build the symbol arrays but only for non-local symbols.
562 // The particular order that we collect and then sort the symbols is chosen to
563 // match 'as'. Even though it doesn't matter for correctness, this is
564 // important for letting us diff .o files.
565 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
566 // Ignore non-linker visible symbols.
567 if (!Asm
.isSymbolLinkerVisible(Symbol
))
570 if (!Symbol
.isExternal() && !Symbol
.isUndefined())
574 MSD
.Symbol
= &Symbol
;
575 MSD
.StringIndex
= StringTable
.getOffset(Symbol
.getName());
577 if (Symbol
.isUndefined()) {
578 MSD
.SectionIndex
= 0;
579 UndefinedSymbolData
.push_back(MSD
);
580 } else if (Symbol
.isAbsolute()) {
581 MSD
.SectionIndex
= 0;
582 ExternalSymbolData
.push_back(MSD
);
584 MSD
.SectionIndex
= SectionIndexMap
.lookup(&Symbol
.getSection());
585 assert(MSD
.SectionIndex
&& "Invalid section index!");
586 ExternalSymbolData
.push_back(MSD
);
590 // Now add the data for local symbols.
591 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
592 // Ignore non-linker visible symbols.
593 if (!Asm
.isSymbolLinkerVisible(Symbol
))
596 if (Symbol
.isExternal() || Symbol
.isUndefined())
600 MSD
.Symbol
= &Symbol
;
601 MSD
.StringIndex
= StringTable
.getOffset(Symbol
.getName());
603 if (Symbol
.isAbsolute()) {
604 MSD
.SectionIndex
= 0;
605 LocalSymbolData
.push_back(MSD
);
607 MSD
.SectionIndex
= SectionIndexMap
.lookup(&Symbol
.getSection());
608 assert(MSD
.SectionIndex
&& "Invalid section index!");
609 LocalSymbolData
.push_back(MSD
);
613 // External and undefined symbols are required to be in lexicographic order.
614 llvm::sort(ExternalSymbolData
);
615 llvm::sort(UndefinedSymbolData
);
617 // Set the symbol indices.
619 for (auto *SymbolData
:
620 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
621 for (MachSymbolData
&Entry
: *SymbolData
)
622 Entry
.Symbol
->setIndex(Index
++);
624 for (const MCSection
&Section
: Asm
) {
625 for (RelAndSymbol
&Rel
: Relocations
[&Section
]) {
629 // Set the Index and the IsExtern bit.
630 unsigned Index
= Rel
.Sym
->getIndex();
631 assert(isInt
<24>(Index
));
632 if (W
.Endian
== support::little
)
633 Rel
.MRE
.r_word1
= (Rel
.MRE
.r_word1
& (~0U << 24)) | Index
| (1 << 27);
635 Rel
.MRE
.r_word1
= (Rel
.MRE
.r_word1
& 0xff) | Index
<< 8 | (1 << 4);
640 void MachObjectWriter::computeSectionAddresses(const MCAssembler
&Asm
,
641 const MCAsmLayout
&Layout
) {
642 uint64_t StartAddress
= 0;
643 for (const MCSection
*Sec
: Layout
.getSectionOrder()) {
644 StartAddress
= alignTo(StartAddress
, Sec
->getAlignment());
645 SectionAddress
[Sec
] = StartAddress
;
646 StartAddress
+= Layout
.getSectionAddressSize(Sec
);
648 // Explicitly pad the section to match the alignment requirements of the
649 // following one. This is for 'gas' compatibility, it shouldn't
650 /// strictly be necessary.
651 StartAddress
+= getPaddingSize(Sec
, Layout
);
655 void MachObjectWriter::executePostLayoutBinding(MCAssembler
&Asm
,
656 const MCAsmLayout
&Layout
) {
657 computeSectionAddresses(Asm
, Layout
);
659 // Create symbol data for any indirect symbols.
660 bindIndirectSymbols(Asm
);
663 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
664 const MCAssembler
&Asm
, const MCSymbol
&A
, const MCSymbol
&B
,
666 // FIXME: We don't handle things like
669 if (A
.isVariable() || B
.isVariable())
671 return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm
, A
, B
,
675 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
676 const MCAssembler
&Asm
, const MCSymbol
&SymA
, const MCFragment
&FB
,
677 bool InSet
, bool IsPCRel
) const {
681 // The effective address is
682 // addr(atom(A)) + offset(A)
683 // - addr(atom(B)) - offset(B)
684 // and the offsets are not relocatable, so the fixup is fully resolved when
685 // addr(atom(A)) - addr(atom(B)) == 0.
686 const MCSymbol
&SA
= findAliasedSymbol(SymA
);
687 const MCSection
&SecA
= SA
.getSection();
688 const MCSection
&SecB
= *FB
.getParent();
691 // The simple (Darwin, except on x86_64) way of dealing with this was to
692 // assume that any reference to a temporary symbol *must* be a temporary
693 // symbol in the same atom, unless the sections differ. Therefore, any PCrel
694 // relocation to a temporary symbol (in the same section) is fully
695 // resolved. This also works in conjunction with absolutized .set, which
696 // requires the compiler to use .set to absolutize the differences between
697 // symbols which the compiler knows to be assembly time constants, so we
698 // don't need to worry about considering symbol differences fully resolved.
700 // If the file isn't using sub-sections-via-symbols, we can make the
701 // same assumptions about any symbol that we normally make about
704 bool hasReliableSymbolDifference
= isX86_64();
705 if (!hasReliableSymbolDifference
) {
706 if (!SA
.isInSection() || &SecA
!= &SecB
||
707 (!SA
.isTemporary() && FB
.getAtom() != SA
.getFragment()->getAtom() &&
708 Asm
.getSubsectionsViaSymbols()))
712 // For Darwin x86_64, there is one special case when the reference IsPCRel.
713 // If the fragment with the reference does not have a base symbol but meets
714 // the simple way of dealing with this, in that it is a temporary symbol in
715 // the same atom then it is assumed to be fully resolved. This is needed so
716 // a relocation entry is not created and so the static linker does not
717 // mess up the reference later.
718 else if(!FB
.getAtom() &&
719 SA
.isTemporary() && SA
.isInSection() && &SecA
== &SecB
){
724 // If they are not in the same section, we can't compute the diff.
728 const MCFragment
*FA
= SA
.getFragment();
730 // Bail if the symbol has no fragment.
734 // If the atoms are the same, they are guaranteed to have the same address.
735 if (FA
->getAtom() == FB
.getAtom())
738 // Otherwise, we can't prove this is fully resolved.
742 static MachO::LoadCommandType
getLCFromMCVM(MCVersionMinType Type
) {
744 case MCVM_OSXVersionMin
: return MachO::LC_VERSION_MIN_MACOSX
;
745 case MCVM_IOSVersionMin
: return MachO::LC_VERSION_MIN_IPHONEOS
;
746 case MCVM_TvOSVersionMin
: return MachO::LC_VERSION_MIN_TVOS
;
747 case MCVM_WatchOSVersionMin
: return MachO::LC_VERSION_MIN_WATCHOS
;
749 llvm_unreachable("Invalid mc version min type");
752 uint64_t MachObjectWriter::writeObject(MCAssembler
&Asm
,
753 const MCAsmLayout
&Layout
) {
754 uint64_t StartOffset
= W
.OS
.tell();
756 // Compute symbol table information and bind symbol indices.
757 computeSymbolTable(Asm
, LocalSymbolData
, ExternalSymbolData
,
758 UndefinedSymbolData
);
760 unsigned NumSections
= Asm
.size();
761 const MCAssembler::VersionInfoType
&VersionInfo
=
762 Layout
.getAssembler().getVersionInfo();
764 // The section data starts after the header, the segment load command (and
765 // section headers) and the symbol table.
766 unsigned NumLoadCommands
= 1;
767 uint64_t LoadCommandsSize
= is64Bit() ?
768 sizeof(MachO::segment_command_64
) + NumSections
* sizeof(MachO::section_64
):
769 sizeof(MachO::segment_command
) + NumSections
* sizeof(MachO::section
);
771 // Add the deployment target version info load command size, if used.
772 if (VersionInfo
.Major
!= 0) {
774 if (VersionInfo
.EmitBuildVersion
)
775 LoadCommandsSize
+= sizeof(MachO::build_version_command
);
777 LoadCommandsSize
+= sizeof(MachO::version_min_command
);
780 // Add the data-in-code load command size, if used.
781 unsigned NumDataRegions
= Asm
.getDataRegions().size();
782 if (NumDataRegions
) {
784 LoadCommandsSize
+= sizeof(MachO::linkedit_data_command
);
787 // Add the loh load command size, if used.
788 uint64_t LOHRawSize
= Asm
.getLOHContainer().getEmitSize(*this, Layout
);
789 uint64_t LOHSize
= alignTo(LOHRawSize
, is64Bit() ? 8 : 4);
792 LoadCommandsSize
+= sizeof(MachO::linkedit_data_command
);
795 // Add the symbol table load command sizes, if used.
796 unsigned NumSymbols
= LocalSymbolData
.size() + ExternalSymbolData
.size() +
797 UndefinedSymbolData
.size();
799 NumLoadCommands
+= 2;
800 LoadCommandsSize
+= (sizeof(MachO::symtab_command
) +
801 sizeof(MachO::dysymtab_command
));
804 // Add the linker option load commands sizes.
805 for (const auto &Option
: Asm
.getLinkerOptions()) {
807 LoadCommandsSize
+= ComputeLinkerOptionsLoadCommandSize(Option
, is64Bit());
810 // Compute the total size of the section data, as well as its file size and vm
812 uint64_t SectionDataStart
= (is64Bit() ? sizeof(MachO::mach_header_64
) :
813 sizeof(MachO::mach_header
)) + LoadCommandsSize
;
814 uint64_t SectionDataSize
= 0;
815 uint64_t SectionDataFileSize
= 0;
817 for (const MCSection
&Sec
: Asm
) {
818 uint64_t Address
= getSectionAddress(&Sec
);
819 uint64_t Size
= Layout
.getSectionAddressSize(&Sec
);
820 uint64_t FileSize
= Layout
.getSectionFileSize(&Sec
);
821 FileSize
+= getPaddingSize(&Sec
, Layout
);
823 VMSize
= std::max(VMSize
, Address
+ Size
);
825 if (Sec
.isVirtualSection())
828 SectionDataSize
= std::max(SectionDataSize
, Address
+ Size
);
829 SectionDataFileSize
= std::max(SectionDataFileSize
, Address
+ FileSize
);
832 // The section data is padded to 4 bytes.
834 // FIXME: Is this machine dependent?
835 unsigned SectionDataPadding
= OffsetToAlignment(SectionDataFileSize
, 4);
836 SectionDataFileSize
+= SectionDataPadding
;
838 // Write the prolog, starting with the header and load command...
839 writeHeader(MachO::MH_OBJECT
, NumLoadCommands
, LoadCommandsSize
,
840 Asm
.getSubsectionsViaSymbols());
842 MachO::VM_PROT_READ
| MachO::VM_PROT_WRITE
| MachO::VM_PROT_EXECUTE
;
843 writeSegmentLoadCommand("", NumSections
, 0, VMSize
, SectionDataStart
,
844 SectionDataSize
, Prot
, Prot
);
846 // ... and then the section headers.
847 uint64_t RelocTableEnd
= SectionDataStart
+ SectionDataFileSize
;
848 for (const MCSection
&Section
: Asm
) {
849 const auto &Sec
= cast
<MCSectionMachO
>(Section
);
850 std::vector
<RelAndSymbol
> &Relocs
= Relocations
[&Sec
];
851 unsigned NumRelocs
= Relocs
.size();
852 uint64_t SectionStart
= SectionDataStart
+ getSectionAddress(&Sec
);
853 unsigned Flags
= Sec
.getTypeAndAttributes();
854 if (Sec
.hasInstructions())
855 Flags
|= MachO::S_ATTR_SOME_INSTRUCTIONS
;
856 writeSection(Layout
, Sec
, getSectionAddress(&Sec
), SectionStart
, Flags
,
857 RelocTableEnd
, NumRelocs
);
858 RelocTableEnd
+= NumRelocs
* sizeof(MachO::any_relocation_info
);
861 // Write out the deployment target information, if it's available.
862 if (VersionInfo
.Major
!= 0) {
863 auto EncodeVersion
= [](VersionTuple V
) -> uint32_t {
864 assert(!V
.empty() && "empty version");
865 unsigned Update
= V
.getSubminor() ? *V
.getSubminor() : 0;
866 unsigned Minor
= V
.getMinor() ? *V
.getMinor() : 0;
867 assert(Update
< 256 && "unencodable update target version");
868 assert(Minor
< 256 && "unencodable minor target version");
869 assert(V
.getMajor() < 65536 && "unencodable major target version");
870 return Update
| (Minor
<< 8) | (V
.getMajor() << 16);
872 uint32_t EncodedVersion
= EncodeVersion(
873 VersionTuple(VersionInfo
.Major
, VersionInfo
.Minor
, VersionInfo
.Update
));
874 uint32_t SDKVersion
= !VersionInfo
.SDKVersion
.empty()
875 ? EncodeVersion(VersionInfo
.SDKVersion
)
877 if (VersionInfo
.EmitBuildVersion
) {
878 // FIXME: Currently empty tools. Add clang version in the future.
879 W
.write
<uint32_t>(MachO::LC_BUILD_VERSION
);
880 W
.write
<uint32_t>(sizeof(MachO::build_version_command
));
881 W
.write
<uint32_t>(VersionInfo
.TypeOrPlatform
.Platform
);
882 W
.write
<uint32_t>(EncodedVersion
);
883 W
.write
<uint32_t>(SDKVersion
);
884 W
.write
<uint32_t>(0); // Empty tools list.
886 MachO::LoadCommandType LCType
887 = getLCFromMCVM(VersionInfo
.TypeOrPlatform
.Type
);
888 W
.write
<uint32_t>(LCType
);
889 W
.write
<uint32_t>(sizeof(MachO::version_min_command
));
890 W
.write
<uint32_t>(EncodedVersion
);
891 W
.write
<uint32_t>(SDKVersion
);
895 // Write the data-in-code load command, if used.
896 uint64_t DataInCodeTableEnd
= RelocTableEnd
+ NumDataRegions
* 8;
897 if (NumDataRegions
) {
898 uint64_t DataRegionsOffset
= RelocTableEnd
;
899 uint64_t DataRegionsSize
= NumDataRegions
* 8;
900 writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE
, DataRegionsOffset
,
904 // Write the loh load command, if used.
905 uint64_t LOHTableEnd
= DataInCodeTableEnd
+ LOHSize
;
907 writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT
,
908 DataInCodeTableEnd
, LOHSize
);
910 // Write the symbol table load command, if used.
912 unsigned FirstLocalSymbol
= 0;
913 unsigned NumLocalSymbols
= LocalSymbolData
.size();
914 unsigned FirstExternalSymbol
= FirstLocalSymbol
+ NumLocalSymbols
;
915 unsigned NumExternalSymbols
= ExternalSymbolData
.size();
916 unsigned FirstUndefinedSymbol
= FirstExternalSymbol
+ NumExternalSymbols
;
917 unsigned NumUndefinedSymbols
= UndefinedSymbolData
.size();
918 unsigned NumIndirectSymbols
= Asm
.indirect_symbol_size();
919 unsigned NumSymTabSymbols
=
920 NumLocalSymbols
+ NumExternalSymbols
+ NumUndefinedSymbols
;
921 uint64_t IndirectSymbolSize
= NumIndirectSymbols
* 4;
922 uint64_t IndirectSymbolOffset
= 0;
924 // If used, the indirect symbols are written after the section data.
925 if (NumIndirectSymbols
)
926 IndirectSymbolOffset
= LOHTableEnd
;
928 // The symbol table is written after the indirect symbol data.
929 uint64_t SymbolTableOffset
= LOHTableEnd
+ IndirectSymbolSize
;
931 // The string table is written after symbol table.
932 uint64_t StringTableOffset
=
933 SymbolTableOffset
+ NumSymTabSymbols
* (is64Bit() ?
934 sizeof(MachO::nlist_64
) :
935 sizeof(MachO::nlist
));
936 writeSymtabLoadCommand(SymbolTableOffset
, NumSymTabSymbols
,
937 StringTableOffset
, StringTable
.getSize());
939 writeDysymtabLoadCommand(FirstLocalSymbol
, NumLocalSymbols
,
940 FirstExternalSymbol
, NumExternalSymbols
,
941 FirstUndefinedSymbol
, NumUndefinedSymbols
,
942 IndirectSymbolOffset
, NumIndirectSymbols
);
945 // Write the linker options load commands.
946 for (const auto &Option
: Asm
.getLinkerOptions())
947 writeLinkerOptionsLoadCommand(Option
);
949 // Write the actual section data.
950 for (const MCSection
&Sec
: Asm
) {
951 Asm
.writeSectionData(W
.OS
, &Sec
, Layout
);
953 uint64_t Pad
= getPaddingSize(&Sec
, Layout
);
954 W
.OS
.write_zeros(Pad
);
957 // Write the extra padding.
958 W
.OS
.write_zeros(SectionDataPadding
);
960 // Write the relocation entries.
961 for (const MCSection
&Sec
: Asm
) {
962 // Write the section relocation entries, in reverse order to match 'as'
963 // (approximately, the exact algorithm is more complicated than this).
964 std::vector
<RelAndSymbol
> &Relocs
= Relocations
[&Sec
];
965 for (const RelAndSymbol
&Rel
: make_range(Relocs
.rbegin(), Relocs
.rend())) {
966 W
.write
<uint32_t>(Rel
.MRE
.r_word0
);
967 W
.write
<uint32_t>(Rel
.MRE
.r_word1
);
971 // Write out the data-in-code region payload, if there is one.
972 for (MCAssembler::const_data_region_iterator
973 it
= Asm
.data_region_begin(), ie
= Asm
.data_region_end();
975 const DataRegionData
*Data
= &(*it
);
976 uint64_t Start
= getSymbolAddress(*Data
->Start
, Layout
);
979 End
= getSymbolAddress(*Data
->End
, Layout
);
981 report_fatal_error("Data region not terminated");
983 LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data
->Kind
984 << " start: " << Start
<< "(" << Data
->Start
->getName()
986 << " end: " << End
<< "(" << Data
->End
->getName() << ")"
987 << " size: " << End
- Start
<< "\n");
988 W
.write
<uint32_t>(Start
);
989 W
.write
<uint16_t>(End
- Start
);
990 W
.write
<uint16_t>(Data
->Kind
);
993 // Write out the loh commands, if there is one.
996 unsigned Start
= W
.OS
.tell();
998 Asm
.getLOHContainer().emit(*this, Layout
);
999 // Pad to a multiple of the pointer size.
1000 W
.OS
.write_zeros(OffsetToAlignment(LOHRawSize
, is64Bit() ? 8 : 4));
1001 assert(W
.OS
.tell() - Start
== LOHSize
);
1004 // Write the symbol table data, if used.
1006 // Write the indirect symbol entries.
1007 for (MCAssembler::const_indirect_symbol_iterator
1008 it
= Asm
.indirect_symbol_begin(),
1009 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
) {
1010 // Indirect symbols in the non-lazy symbol pointer section have some
1011 // special handling.
1012 const MCSectionMachO
&Section
=
1013 static_cast<const MCSectionMachO
&>(*it
->Section
);
1014 if (Section
.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS
) {
1015 // If this symbol is defined and internal, mark it as such.
1016 if (it
->Symbol
->isDefined() && !it
->Symbol
->isExternal()) {
1017 uint32_t Flags
= MachO::INDIRECT_SYMBOL_LOCAL
;
1018 if (it
->Symbol
->isAbsolute())
1019 Flags
|= MachO::INDIRECT_SYMBOL_ABS
;
1020 W
.write
<uint32_t>(Flags
);
1025 W
.write
<uint32_t>(it
->Symbol
->getIndex());
1028 // FIXME: Check that offsets match computed ones.
1030 // Write the symbol table entries.
1031 for (auto *SymbolData
:
1032 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
1033 for (MachSymbolData
&Entry
: *SymbolData
)
1034 writeNlist(Entry
, Layout
);
1036 // Write the string table.
1037 StringTable
.write(W
.OS
);
1040 return W
.OS
.tell() - StartOffset
;
1043 std::unique_ptr
<MCObjectWriter
>
1044 llvm::createMachObjectWriter(std::unique_ptr
<MCMachObjectTargetWriter
> MOTW
,
1045 raw_pwrite_stream
&OS
, bool IsLittleEndian
) {
1046 return llvm::make_unique
<MachObjectWriter
>(std::move(MOTW
), OS
,