1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDirectives.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCFixupKindInfo.h"
20 #include "llvm/MC/MCFragment.h"
21 #include "llvm/MC/MCMachObjectWriter.h"
22 #include "llvm/MC/MCObjectWriter.h"
23 #include "llvm/MC/MCSection.h"
24 #include "llvm/MC/MCSectionMachO.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCSymbolMachO.h"
27 #include "llvm/MC/MCValue.h"
28 #include "llvm/Support/Alignment.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Support/raw_ostream.h"
43 #define DEBUG_TYPE "mc"
/// Clear the writer's cached per-object state -- the indirect symbol base map
/// and the local, external and undefined symbol data lists -- and then call
/// MCObjectWriter::reset().
// NOTE(review): this listing appears to have lost some lines of the
// definition (for example its closing brace); confirm against the full file.
45 void MachObjectWriter::reset() {
47 IndirectSymBase
.clear();
49 LocalSymbolData
.clear();
50 ExternalSymbolData
.clear();
51 UndefinedSymbolData
.clear();
52 MCObjectWriter::reset();
/// Decide whether a relocation that refers to \p S has to be emitted as an
/// external relocation entry.
///
/// According to the comments in the body, undefined symbols and weak
/// definitions need an external entry; anything else can use an internal one.
// NOTE(review): the undefined-symbol test and the return statements are not
// visible in this listing; confirm against the full file.
55 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol
&S
) {
56 // Undefined symbols are always extern.
60 // References to weak definitions require external relocation entries; the
61 // definition may not always be the one in the same object file.
62 if (cast
<MCSymbolMachO
>(S
).isWeakDefinition())
65 // Otherwise, we can use an internal relocation.
/// Order two MachSymbolData entries by comparing the names of the symbols
/// they refer to. llvm::sort() on the symbol data vectors relies on this.
69 bool MachObjectWriter::
70 MachSymbolData::operator<(const MachSymbolData
&RHS
) const {
71 return Symbol
->getName() < RHS
.Symbol
->getName();
/// Report whether a fixup kind is PC-relative, i.e. whether the
/// MCFixupKindInfo obtained from the assembler's backend has the
/// FKF_IsPCRel flag set.
// NOTE(review): the argument passed to getFixupKindInfo() is on a line that
// is not visible here; presumably it is \p Kind -- confirm.
74 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler
&Asm
, unsigned Kind
) {
75 const MCFixupKindInfo
&FKI
= Asm
.getBackend().getFixupKindInfo(
78 return FKI
.Flags
& MCFixupKindInfo::FKF_IsPCRel
;
/// Compute the address of \p Fragment: the address of the section that
/// contains it plus the fragment's offset as reported by \p Layout.
81 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment
*Fragment
,
82 const MCAsmLayout
&Layout
) const {
83 return getSectionAddress(Fragment
->getParent()) +
84 Layout
.getFragmentOffset(Fragment
);
/// Compute the address of symbol \p S.
///
/// For a symbol that is not handled by the variable path, the result is the
/// address of the section holding the symbol's fragment plus the symbol's
/// offset from \p Layout. The variable path evaluates the symbol's value
/// expression: a constant expression is special-cased, otherwise the
/// expression is evaluated as relocatable and the addresses of the symbols it
/// mentions are looked up recursively. Evaluation failures and references to
/// undefined symbols are reported through report_fatal_error().
// NOTE(review): several lines of this definition (the isVariable() guard, the
// declaration of Target, the returns and the closing braces) are not visible
// in this listing; confirm against the full file.
87 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol
&S
,
88 const MCAsmLayout
&Layout
) const {
89 // If this is a variable, then recursively evaluate now.
91 if (const MCConstantExpr
*C
=
92 dyn_cast
<const MCConstantExpr
>(S
.getVariableValue()))
96 if (!S
.getVariableValue()->evaluateAsRelocatable(Target
, &Layout
, nullptr))
97 report_fatal_error("unable to evaluate offset for variable '" +
100 // Verify that any used symbols are defined.
101 if (Target
.getSymA() && Target
.getSymA()->getSymbol().isUndefined())
102 report_fatal_error("unable to evaluate offset to undefined symbol '" +
103 Target
.getSymA()->getSymbol().getName() + "'");
104 if (Target
.getSymB() && Target
.getSymB()->getSymbol().isUndefined())
105 report_fatal_error("unable to evaluate offset to undefined symbol '" +
106 Target
.getSymB()->getSymbol().getName() + "'");
// Start from the constant part and fold in the address of each symbol.
// NOTE(review): isFixupTargetValid() in this file describes a target as
// (LHS - RHS + cst), yet the address of SymB is added here rather than
// subtracted; confirm that this is intended.
108 uint64_t Address
= Target
.getConstant();
109 if (Target
.getSymA())
110 Address
+= getSymbolAddress(Target
.getSymA()->getSymbol(), Layout
);
111 if (Target
.getSymB())
112 Address
+= getSymbolAddress(Target
.getSymB()->getSymbol(), Layout
);
// Section address plus the symbol's offset within the layout.
116 return getSectionAddress(S
.getFragment()->getParent()) +
117 Layout
.getSymbolOffset(S
);
/// Compute how many padding bytes follow section \p Sec so that the next
/// section in layout order starts at an address satisfying its alignment.
///
/// The final return is the distance from the end address of \p Sec to the
/// next multiple of the following section's alignment.
// NOTE(review): the values returned when \p Sec is the last section and when
// the next section is virtual are on lines not visible here; presumably both
// return 0 -- confirm.
120 uint64_t MachObjectWriter::getPaddingSize(const MCSection
*Sec
,
121 const MCAsmLayout
&Layout
) const {
122 uint64_t EndAddr
= getSectionAddress(Sec
) + Layout
.getSectionAddressSize(Sec
);
// Index of the section that follows Sec in layout order.
123 unsigned Next
= Sec
->getLayoutOrder() + 1;
124 if (Next
>= Layout
.getSectionOrder().size())
127 const MCSection
&NextSec
= *Layout
.getSectionOrder()[Next
];
128 if (NextSec
.isVirtualSection())
130 return offsetToAlignment(EndAddr
, Align(NextSec
.getAlignment()));
/// Write the Mach-O file header.
///
/// \param Type The file type stored in the header.
/// \param NumLoadCommands The number of load commands that follow.
/// \param LoadCommandsSize The total size in bytes of those load commands.
/// \param SubsectionsViaSymbols When true, MH_SUBSECTIONS_VIA_SYMBOLS is
/// added to the header flags.
///
/// The fields are emitted as 32-bit words in this order: magic, CPU type, CPU
/// subtype, file type, load command count, load command size, flags, and a
/// reserved word. The closing assert checks that the number of bytes written
/// equals sizeof(mach_header_64) or sizeof(mach_header).
// NOTE(review): the declaration of Flags and the condition guarding the
// reserved word are on lines not visible here; presumably the reserved word
// is only written for 64-bit output -- confirm.
133 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type
,
134 unsigned NumLoadCommands
,
135 unsigned LoadCommandsSize
,
136 bool SubsectionsViaSymbols
) {
139 if (SubsectionsViaSymbols
)
140 Flags
|= MachO::MH_SUBSECTIONS_VIA_SYMBOLS
;
142 // struct mach_header (28 bytes) or
143 // struct mach_header_64 (32 bytes)
// Remember the stream position so the size can be checked at the end.
145 uint64_t Start
= W
.OS
.tell();
148 W
.write
<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64
: MachO::MH_MAGIC
);
150 W
.write
<uint32_t>(TargetObjectWriter
->getCPUType());
151 W
.write
<uint32_t>(TargetObjectWriter
->getCPUSubtype());
153 W
.write
<uint32_t>(Type
);
154 W
.write
<uint32_t>(NumLoadCommands
);
155 W
.write
<uint32_t>(LoadCommandsSize
);
156 W
.write
<uint32_t>(Flags
);
158 W
.write
<uint32_t>(0); // reserved
160 assert(W
.OS
.tell() - Start
== (is64Bit() ? sizeof(MachO::mach_header_64
)
161 : sizeof(MachO::mach_header
)));
/// Emit \p Str in a fixed-width field of \p Size bytes by zero-filling the
/// Size - Str.size() bytes that the string does not occupy.
///
/// \p Size must be at least Str.size(); this is asserted.
// NOTE(review): the statement that writes \p Str itself is not visible in
// this listing; presumably it precedes the zero fill -- confirm.
164 void MachObjectWriter::writeWithPadding(StringRef Str
, uint64_t Size
) {
165 assert(Size
>= Str
.size());
167 W
.OS
.write_zeros(Size
- Str
.size());
170 /// writeSegmentLoadCommand - Write a segment load command.
///
/// The command is LC_SEGMENT_64 or LC_SEGMENT depending on is64Bit(), and its
/// recorded size covers the segment command plus \p NumSections section
/// headers. The address, size and file offset fields are written as 64-bit or
/// 32-bit words, followed by the two protection words, the section count and
/// a zero flags word.
///
/// \param Name The segment name, written in a 16-byte field.
172 /// \param NumSections The number of sections in this segment.
/// \param VMAddr The value written to the vmaddr field.
/// \param VMSize The value written to the vmsize field.
/// \param SectionDataStartOffset The value written to the file offset field.
173 /// \param SectionDataSize The total size of the sections.
/// \param MaxProt The value written to the first protection word.
// NOTE(review): the remainder of the parameter list (the body also uses
// InitProt) and the is64Bit() branch that selects between the 64-bit and
// 32-bit field writes are on lines not visible in this listing.
174 void MachObjectWriter::writeSegmentLoadCommand(
175 StringRef Name
, unsigned NumSections
, uint64_t VMAddr
, uint64_t VMSize
,
176 uint64_t SectionDataStartOffset
, uint64_t SectionDataSize
, uint32_t MaxProt
,
178 // struct segment_command (56 bytes) or
179 // struct segment_command_64 (72 bytes)
181 uint64_t Start
= W
.OS
.tell();
184 unsigned SegmentLoadCommandSize
=
185 is64Bit() ? sizeof(MachO::segment_command_64
):
186 sizeof(MachO::segment_command
);
187 W
.write
<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64
: MachO::LC_SEGMENT
);
// Command size: the segment command plus one section header per section.
188 W
.write
<uint32_t>(SegmentLoadCommandSize
+
189 NumSections
* (is64Bit() ? sizeof(MachO::section_64
) :
190 sizeof(MachO::section
)));
192 writeWithPadding(Name
, 16);
// 64-bit field widths.
194 W
.write
<uint64_t>(VMAddr
); // vmaddr
195 W
.write
<uint64_t>(VMSize
); // vmsize
196 W
.write
<uint64_t>(SectionDataStartOffset
); // file offset
197 W
.write
<uint64_t>(SectionDataSize
); // file size
// 32-bit field widths.
199 W
.write
<uint32_t>(VMAddr
); // vmaddr
200 W
.write
<uint32_t>(VMSize
); // vmsize
201 W
.write
<uint32_t>(SectionDataStartOffset
); // file offset
202 W
.write
<uint32_t>(SectionDataSize
); // file size
205 W
.write
<uint32_t>(MaxProt
);
207 W
.write
<uint32_t>(InitProt
);
208 W
.write
<uint32_t>(NumSections
);
209 W
.write
<uint32_t>(0); // flags
// Only the segment command itself has been written at this point.
211 assert(W
.OS
.tell() - Start
== SegmentLoadCommandSize
);
/// Write the section header for \p Sec.
///
/// \param Layout Supplies the section's address size and file size.
/// \param Sec The section to describe; it is cast to MCSectionMachO.
/// \param VMAddr The value written to the address field.
/// \param FileOffset The value written to the file offset field.
/// \param Flags The value written to the flags field.
/// \param RelocationsStart File offset of the section's relocation entries;
/// written only when \p NumRelocations is non-zero, otherwise 0 is written.
/// \param NumRelocations Number of relocation entries of the section.
///
/// reserved1 receives the section's entry from IndirectSymBase and reserved2
/// receives the section's stub size. The closing assert checks the number of
/// bytes written against sizeof(section_64) or sizeof(section).
// NOTE(review): the body of the virtual-section branch after its assert, the
// is64Bit() branches and the closing braces are not visible in this listing.
214 void MachObjectWriter::writeSection(const MCAsmLayout
&Layout
,
215 const MCSection
&Sec
, uint64_t VMAddr
,
216 uint64_t FileOffset
, unsigned Flags
,
217 uint64_t RelocationsStart
,
218 unsigned NumRelocations
) {
219 uint64_t SectionSize
= Layout
.getSectionAddressSize(&Sec
);
220 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(Sec
);
222 // The offset is unused for virtual sections.
223 if (Section
.isVirtualSection()) {
224 assert(Layout
.getSectionFileSize(&Sec
) == 0 && "Invalid file size!");
228 // struct section (68 bytes) or
229 // struct section_64 (80 bytes)
231 uint64_t Start
= W
.OS
.tell();
// Section name and segment name each occupy a 16-byte field.
234 writeWithPadding(Section
.getName(), 16);
235 writeWithPadding(Section
.getSegmentName(), 16);
// 64-bit field widths.
237 W
.write
<uint64_t>(VMAddr
); // address
238 W
.write
<uint64_t>(SectionSize
); // size
// 32-bit field widths.
240 W
.write
<uint32_t>(VMAddr
); // address
241 W
.write
<uint32_t>(SectionSize
); // size
243 W
.write
<uint32_t>(FileOffset
);
// The alignment is stored as a power-of-two exponent.
245 assert(isPowerOf2_32(Section
.getAlignment()) && "Invalid alignment!");
246 W
.write
<uint32_t>(Log2_32(Section
.getAlignment()));
247 W
.write
<uint32_t>(NumRelocations
? RelocationsStart
: 0);
248 W
.write
<uint32_t>(NumRelocations
);
249 W
.write
<uint32_t>(Flags
);
250 W
.write
<uint32_t>(IndirectSymBase
.lookup(&Sec
)); // reserved1
251 W
.write
<uint32_t>(Section
.getStubSize()); // reserved2
253 W
.write
<uint32_t>(0); // reserved3
255 assert(W
.OS
.tell() - Start
==
256 (is64Bit() ? sizeof(MachO::section_64
) : sizeof(MachO::section
)));
/// Write an LC_SYMTAB load command.
///
/// Six 32-bit words are emitted: the command, its size, the symbol table
/// offset, the symbol count, the string table offset and the string table
/// size. The closing assert checks the bytes written against
/// sizeof(symtab_command).
///
/// \param SymbolOffset File offset of the symbol table.
/// \param StringTableOffset File offset of the string table.
/// \param StringTableSize Size in bytes of the string table.
// NOTE(review): the NumSymbols parameter used in the body is declared on a
// line that is not visible in this listing.
259 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset
,
261 uint32_t StringTableOffset
,
262 uint32_t StringTableSize
) {
263 // struct symtab_command (24 bytes)
265 uint64_t Start
= W
.OS
.tell();
268 W
.write
<uint32_t>(MachO::LC_SYMTAB
);
269 W
.write
<uint32_t>(sizeof(MachO::symtab_command
));
270 W
.write
<uint32_t>(SymbolOffset
);
271 W
.write
<uint32_t>(NumSymbols
);
272 W
.write
<uint32_t>(StringTableOffset
);
273 W
.write
<uint32_t>(StringTableSize
);
275 assert(W
.OS
.tell() - Start
== sizeof(MachO::symtab_command
));
/// Write an LC_DYSYMTAB load command.
///
/// The first-index/count pairs for the local, external and undefined symbol
/// groups and the indirect symbol table offset/count come from the
/// parameters. Every other field (table of contents, module table, external
/// reference table, external and local relocation tables) is written as zero.
/// The closing assert checks the bytes written against
/// sizeof(dysymtab_command).
///
/// \param FirstLocalSymbol Index of the first local symbol.
/// \param NumLocalSymbols Number of local symbols.
/// \param FirstExternalSymbol Index of the first external symbol.
/// \param NumExternalSymbols Number of external symbols.
/// \param FirstUndefinedSymbol Index of the first undefined symbol.
/// \param NumUndefinedSymbols Number of undefined symbols.
/// \param IndirectSymbolOffset File offset of the indirect symbol table.
/// \param NumIndirectSymbols Number of indirect symbol table entries.
278 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol
,
279 uint32_t NumLocalSymbols
,
280 uint32_t FirstExternalSymbol
,
281 uint32_t NumExternalSymbols
,
282 uint32_t FirstUndefinedSymbol
,
283 uint32_t NumUndefinedSymbols
,
284 uint32_t IndirectSymbolOffset
,
285 uint32_t NumIndirectSymbols
) {
286 // struct dysymtab_command (80 bytes)
288 uint64_t Start
= W
.OS
.tell();
291 W
.write
<uint32_t>(MachO::LC_DYSYMTAB
);
292 W
.write
<uint32_t>(sizeof(MachO::dysymtab_command
));
293 W
.write
<uint32_t>(FirstLocalSymbol
);
294 W
.write
<uint32_t>(NumLocalSymbols
);
295 W
.write
<uint32_t>(FirstExternalSymbol
);
296 W
.write
<uint32_t>(NumExternalSymbols
);
297 W
.write
<uint32_t>(FirstUndefinedSymbol
);
298 W
.write
<uint32_t>(NumUndefinedSymbols
);
299 W
.write
<uint32_t>(0); // tocoff
300 W
.write
<uint32_t>(0); // ntoc
301 W
.write
<uint32_t>(0); // modtaboff
302 W
.write
<uint32_t>(0); // nmodtab
303 W
.write
<uint32_t>(0); // extrefsymoff
304 W
.write
<uint32_t>(0); // nextrefsyms
305 W
.write
<uint32_t>(IndirectSymbolOffset
);
306 W
.write
<uint32_t>(NumIndirectSymbols
);
307 W
.write
<uint32_t>(0); // extreloff
308 W
.write
<uint32_t>(0); // nextrel
309 W
.write
<uint32_t>(0); // locreloff
310 W
.write
<uint32_t>(0); // nlocrel
312 assert(W
.OS
.tell() - Start
== sizeof(MachO::dysymtab_command
));
/// Look for the MachSymbolData entry that refers to \p Sym.
///
/// The local, external and undefined symbol lists are scanned linearly, in
/// that order, comparing each entry's Symbol pointer with the address of
/// \p Sym.
// NOTE(review): the return statements are not visible in this listing;
// presumably a pointer to the matching entry is returned and nullptr when
// nothing matches -- confirm.
315 MachObjectWriter::MachSymbolData
*
316 MachObjectWriter::findSymbolData(const MCSymbol
&Sym
) {
317 for (auto *SymbolData
:
318 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
319 for (MachSymbolData
&Entry
: *SymbolData
)
320 if (Entry
.Symbol
== &Sym
)
/// Follow a chain of symbol aliases starting at \p Sym.
///
/// While the current symbol is a variable, its value expression is inspected;
/// when that expression is a plain symbol reference the walk continues with
/// the referenced symbol.
// NOTE(review): the handling of a value that is not an MCSymbolRefExpr (Ref
// would be null) and the final return are on lines not visible here;
// presumably the walk stops and the last symbol reached is returned --
// confirm.
326 const MCSymbol
&MachObjectWriter::findAliasedSymbol(const MCSymbol
&Sym
) const {
327 const MCSymbol
*S
= &Sym
;
328 while (S
->isVariable()) {
329 const MCExpr
*Value
= S
->getVariableValue();
330 const auto *Ref
= dyn_cast
<MCSymbolRefExpr
>(Value
);
333 S
= &Ref
->getSymbol();
/// Write one symbol table (nlist) entry for \p MSD.
///
/// The symbol is first resolved through findAliasedSymbol(). The type field
/// is built from N_INDR / N_UNDF / N_SECT (plus a branch for absolute
/// symbols) and the N_PEXT and N_EXT bits. The value field holds the
/// aliasee's string index for an alias to an undefined symbol, the symbol
/// address for a defined symbol, or the common size for a common symbol. The
/// visible writes emit the string index, the section index byte, the 16-bit
/// encoded flags, and the value as a 64-bit or 32-bit word.
///
/// \param MSD The symbol data entry to emit.
/// \param Layout Used to compute the symbol's address.
// NOTE(review): several lines are not visible in this listing, including the
// declaration and write of Type, the condition under which AliaseeInfo is
// looked up, and the is64Bit() branch around the final value write.
338 void MachObjectWriter::writeNlist(MachSymbolData
&MSD
,
339 const MCAsmLayout
&Layout
) {
340 const MCSymbol
*Symbol
= MSD
.Symbol
;
341 const MCSymbol
&Data
= *Symbol
;
342 const MCSymbol
*AliasedSymbol
= &findAliasedSymbol(*Symbol
);
343 uint8_t SectionIndex
= MSD
.SectionIndex
;
345 uint64_t Address
= 0;
346 bool IsAlias
= Symbol
!= AliasedSymbol
;
// Keep a reference to the symbol as given; Symbol may be redirected to the
// aliasee below.
348 const MCSymbol
&OrigSymbol
= *Symbol
;
349 MachSymbolData
*AliaseeInfo
;
351 AliaseeInfo
= findSymbolData(*AliasedSymbol
);
353 SectionIndex
= AliaseeInfo
->SectionIndex
;
354 Symbol
= AliasedSymbol
;
355 // FIXME: Should this update Data as well?
358 // Set the N_TYPE bits. See <mach-o/nlist.h>.
360 // FIXME: Are the prebound or indirect fields possible here?
361 if (IsAlias
&& Symbol
->isUndefined())
362 Type
= MachO::N_INDR
;
363 else if (Symbol
->isUndefined())
364 Type
= MachO::N_UNDF
;
365 else if (Symbol
->isAbsolute())
368 Type
= MachO::N_SECT
;
370 // FIXME: Set STAB bits.
// Visibility bits are taken from the original symbol (Data), not the aliasee.
372 if (Data
.isPrivateExtern())
373 Type
|= MachO::N_PEXT
;
376 if (Data
.isExternal() || (!IsAlias
&& Symbol
->isUndefined()))
377 Type
|= MachO::N_EXT
;
379 // Compute the symbol address.
// For an alias to an undefined symbol the value field carries the string
// table index of the aliasee instead of an address.
380 if (IsAlias
&& Symbol
->isUndefined())
381 Address
= AliaseeInfo
->StringIndex
;
382 else if (Symbol
->isDefined())
383 Address
= getSymbolAddress(OrigSymbol
, Layout
);
384 else if (Symbol
->isCommon()) {
385 // Common symbols are encoded with the size in the address
386 // field, and their alignment in the flags.
387 Address
= Symbol
->getCommonSize();
390 // struct nlist (12 bytes)
392 W
.write
<uint32_t>(MSD
.StringIndex
);
394 W
.OS
<< char(SectionIndex
);
396 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
398 bool EncodeAsAltEntry
=
399 IsAlias
&& cast
<MCSymbolMachO
>(OrigSymbol
).isAltEntry();
400 W
.write
<uint16_t>(cast
<MCSymbolMachO
>(Symbol
)->getEncodedFlags(EncodeAsAltEntry
));
// 64-bit value field.
402 W
.write
<uint64_t>(Address
);
// 32-bit value field.
404 W
.write
<uint32_t>(Address
);
/// Write a linkedit_data_command-shaped load command: the command type, the
/// size of the command structure, and the offset and size of the data it
/// describes, each as a 32-bit word. The closing assert checks the bytes
/// written against sizeof(linkedit_data_command).
///
/// \param Type The load command value written first.
// NOTE(review): the remaining parameters (the body uses DataOffset and
// DataSize) are declared on lines not visible in this listing.
407 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type
,
410 uint64_t Start
= W
.OS
.tell();
413 W
.write
<uint32_t>(Type
);
414 W
.write
<uint32_t>(sizeof(MachO::linkedit_data_command
));
415 W
.write
<uint32_t>(DataOffset
);
416 W
.write
<uint32_t>(DataSize
);
418 assert(W
.OS
.tell() - Start
== sizeof(MachO::linkedit_data_command
));
/// Compute the size in bytes of an LC_LINKER_OPTION load command holding
/// \p Options.
///
/// The size is the fixed linker_option_command structure plus, for every
/// option, its characters and one terminating null byte, rounded up to a
/// multiple of 8 when \p is64Bit is true and of 4 otherwise.
421 static unsigned ComputeLinkerOptionsLoadCommandSize(
422 const std::vector
<std::string
> &Options
, bool is64Bit
)
424 unsigned Size
= sizeof(MachO::linker_option_command
);
425 for (const std::string
&Option
: Options
)
426 Size
+= Option
.size() + 1;
427 return alignTo(Size
, is64Bit
? 8 : 4);
/// Write an LC_LINKER_OPTION load command containing \p Options.
///
/// The command word, the total size (from
/// ComputeLinkerOptionsLoadCommandSize) and the option count are written
/// first, followed by every option string with its terminating null byte.
/// The closing assert checks that exactly the computed size was written.
// NOTE(review): the call that emits the trailing padding is cut in this
// listing; only its offsetToAlignment() argument is visible.
430 void MachObjectWriter::writeLinkerOptionsLoadCommand(
431 const std::vector
<std::string
> &Options
)
433 unsigned Size
= ComputeLinkerOptionsLoadCommandSize(Options
, is64Bit());
434 uint64_t Start
= W
.OS
.tell();
437 W
.write
<uint32_t>(MachO::LC_LINKER_OPTION
);
438 W
.write
<uint32_t>(Size
);
439 W
.write
<uint32_t>(Options
.size());
// Track the bytes emitted so far so the padding amount can be derived.
440 uint64_t BytesWritten
= sizeof(MachO::linker_option_command
);
441 for (const std::string
&Option
: Options
) {
442 // Write each string, including the null byte.
443 W
.OS
<< Option
<< '\0';
444 BytesWritten
+= Option
.size() + 1;
447 // Pad to a multiple of the pointer size.
449 offsetToAlignment(BytesWritten
, is64Bit() ? Align(8) : Align(4)));
451 assert(W
.OS
.tell() - Start
== Size
);
/// Check whether the fixup target \p Target has a form this writer supports.
/// The visible test detects a target with a subtracted symbol (SymB) but no
/// added symbol (SymA).
// NOTE(review): the return statements are not visible in this listing;
// presumably that form yields false and everything else true -- confirm.
454 static bool isFixupTargetValid(const MCValue
&Target
) {
455 // Target is (LHS - RHS + cst).
456 // We don't support the form where LHS is null: -RHS + cst
457 if (!Target
.getSymA() && Target
.getSymB())
/// Record a relocation for \p Fixup.
///
/// A target rejected by isFixupTargetValid() is reported on the assembler's
/// context as "unsupported relocation expression" at the fixup's location.
/// The work of recording the relocation is delegated to the target-specific
/// object writer.
// NOTE(review): the statement that follows the error report (presumably an
// early return) and the tail of the delegated call's argument list are not
// visible in this listing.
462 void MachObjectWriter::recordRelocation(MCAssembler
&Asm
,
463 const MCAsmLayout
&Layout
,
464 const MCFragment
*Fragment
,
465 const MCFixup
&Fixup
, MCValue Target
,
466 uint64_t &FixedValue
) {
467 if (!isFixupTargetValid(Target
)) {
468 Asm
.getContext().reportError(Fixup
.getLoc(),
469 "unsupported relocation expression");
473 TargetObjectWriter
->recordRelocation(this, Asm
, Layout
, Fragment
, Fixup
,
/// Bind the assembler's indirect symbols to symbol table entries.
///
/// Three passes are made over the indirect symbol list:
///  1. Validation: every entry must live in a non-lazy symbol pointer, lazy
///     symbol pointer, thread-local variable pointer or symbol stub section;
///     otherwise a fatal error is reported.
///  2. Entries in non-lazy and thread-local variable pointer sections: the
///     section's indirect symbol base is recorded and the symbol registered.
///  3. Entries in lazy pointer and stub sections: the base is recorded, the
///     symbol registered, and the symbol marked as an undefined lazy
///     reference.
///
/// IndirectSymBase.insert() leaves an existing entry untouched, so each
/// section keeps the index of the first indirect symbol seen for it.
// NOTE(review): the lines that skip non-matching sections in passes 2 and 3,
// the declaration of Created, the condition guarding the
// setReferenceTypeUndefinedLazy() call and the reset (if any) of
// IndirectIndex before pass 3 are not visible in this listing.
477 void MachObjectWriter::bindIndirectSymbols(MCAssembler
&Asm
) {
478 // This is the point where 'as' creates actual symbols for indirect symbols
479 // (in the following two passes). It would be easier for us to do this sooner
480 // when we see the attribute, but that makes getting the order in the symbol
481 // table much more complicated than it is worth.
483 // FIXME: Revisit this when the dust settles.
485 // Report errors for use of .indirect_symbol not in a symbol pointer section
487 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
488 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
) {
489 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
491 if (Section
.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS
&&
492 Section
.getType() != MachO::S_LAZY_SYMBOL_POINTERS
&&
493 Section
.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS
&&
494 Section
.getType() != MachO::S_SYMBOL_STUBS
) {
495 MCSymbol
&Symbol
= *it
->Symbol
;
496 report_fatal_error("indirect symbol '" + Symbol
.getName() +
497 "' not in a symbol pointer or stub section");
501 // Bind non-lazy symbol pointers first.
// IndirectIndex counts every entry of the list, whether or not it matches.
502 unsigned IndirectIndex
= 0;
503 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
504 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
, ++IndirectIndex
) {
505 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
507 if (Section
.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS
&&
508 Section
.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS
)
511 // Initialize the section indirect symbol base, if necessary.
512 IndirectSymBase
.insert(std::make_pair(it
->Section
, IndirectIndex
));
514 Asm
.registerSymbol(*it
->Symbol
);
517 // Then lazy symbol pointers and symbol stubs.
519 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
520 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
, ++IndirectIndex
) {
521 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
523 if (Section
.getType() != MachO::S_LAZY_SYMBOL_POINTERS
&&
524 Section
.getType() != MachO::S_SYMBOL_STUBS
)
527 // Initialize the section indirect symbol base, if necessary.
528 IndirectSymBase
.insert(std::make_pair(it
->Section
, IndirectIndex
));
530 // Set the symbol type to undefined lazy, but only on construction.
532 // FIXME: Do not hardcode.
534 Asm
.registerSymbol(*it
->Symbol
, &Created
);
536 cast
<MCSymbolMachO
>(it
->Symbol
)->setReferenceTypeUndefinedLazy(true);
540 /// computeSymbolTable - Compute the symbol table data
///
/// Steps, in order:
///  - map every section of \p Asm to an index;
///  - add the name of every linker-visible symbol to the string table and
///    finalize it;
///  - collect external and undefined symbols, then local symbols, into the
///    three output vectors, recording each symbol's string table offset and
///    section index (0 for undefined and absolute symbols);
///  - sort the external and undefined lists by name;
///  - assign consecutive symbol indices over local, external, undefined;
///  - patch the recorded relocations with the final symbol index and the
///    extern bit.
///
/// \param Asm The assembler whose sections and symbols are processed.
/// \param LocalSymbolData Receives the local symbols.
/// \param ExternalSymbolData Receives the defined external symbols.
/// \param UndefinedSymbolData Receives the undefined symbols.
// NOTE(review): the declarations of Index and MSD, the continue statements
// after the filtering conditions, the else branches and the closing braces
// are on lines not visible in this listing.
541 void MachObjectWriter::computeSymbolTable(
542 MCAssembler
&Asm
, std::vector
<MachSymbolData
> &LocalSymbolData
,
543 std::vector
<MachSymbolData
> &ExternalSymbolData
,
544 std::vector
<MachSymbolData
> &UndefinedSymbolData
) {
545 // Build section lookup table.
// The mapped index type is uint8_t, hence the section-count assert below.
546 DenseMap
<const MCSection
*, uint8_t> SectionIndexMap
;
548 for (MCAssembler::iterator it
= Asm
.begin(),
549 ie
= Asm
.end(); it
!= ie
; ++it
, ++Index
)
550 SectionIndexMap
[&*it
] = Index
;
551 assert(Index
<= 256 && "Too many sections!");
553 // Build the string table.
554 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
555 if (!Asm
.isSymbolLinkerVisible(Symbol
))
558 StringTable
.add(Symbol
.getName());
560 StringTable
.finalize();
562 // Build the symbol arrays but only for non-local symbols.
564 // The particular order that we collect and then sort the symbols is chosen to
565 // match 'as'. Even though it doesn't matter for correctness, this is
566 // important for letting us diff .o files.
567 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
568 // Ignore non-linker visible symbols.
569 if (!Asm
.isSymbolLinkerVisible(Symbol
))
// Symbols that are neither external nor undefined are handled by the
// local-symbol pass further down.
572 if (!Symbol
.isExternal() && !Symbol
.isUndefined())
576 MSD
.Symbol
= &Symbol
;
577 MSD
.StringIndex
= StringTable
.getOffset(Symbol
.getName());
579 if (Symbol
.isUndefined()) {
580 MSD
.SectionIndex
= 0;
581 UndefinedSymbolData
.push_back(MSD
);
582 } else if (Symbol
.isAbsolute()) {
583 MSD
.SectionIndex
= 0;
584 ExternalSymbolData
.push_back(MSD
);
586 MSD
.SectionIndex
= SectionIndexMap
.lookup(&Symbol
.getSection());
587 assert(MSD
.SectionIndex
&& "Invalid section index!");
588 ExternalSymbolData
.push_back(MSD
);
592 // Now add the data for local symbols.
593 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
594 // Ignore non-linker visible symbols.
595 if (!Asm
.isSymbolLinkerVisible(Symbol
))
598 if (Symbol
.isExternal() || Symbol
.isUndefined())
602 MSD
.Symbol
= &Symbol
;
603 MSD
.StringIndex
= StringTable
.getOffset(Symbol
.getName());
605 if (Symbol
.isAbsolute()) {
606 MSD
.SectionIndex
= 0;
607 LocalSymbolData
.push_back(MSD
);
609 MSD
.SectionIndex
= SectionIndexMap
.lookup(&Symbol
.getSection());
610 assert(MSD
.SectionIndex
&& "Invalid section index!");
611 LocalSymbolData
.push_back(MSD
);
615 // External and undefined symbols are required to be in lexicographic order.
616 llvm::sort(ExternalSymbolData
);
617 llvm::sort(UndefinedSymbolData
);
619 // Set the symbol indices.
621 for (auto *SymbolData
:
622 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
623 for (MachSymbolData
&Entry
: *SymbolData
)
624 Entry
.Symbol
->setIndex(Index
++);
// Now that indices are final, patch them into the recorded relocations.
626 for (const MCSection
&Section
: Asm
) {
627 for (RelAndSymbol
&Rel
: Relocations
[&Section
]) {
631 // Set the Index and the IsExtern bit.
632 unsigned Index
= Rel
.Sym
->getIndex();
633 assert(isInt
<24>(Index
));
// Little-endian: keep the top 8 bits of r_word1, put the index in the low
// bits and set bit 27.
634 if (W
.Endian
== support::little
)
635 Rel
.MRE
.r_word1
= (Rel
.MRE
.r_word1
& (~0U << 24)) | Index
| (1 << 27);
// Other branch: keep the low 8 bits of r_word1, put the index in the bits
// above them and set bit 4.
637 Rel
.MRE
.r_word1
= (Rel
.MRE
.r_word1
& 0xff) | Index
<< 8 | (1 << 4);
/// Assign an address to every section, walking the sections in layout order.
///
/// A running address starts at 0. For each section it is rounded up to the
/// section's alignment and stored in SectionAddress, then advanced by the
/// section's address size and by the padding that getPaddingSize() reports
/// for that section.
642 void MachObjectWriter::computeSectionAddresses(const MCAssembler
&Asm
,
643 const MCAsmLayout
&Layout
) {
644 uint64_t StartAddress
= 0;
645 for (const MCSection
*Sec
: Layout
.getSectionOrder()) {
646 StartAddress
= alignTo(StartAddress
, Sec
->getAlignment());
647 SectionAddress
[Sec
] = StartAddress
;
648 StartAddress
+= Layout
.getSectionAddressSize(Sec
);
650 // Explicitly pad the section to match the alignment requirements of the
651 // following one. This is for 'gas' compatibility, it shouldn't
652 // strictly be necessary.
653 StartAddress
+= getPaddingSize(Sec
, Layout
);
/// Post-layout hook: compute the address of every section and then bind the
/// assembler's indirect symbols.
657 void MachObjectWriter::executePostLayoutBinding(MCAssembler
&Asm
,
658 const MCAsmLayout
&Layout
) {
659 computeSectionAddresses(Asm
, Layout
);
661 // Create symbol data for any indirect symbols.
662 bindIndirectSymbols(Asm
);
/// Symbol/symbol overload: decide whether the difference between symbols
/// \p A and \p B is fully resolved at assembly time.
///
/// The case where either symbol is a variable is handled separately; all
/// other cases are forwarded to
/// MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl().
// NOTE(review): the rest of the parameter list, the value returned for the
// variable case and the tail of the forwarded call are on lines not visible
// here; presumably the variable case returns false -- confirm.
665 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
666 const MCAssembler
&Asm
, const MCSymbol
&A
, const MCSymbol
&B
,
668 // FIXME: We don't handle things like
671 if (A
.isVariable() || B
.isVariable())
673 return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm
, A
, B
,
/// Symbol/fragment overload: decide whether the difference between \p SymA
/// and a location in fragment \p FB is fully resolved at assembly time.
///
/// \p SymA is first resolved through findAliasedSymbol(). The decision then
/// depends on whether the target is x86_64, whether the symbol is in a
/// section and in the same section as \p FB, whether the symbol is temporary,
/// whether subsections-via-symbols is enabled, and whether the symbol's
/// fragment and \p FB belong to the same atom.
///
/// \param Asm Queried for the subsections-via-symbols setting.
/// \param SymA The symbol on the left-hand side of the difference.
/// \param FB The fragment containing the other operand.
/// \param InSet Not referenced by any line visible in this listing.
/// \param IsPCRel Not referenced by any line visible in this listing.
// NOTE(review): every return statement and several conditions (including the
// one the comments say depends on IsPCRel) are on lines not visible in this
// listing; the outcome of each branch cannot be read from what is shown.
677 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
678 const MCAssembler
&Asm
, const MCSymbol
&SymA
, const MCFragment
&FB
,
679 bool InSet
, bool IsPCRel
) const {
683 // The effective address is
684 // addr(atom(A)) + offset(A)
685 // - addr(atom(B)) - offset(B)
686 // and the offsets are not relocatable, so the fixup is fully resolved when
687 // addr(atom(A)) - addr(atom(B)) == 0.
688 const MCSymbol
&SA
= findAliasedSymbol(SymA
);
689 const MCSection
&SecA
= SA
.getSection();
690 const MCSection
&SecB
= *FB
.getParent();
693 // The simple (Darwin, except on x86_64) way of dealing with this was to
694 // assume that any reference to a temporary symbol *must* be a temporary
695 // symbol in the same atom, unless the sections differ. Therefore, any PCrel
696 // relocation to a temporary symbol (in the same section) is fully
697 // resolved. This also works in conjunction with absolutized .set, which
698 // requires the compiler to use .set to absolutize the differences between
699 // symbols which the compiler knows to be assembly time constants, so we
700 // don't need to worry about considering symbol differences fully resolved.
702 // If the file isn't using sub-sections-via-symbols, we can make the
703 // same assumptions about any symbol that we normally make about
706 bool hasReliableSymbolDifference
= isX86_64();
707 if (!hasReliableSymbolDifference
) {
708 if (!SA
.isInSection() || &SecA
!= &SecB
||
709 (!SA
.isTemporary() && FB
.getAtom() != SA
.getFragment()->getAtom() &&
710 Asm
.getSubsectionsViaSymbols()))
714 // For Darwin x86_64, there is one special case when the reference IsPCRel.
715 // If the fragment with the reference does not have a base symbol but meets
716 // the simple way of dealing with this, in that it is a temporary symbol in
717 // the same atom then it is assumed to be fully resolved. This is needed so
718 // a relocation entry is not created and so the static linker does not
719 // mess up the reference later.
720 else if(!FB
.getAtom() &&
721 SA
.isTemporary() && SA
.isInSection() && &SecA
== &SecB
){
726 // If they are not in the same section, we can't compute the diff.
730 const MCFragment
*FA
= SA
.getFragment();
732 // Bail if the symbol has no fragment.
736 // If the atoms are the same, they are guaranteed to have the same address.
737 if (FA
->getAtom() == FB
.getAtom())
740 // Otherwise, we can't prove this is fully resolved.
/// Map a version-min directive kind to the matching Mach-O load command:
/// macOS, iOS, tvOS and watchOS each select their LC_VERSION_MIN_* value.
/// Any other value reaches llvm_unreachable().
// NOTE(review): the switch statement header and its closing brace are not
// visible in this listing.
744 static MachO::LoadCommandType
getLCFromMCVM(MCVersionMinType Type
) {
746 case MCVM_OSXVersionMin
: return MachO::LC_VERSION_MIN_MACOSX
;
747 case MCVM_IOSVersionMin
: return MachO::LC_VERSION_MIN_IPHONEOS
;
748 case MCVM_TvOSVersionMin
: return MachO::LC_VERSION_MIN_TVOS
;
749 case MCVM_WatchOSVersionMin
: return MachO::LC_VERSION_MIN_WATCHOS
;
751 llvm_unreachable("Invalid mc version min type");
/// Write the complete Mach-O object file for \p Asm to the output stream.
///
/// The function first computes the symbol table and the sizes and offsets of
/// every part of the file, then emits, in this order:
///  - the file header;
///  - the single segment load command and one section header per section;
///  - the optional version (build version or version-min) load command;
///  - the optional data-in-code and linker-optimization-hint load commands;
///  - the symtab and dysymtab load commands;
///  - the linker option load commands;
///  - the section contents, each followed by its padding, plus final padding;
///  - the relocation entries of every section, in reverse order;
///  - the data-in-code entries and the linker optimization hints;
///  - the indirect symbol table, the symbol table entries and the string
///    table.
///
/// \param Asm The assembler holding sections, symbols and directives.
/// \param Layout The finished layout used for sizes, offsets and addresses.
/// \returns The number of bytes written, measured as the difference of the
/// stream position before and after.
// NOTE(review): many lines of this definition are not visible in this
// listing: the increments of NumLoadCommands for the optional commands, the
// declarations of VMSize, Prot and End, several if/else headers (for example
// the LOHSize and NumSymbols guards) and all closing braces. Confirm details
// against the full file.
754 uint64_t MachObjectWriter::writeObject(MCAssembler
&Asm
,
755 const MCAsmLayout
&Layout
) {
756 uint64_t StartOffset
= W
.OS
.tell();
758 // Compute symbol table information and bind symbol indices.
759 computeSymbolTable(Asm
, LocalSymbolData
, ExternalSymbolData
,
760 UndefinedSymbolData
);
762 unsigned NumSections
= Asm
.size();
763 const MCAssembler::VersionInfoType
&VersionInfo
=
764 Layout
.getAssembler().getVersionInfo();
766 // The section data starts after the header, the segment load command (and
767 // section headers) and the symbol table.
// The count starts at 1 for the segment load command.
768 unsigned NumLoadCommands
= 1;
769 uint64_t LoadCommandsSize
= is64Bit() ?
770 sizeof(MachO::segment_command_64
) + NumSections
* sizeof(MachO::section_64
):
771 sizeof(MachO::segment_command
) + NumSections
* sizeof(MachO::section
);
773 // Add the deployment target version info load command size, if used.
774 if (VersionInfo
.Major
!= 0) {
776 if (VersionInfo
.EmitBuildVersion
)
777 LoadCommandsSize
+= sizeof(MachO::build_version_command
);
779 LoadCommandsSize
+= sizeof(MachO::version_min_command
);
782 // Add the data-in-code load command size, if used.
783 unsigned NumDataRegions
= Asm
.getDataRegions().size();
784 if (NumDataRegions
) {
786 LoadCommandsSize
+= sizeof(MachO::linkedit_data_command
);
789 // Add the loh load command size, if used.
// LOHSize is the raw emit size rounded up to the pointer size.
790 uint64_t LOHRawSize
= Asm
.getLOHContainer().getEmitSize(*this, Layout
);
791 uint64_t LOHSize
= alignTo(LOHRawSize
, is64Bit() ? 8 : 4);
794 LoadCommandsSize
+= sizeof(MachO::linkedit_data_command
);
797 // Add the symbol table load command sizes, if used.
798 unsigned NumSymbols
= LocalSymbolData
.size() + ExternalSymbolData
.size() +
799 UndefinedSymbolData
.size();
801 NumLoadCommands
+= 2;
802 LoadCommandsSize
+= (sizeof(MachO::symtab_command
) +
803 sizeof(MachO::dysymtab_command
));
806 // Add the linker option load commands sizes.
807 for (const auto &Option
: Asm
.getLinkerOptions()) {
809 LoadCommandsSize
+= ComputeLinkerOptionsLoadCommandSize(Option
, is64Bit());
812 // Compute the total size of the section data, as well as its file size and vm
// Section data begins right after the header and all load commands.
814 uint64_t SectionDataStart
= (is64Bit() ? sizeof(MachO::mach_header_64
) :
815 sizeof(MachO::mach_header
)) + LoadCommandsSize
;
816 uint64_t SectionDataSize
= 0;
817 uint64_t SectionDataFileSize
= 0;
819 for (const MCSection
&Sec
: Asm
) {
820 uint64_t Address
= getSectionAddress(&Sec
);
821 uint64_t Size
= Layout
.getSectionAddressSize(&Sec
);
822 uint64_t FileSize
= Layout
.getSectionFileSize(&Sec
);
823 FileSize
+= getPaddingSize(&Sec
, Layout
);
825 VMSize
= std::max(VMSize
, Address
+ Size
);
827 if (Sec
.isVirtualSection())
830 SectionDataSize
= std::max(SectionDataSize
, Address
+ Size
);
831 SectionDataFileSize
= std::max(SectionDataFileSize
, Address
+ FileSize
);
834 // The section data is padded to pointer size bytes.
836 // FIXME: Is this machine dependent?
837 unsigned SectionDataPadding
=
838 offsetToAlignment(SectionDataFileSize
, is64Bit() ? Align(8) : Align(4));
839 SectionDataFileSize
+= SectionDataPadding
;
841 // Write the prolog, starting with the header and load command...
842 writeHeader(MachO::MH_OBJECT
, NumLoadCommands
, LoadCommandsSize
,
843 Asm
.getSubsectionsViaSymbols());
845 MachO::VM_PROT_READ
| MachO::VM_PROT_WRITE
| MachO::VM_PROT_EXECUTE
;
// One unnamed segment holds every section; the same protection value is
// passed for both protection arguments.
846 writeSegmentLoadCommand("", NumSections
, 0, VMSize
, SectionDataStart
,
847 SectionDataSize
, Prot
, Prot
);
849 // ... and then the section headers.
// RelocTableEnd advances past each section's relocation entries in turn, so
// inside the loop it is the offset where the current section's entries start.
850 uint64_t RelocTableEnd
= SectionDataStart
+ SectionDataFileSize
;
851 for (const MCSection
&Section
: Asm
) {
852 const auto &Sec
= cast
<MCSectionMachO
>(Section
);
853 std::vector
<RelAndSymbol
> &Relocs
= Relocations
[&Sec
];
854 unsigned NumRelocs
= Relocs
.size();
855 uint64_t SectionStart
= SectionDataStart
+ getSectionAddress(&Sec
);
856 unsigned Flags
= Sec
.getTypeAndAttributes();
857 if (Sec
.hasInstructions())
858 Flags
|= MachO::S_ATTR_SOME_INSTRUCTIONS
;
859 writeSection(Layout
, Sec
, getSectionAddress(&Sec
), SectionStart
, Flags
,
860 RelocTableEnd
, NumRelocs
);
861 RelocTableEnd
+= NumRelocs
* sizeof(MachO::any_relocation_info
);
864 // Write out the deployment target information, if it's available.
865 if (VersionInfo
.Major
!= 0) {
// Packs a version as update | minor << 8 | major << 16.
866 auto EncodeVersion
= [](VersionTuple V
) -> uint32_t {
867 assert(!V
.empty() && "empty version");
868 unsigned Update
= V
.getSubminor() ? *V
.getSubminor() : 0;
869 unsigned Minor
= V
.getMinor() ? *V
.getMinor() : 0;
870 assert(Update
< 256 && "unencodable update target version");
871 assert(Minor
< 256 && "unencodable minor target version");
872 assert(V
.getMajor() < 65536 && "unencodable major target version");
873 return Update
| (Minor
<< 8) | (V
.getMajor() << 16);
875 uint32_t EncodedVersion
= EncodeVersion(
876 VersionTuple(VersionInfo
.Major
, VersionInfo
.Minor
, VersionInfo
.Update
));
877 uint32_t SDKVersion
= !VersionInfo
.SDKVersion
.empty()
878 ? EncodeVersion(VersionInfo
.SDKVersion
)
880 if (VersionInfo
.EmitBuildVersion
) {
881 // FIXME: Currently empty tools. Add clang version in the future.
882 W
.write
<uint32_t>(MachO::LC_BUILD_VERSION
);
883 W
.write
<uint32_t>(sizeof(MachO::build_version_command
));
884 W
.write
<uint32_t>(VersionInfo
.TypeOrPlatform
.Platform
);
885 W
.write
<uint32_t>(EncodedVersion
);
886 W
.write
<uint32_t>(SDKVersion
);
887 W
.write
<uint32_t>(0); // Empty tools list.
889 MachO::LoadCommandType LCType
890 = getLCFromMCVM(VersionInfo
.TypeOrPlatform
.Type
);
891 W
.write
<uint32_t>(LCType
);
892 W
.write
<uint32_t>(sizeof(MachO::version_min_command
));
893 W
.write
<uint32_t>(EncodedVersion
);
894 W
.write
<uint32_t>(SDKVersion
);
898 // Write the data-in-code load command, if used.
// Each data region occupies 8 bytes in the file.
899 uint64_t DataInCodeTableEnd
= RelocTableEnd
+ NumDataRegions
* 8;
900 if (NumDataRegions
) {
901 uint64_t DataRegionsOffset
= RelocTableEnd
;
902 uint64_t DataRegionsSize
= NumDataRegions
* 8;
903 writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE
, DataRegionsOffset
,
907 // Write the loh load command, if used.
908 uint64_t LOHTableEnd
= DataInCodeTableEnd
+ LOHSize
;
910 writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT
,
911 DataInCodeTableEnd
, LOHSize
);
913 // Write the symbol table load command, if used.
915 unsigned FirstLocalSymbol
= 0;
916 unsigned NumLocalSymbols
= LocalSymbolData
.size();
917 unsigned FirstExternalSymbol
= FirstLocalSymbol
+ NumLocalSymbols
;
918 unsigned NumExternalSymbols
= ExternalSymbolData
.size();
919 unsigned FirstUndefinedSymbol
= FirstExternalSymbol
+ NumExternalSymbols
;
920 unsigned NumUndefinedSymbols
= UndefinedSymbolData
.size();
921 unsigned NumIndirectSymbols
= Asm
.indirect_symbol_size();
922 unsigned NumSymTabSymbols
=
923 NumLocalSymbols
+ NumExternalSymbols
+ NumUndefinedSymbols
;
// Each indirect symbol table entry is one 32-bit word.
924 uint64_t IndirectSymbolSize
= NumIndirectSymbols
* 4;
925 uint64_t IndirectSymbolOffset
= 0;
927 // If used, the indirect symbols are written after the section data.
928 if (NumIndirectSymbols
)
929 IndirectSymbolOffset
= LOHTableEnd
;
931 // The symbol table is written after the indirect symbol data.
932 uint64_t SymbolTableOffset
= LOHTableEnd
+ IndirectSymbolSize
;
934 // The string table is written after symbol table.
935 uint64_t StringTableOffset
=
936 SymbolTableOffset
+ NumSymTabSymbols
* (is64Bit() ?
937 sizeof(MachO::nlist_64
) :
938 sizeof(MachO::nlist
));
939 writeSymtabLoadCommand(SymbolTableOffset
, NumSymTabSymbols
,
940 StringTableOffset
, StringTable
.getSize());
942 writeDysymtabLoadCommand(FirstLocalSymbol
, NumLocalSymbols
,
943 FirstExternalSymbol
, NumExternalSymbols
,
944 FirstUndefinedSymbol
, NumUndefinedSymbols
,
945 IndirectSymbolOffset
, NumIndirectSymbols
);
948 // Write the linker options load commands.
949 for (const auto &Option
: Asm
.getLinkerOptions())
950 writeLinkerOptionsLoadCommand(Option
);
952 // Write the actual section data.
953 for (const MCSection
&Sec
: Asm
) {
954 Asm
.writeSectionData(W
.OS
, &Sec
, Layout
);
956 uint64_t Pad
= getPaddingSize(&Sec
, Layout
);
957 W
.OS
.write_zeros(Pad
);
960 // Write the extra padding.
961 W
.OS
.write_zeros(SectionDataPadding
);
963 // Write the relocation entries.
964 for (const MCSection
&Sec
: Asm
) {
965 // Write the section relocation entries, in reverse order to match 'as'
966 // (approximately, the exact algorithm is more complicated than this).
967 std::vector
<RelAndSymbol
> &Relocs
= Relocations
[&Sec
];
968 for (const RelAndSymbol
&Rel
: make_range(Relocs
.rbegin(), Relocs
.rend())) {
969 W
.write
<uint32_t>(Rel
.MRE
.r_word0
);
970 W
.write
<uint32_t>(Rel
.MRE
.r_word1
);
974 // Write out the data-in-code region payload, if there is one.
975 for (MCAssembler::const_data_region_iterator
976 it
= Asm
.data_region_begin(), ie
= Asm
.data_region_end();
978 const DataRegionData
*Data
= &(*it
);
979 uint64_t Start
= getSymbolAddress(*Data
->Start
, Layout
);
982 End
= getSymbolAddress(*Data
->End
, Layout
);
984 report_fatal_error("Data region not terminated");
986 LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data
->Kind
987 << " start: " << Start
<< "(" << Data
->Start
->getName()
989 << " end: " << End
<< "(" << Data
->End
->getName() << ")"
990 << " size: " << End
- Start
<< "\n");
// Entry layout: 32-bit start, 16-bit length, 16-bit kind.
// NOTE(review): Start is narrowed to 32 bits and End - Start to 16 bits
// without a visible range check; confirm larger values cannot occur.
991 W
.write
<uint32_t>(Start
);
992 W
.write
<uint16_t>(End
- Start
);
993 W
.write
<uint16_t>(Data
->Kind
);
996 // Write out the loh commands, if there is one.
999 unsigned Start
= W
.OS
.tell();
1001 Asm
.getLOHContainer().emit(*this, Layout
);
1002 // Pad to a multiple of the pointer size.
1004 offsetToAlignment(LOHRawSize
, is64Bit() ? Align(8) : Align(4)));
1005 assert(W
.OS
.tell() - Start
== LOHSize
);
1008 // Write the symbol table data, if used.
1010 // Write the indirect symbol entries.
1011 for (MCAssembler::const_indirect_symbol_iterator
1012 it
= Asm
.indirect_symbol_begin(),
1013 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
) {
1014 // Indirect symbols in the non-lazy symbol pointer section have some
1015 // special handling.
1016 const MCSectionMachO
&Section
=
1017 static_cast<const MCSectionMachO
&>(*it
->Section
);
1018 if (Section
.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS
) {
1019 // If this symbol is defined and internal, mark it as such.
1020 if (it
->Symbol
->isDefined() && !it
->Symbol
->isExternal()) {
// A flags word is written for such a symbol.
1021 uint32_t Flags
= MachO::INDIRECT_SYMBOL_LOCAL
;
1022 if (it
->Symbol
->isAbsolute())
1023 Flags
|= MachO::INDIRECT_SYMBOL_ABS
;
1024 W
.write
<uint32_t>(Flags
);
1029 W
.write
<uint32_t>(it
->Symbol
->getIndex());
1032 // FIXME: Check that offsets match computed ones.
1034 // Write the symbol table entries.
1035 for (auto *SymbolData
:
1036 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
1037 for (MachSymbolData
&Entry
: *SymbolData
)
1038 writeNlist(Entry
, Layout
);
1040 // Write the string table.
1041 StringTable
.write(W
.OS
);
1044 return W
.OS
.tell() - StartOffset
;
1047 std::unique_ptr
<MCObjectWriter
>
1048 llvm::createMachObjectWriter(std::unique_ptr
<MCMachObjectTargetWriter
> MOTW
,
1049 raw_pwrite_stream
&OS
, bool IsLittleEndian
) {
1050 return std::make_unique
<MachObjectWriter
>(std::move(MOTW
), OS
,