1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDirectives.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCFixupKindInfo.h"
20 #include "llvm/MC/MCFragment.h"
21 #include "llvm/MC/MCMachObjectWriter.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCObjectWriter.h"
24 #include "llvm/MC/MCSection.h"
25 #include "llvm/MC/MCSectionMachO.h"
26 #include "llvm/MC/MCSymbol.h"
27 #include "llvm/MC/MCSymbolMachO.h"
28 #include "llvm/MC/MCValue.h"
29 #include "llvm/Support/Alignment.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/LEB128.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Support/raw_ostream.h"
45 #define DEBUG_TYPE "mc"
/// Reset the writer so it can be reused for another object: the statements
/// visible here clear the indirect-symbol base map and the local, external and
/// undefined symbol tables, then chain to the base-class reset.
// NOTE(review): this listing skips some source lines, so further members may
// be cleared here as well -- confirm against the full file.
47 void MachObjectWriter::reset() {
49 IndirectSymBase
.clear();
51 LocalSymbolData
.clear();
52 ExternalSymbolData
.clear();
53 UndefinedSymbolData
.clear();
54 MCObjectWriter::reset();
/// Decide whether a relocation that refers to \p S has to be emitted as an
/// external relocation entry. Going by the comments below there are three
/// cases: undefined symbols, weak definitions, and everything else (which may
/// use an internal relocation).
// NOTE(review): the undefined-symbol test and all return statements are not
// visible in this listing; only the weak-definition test is.
57 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol
&S
) {
58 // Undefined symbols are always extern.
62 // References to weak definitions require external relocation entries; the
63 // definition may not always be the one in the same object file.
64 if (cast
<MCSymbolMachO
>(S
).isWeakDefinition())
67 // Otherwise, we can use an internal relocation.
71 bool MachObjectWriter::
72 MachSymbolData::operator<(const MachSymbolData
&RHS
) const {
73 return Symbol
->getName() < RHS
.Symbol
->getName();
/// Report whether a fixup kind is PC-relative, by asking the assembler's
/// backend for the kind's MCFixupKindInfo and testing its FKF_IsPCRel flag.
// NOTE(review): the argument passed to getFixupKindInfo is cut off in this
// listing; presumably it is Kind converted to MCFixupKind -- confirm.
76 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler
&Asm
, unsigned Kind
) {
77 const MCFixupKindInfo
&FKI
= Asm
.getBackend().getFixupKindInfo(
80 return FKI
.Flags
& MCFixupKindInfo::FKF_IsPCRel
;
83 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment
*Fragment
,
84 const MCAsmLayout
&Layout
) const {
85 return getSectionAddress(Fragment
->getParent()) +
86 Layout
.getFragmentOffset(Fragment
);
/// Compute the address of symbol \p S under \p Layout.
///
/// For a variable symbol (see the comment below) the variable's value is
/// examined: a constant expression is detected via dyn_cast, otherwise the
/// value is evaluated as a relocatable expression and the address is built
/// from its constant plus the (recursively computed) addresses of the symbols
/// it refers to. Failure to evaluate, or a reference to an undefined symbol,
/// is reported with report_fatal_error.
///
/// For an ordinary symbol the result is the address of the section containing
/// the symbol's fragment plus the symbol's offset from \p Layout.
// NOTE(review): several source lines (the isVariable() test, the declaration
// of Target, the returns for the constant and variable cases) are not visible
// in this listing.
89 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol
&S
,
90 const MCAsmLayout
&Layout
) const {
91 // If this is a variable, then recursively evaluate now.
93 if (const MCConstantExpr
*C
=
94 dyn_cast
<const MCConstantExpr
>(S
.getVariableValue()))
98 if (!S
.getVariableValue()->evaluateAsRelocatable(Target
, &Layout
, nullptr))
99 report_fatal_error("unable to evaluate offset for variable '" +
102 // Verify that any used symbols are defined.
103 if (Target
.getSymA() && Target
.getSymA()->getSymbol().isUndefined())
104 report_fatal_error("unable to evaluate offset to undefined symbol '" +
105 Target
.getSymA()->getSymbol().getName() + "'");
106 if (Target
.getSymB() && Target
.getSymB()->getSymbol().isUndefined())
107 report_fatal_error("unable to evaluate offset to undefined symbol '" +
108 Target
.getSymB()->getSymbol().getName() + "'");
// Start from the constant part, then fold in the referenced symbols.
110 uint64_t Address
= Target
.getConstant();
111 if (Target
.getSymA())
112 Address
+= getSymbolAddress(Target
.getSymA()->getSymbol(), Layout
);
// NOTE(review): SymB's address is added, not subtracted, although a comment
// elsewhere in this file describes a target as (LHS - RHS + cst) -- confirm
// that this is intended.
113 if (Target
.getSymB())
114 Address
+= getSymbolAddress(Target
.getSymB()->getSymbol(), Layout
);
// Non-variable case: section base address plus offset within the section.
118 return getSectionAddress(S
.getFragment()->getParent()) +
119 Layout
.getSymbolOffset(S
);
/// Compute how many padding bytes follow \p Sec so that the next section in
/// layout order starts on its own alignment boundary.
///
/// The end address of \p Sec (its section address plus its address size) is
/// passed to offsetToAlignment together with the alignment of the section
/// whose layout order is one higher. Two cases are tested first: \p Sec being
/// the last section in the order, and the next section being virtual.
// NOTE(review): the statements executed for those two cases are not visible
// in this listing (presumably "return 0") -- confirm.
122 uint64_t MachObjectWriter::getPaddingSize(const MCSection
*Sec
,
123 const MCAsmLayout
&Layout
) const {
124 uint64_t EndAddr
= getSectionAddress(Sec
) + Layout
.getSectionAddressSize(Sec
);
125 unsigned Next
= Sec
->getLayoutOrder() + 1;
126 if (Next
>= Layout
.getSectionOrder().size())
129 const MCSection
&NextSec
= *Layout
.getSectionOrder()[Next
];
130 if (NextSec
.isVirtualSection())
132 return offsetToAlignment(EndAddr
, NextSec
.getAlign());
/// Write the Mach-O file header.
///
/// Emits, as 32-bit words in this order: the magic (MH_MAGIC_64 or MH_MAGIC
/// depending on is64Bit()), the CPU type and subtype from the target object
/// writer, \p Type, \p NumLoadCommands, \p LoadCommandsSize, the flags word
/// (with MH_SUBSECTIONS_VIA_SYMBOLS set when \p SubsectionsViaSymbols is
/// true) and a reserved zero word. The closing assert checks that the number
/// of bytes written equals sizeof(mach_header_64) or sizeof(mach_header).
// NOTE(review): the declaration of Flags and any condition guarding the
// reserved word are not visible in this listing; given the 28/32-byte sizes
// in the comment below, the reserved word is presumably 64-bit only.
135 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type
,
136 unsigned NumLoadCommands
,
137 unsigned LoadCommandsSize
,
138 bool SubsectionsViaSymbols
) {
141 if (SubsectionsViaSymbols
)
142 Flags
|= MachO::MH_SUBSECTIONS_VIA_SYMBOLS
;
144 // struct mach_header (28 bytes) or
145 // struct mach_header_64 (32 bytes)
// Remember the starting stream position so the size can be asserted below.
147 uint64_t Start
= W
.OS
.tell();
150 W
.write
<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64
: MachO::MH_MAGIC
);
152 W
.write
<uint32_t>(TargetObjectWriter
->getCPUType());
153 W
.write
<uint32_t>(TargetObjectWriter
->getCPUSubtype());
155 W
.write
<uint32_t>(Type
);
156 W
.write
<uint32_t>(NumLoadCommands
);
157 W
.write
<uint32_t>(LoadCommandsSize
);
158 W
.write
<uint32_t>(Flags
);
160 W
.write
<uint32_t>(0); // reserved
162 assert(W
.OS
.tell() - Start
== (is64Bit() ? sizeof(MachO::mach_header_64
)
163 : sizeof(MachO::mach_header
)));
/// Helper for fixed-width string fields: asserts that \p Str fits in \p Size
/// bytes and emits Size - Str.size() zero bytes of padding.
// NOTE(review): the statement that writes Str itself is not visible in this
// listing; presumably it precedes the zero padding -- confirm.
166 void MachObjectWriter::writeWithPadding(StringRef Str
, uint64_t Size
) {
167 assert(Size
>= Str
.size());
169 W
.OS
.write_zeros(Size
- Str
.size());
172 /// writeSegmentLoadCommand - Write a segment load command.
174 /// \param NumSections The number of sections in this segment.
175 /// \param SectionDataSize The total size of the sections.
///
/// Fields are emitted in this order: the command (LC_SEGMENT_64 or
/// LC_SEGMENT), the command size (the segment command struct plus
/// \p NumSections section headers of the matching width), \p Name padded to
/// 16 bytes, then vmaddr / vmsize / file offset / file size, followed by
/// \p MaxProt, InitProt, \p NumSections and a zero flags word. The closing
/// assert checks that exactly the segment command struct size was written.
// NOTE(review): the InitProt parameter and the condition choosing between the
// 64-bit and 32-bit field group are not visible in this listing; presumably
// the choice is made on is64Bit() -- confirm.
176 void MachObjectWriter::writeSegmentLoadCommand(
177 StringRef Name
, unsigned NumSections
, uint64_t VMAddr
, uint64_t VMSize
,
178 uint64_t SectionDataStartOffset
, uint64_t SectionDataSize
, uint32_t MaxProt
,
180 // struct segment_command (56 bytes) or
181 // struct segment_command_64 (72 bytes)
183 uint64_t Start
= W
.OS
.tell();
186 unsigned SegmentLoadCommandSize
=
187 is64Bit() ? sizeof(MachO::segment_command_64
):
188 sizeof(MachO::segment_command
);
189 W
.write
<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64
: MachO::LC_SEGMENT
);
// cmdsize covers the segment command plus all of its section headers.
190 W
.write
<uint32_t>(SegmentLoadCommandSize
+
191 NumSections
* (is64Bit() ? sizeof(MachO::section_64
) :
192 sizeof(MachO::section
)));
194 writeWithPadding(Name
, 16);
// 64-bit wide address/size fields.
196 W
.write
<uint64_t>(VMAddr
); // vmaddr
197 W
.write
<uint64_t>(VMSize
); // vmsize
198 W
.write
<uint64_t>(SectionDataStartOffset
); // file offset
199 W
.write
<uint64_t>(SectionDataSize
); // file size
// 32-bit wide address/size fields.
201 W
.write
<uint32_t>(VMAddr
); // vmaddr
202 W
.write
<uint32_t>(VMSize
); // vmsize
203 W
.write
<uint32_t>(SectionDataStartOffset
); // file offset
204 W
.write
<uint32_t>(SectionDataSize
); // file size
207 W
.write
<uint32_t>(MaxProt
);
209 W
.write
<uint32_t>(InitProt
);
210 W
.write
<uint32_t>(NumSections
);
211 W
.write
<uint32_t>(0); // flags
213 assert(W
.OS
.tell() - Start
== SegmentLoadCommandSize
);
/// Write one section header (struct section / section_64) for \p Sec.
///
/// Emits the section name and segment name (each padded to 16 bytes), the
/// address and size, \p FileOffset, log2 of the section alignment, the
/// relocation table offset (\p RelocationsStart, or 0 when \p NumRelocations
/// is 0), \p NumRelocations, \p Flags, then reserved1 (the section's entry in
/// IndirectSymBase), reserved2 (the section's stub size) and a zero reserved3.
/// The section size comes from Layout.getSectionAddressSize. The closing
/// assert checks the written size against sizeof(section_64) / sizeof(section).
// NOTE(review): the body of the virtual-section branch beyond its assert, and
// the conditions selecting the 64-bit vs 32-bit address/size pair and guarding
// reserved3, are not visible in this listing.
216 void MachObjectWriter::writeSection(const MCAsmLayout
&Layout
,
217 const MCSection
&Sec
, uint64_t VMAddr
,
218 uint64_t FileOffset
, unsigned Flags
,
219 uint64_t RelocationsStart
,
220 unsigned NumRelocations
) {
221 uint64_t SectionSize
= Layout
.getSectionAddressSize(&Sec
);
222 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(Sec
);
224 // The offset is unused for virtual sections.
225 if (Section
.isVirtualSection()) {
226 assert(Layout
.getSectionFileSize(&Sec
) == 0 && "Invalid file size!");
230 // struct section (68 bytes) or
231 // struct section_64 (80 bytes)
233 uint64_t Start
= W
.OS
.tell();
236 writeWithPadding(Section
.getName(), 16);
237 writeWithPadding(Section
.getSegmentName(), 16);
// 64-bit wide address/size fields.
239 W
.write
<uint64_t>(VMAddr
); // address
240 W
.write
<uint64_t>(SectionSize
); // size
// 32-bit wide address/size fields.
242 W
.write
<uint32_t>(VMAddr
); // address
243 W
.write
<uint32_t>(SectionSize
); // size
245 W
.write
<uint32_t>(FileOffset
);
247 W
.write
<uint32_t>(Log2(Section
.getAlign()));
// A section without relocations records a relocation offset of 0.
248 W
.write
<uint32_t>(NumRelocations
? RelocationsStart
: 0);
249 W
.write
<uint32_t>(NumRelocations
);
250 W
.write
<uint32_t>(Flags
);
251 W
.write
<uint32_t>(IndirectSymBase
.lookup(&Sec
)); // reserved1
252 W
.write
<uint32_t>(Section
.getStubSize()); // reserved2
254 W
.write
<uint32_t>(0); // reserved3
256 assert(W
.OS
.tell() - Start
==
257 (is64Bit() ? sizeof(MachO::section_64
) : sizeof(MachO::section
)));
/// Write an LC_SYMTAB load command: the command id, sizeof(symtab_command),
/// then \p SymbolOffset, NumSymbols, \p StringTableOffset and
/// \p StringTableSize, each as a 32-bit word. The closing assert checks that
/// sizeof(symtab_command) bytes were written.
// NOTE(review): the NumSymbols parameter declaration is not visible in this
// listing, although the value is written below.
260 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset
,
262 uint32_t StringTableOffset
,
263 uint32_t StringTableSize
) {
264 // struct symtab_command (24 bytes)
266 uint64_t Start
= W
.OS
.tell();
269 W
.write
<uint32_t>(MachO::LC_SYMTAB
);
270 W
.write
<uint32_t>(sizeof(MachO::symtab_command
));
271 W
.write
<uint32_t>(SymbolOffset
);
272 W
.write
<uint32_t>(NumSymbols
);
273 W
.write
<uint32_t>(StringTableOffset
);
274 W
.write
<uint32_t>(StringTableSize
);
276 assert(W
.OS
.tell() - Start
== sizeof(MachO::symtab_command
));
/// Write an LC_DYSYMTAB load command.
///
/// After the command id and sizeof(dysymtab_command), the first index and
/// count of the local, external and undefined symbol groups are written,
/// followed by the indirect symbol table offset and count. Every other field
/// (table of contents, module table, external reference table, external and
/// local relocation tables) is written as zero. The closing assert checks
/// that sizeof(dysymtab_command) bytes were written.
279 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol
,
280 uint32_t NumLocalSymbols
,
281 uint32_t FirstExternalSymbol
,
282 uint32_t NumExternalSymbols
,
283 uint32_t FirstUndefinedSymbol
,
284 uint32_t NumUndefinedSymbols
,
285 uint32_t IndirectSymbolOffset
,
286 uint32_t NumIndirectSymbols
) {
287 // struct dysymtab_command (80 bytes)
289 uint64_t Start
= W
.OS
.tell();
292 W
.write
<uint32_t>(MachO::LC_DYSYMTAB
);
293 W
.write
<uint32_t>(sizeof(MachO::dysymtab_command
));
294 W
.write
<uint32_t>(FirstLocalSymbol
);
295 W
.write
<uint32_t>(NumLocalSymbols
);
296 W
.write
<uint32_t>(FirstExternalSymbol
);
297 W
.write
<uint32_t>(NumExternalSymbols
);
298 W
.write
<uint32_t>(FirstUndefinedSymbol
);
299 W
.write
<uint32_t>(NumUndefinedSymbols
);
300 W
.write
<uint32_t>(0); // tocoff
301 W
.write
<uint32_t>(0); // ntoc
302 W
.write
<uint32_t>(0); // modtaboff
303 W
.write
<uint32_t>(0); // nmodtab
304 W
.write
<uint32_t>(0); // extrefsymoff
305 W
.write
<uint32_t>(0); // nextrefsyms
306 W
.write
<uint32_t>(IndirectSymbolOffset
);
307 W
.write
<uint32_t>(NumIndirectSymbols
);
308 W
.write
<uint32_t>(0); // extreloff
309 W
.write
<uint32_t>(0); // nextrel
310 W
.write
<uint32_t>(0); // locreloff
311 W
.write
<uint32_t>(0); // nlocrel
313 assert(W
.OS
.tell() - Start
== sizeof(MachO::dysymtab_command
));
/// Look for the MachSymbolData entry whose Symbol pointer is \p Sym, scanning
/// the local, external and undefined symbol tables in that order (a linear
/// search over each table).
// NOTE(review): the return statements are not visible in this listing;
// presumably the matching entry's address is returned, and nullptr when there
// is no match -- confirm.
316 MachObjectWriter::MachSymbolData
*
317 MachObjectWriter::findSymbolData(const MCSymbol
&Sym
) {
318 for (auto *SymbolData
:
319 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
320 for (MachSymbolData
&Entry
: *SymbolData
)
321 if (Entry
.Symbol
== &Sym
)
/// Follow a chain of variable (alias) symbols starting at \p Sym: while the
/// current symbol is a variable, its value is inspected with
/// dyn_cast<MCSymbolRefExpr> and the walk continues at the referenced symbol.
// NOTE(review): the handling of a value that is not a plain symbol reference
// (Ref == nullptr) and the final return are not visible in this listing.
327 const MCSymbol
&MachObjectWriter::findAliasedSymbol(const MCSymbol
&Sym
) const {
328 const MCSymbol
*S
= &Sym
;
329 while (S
->isVariable()) {
330 const MCExpr
*Value
= S
->getVariableValue();
331 const auto *Ref
= dyn_cast
<MCSymbolRefExpr
>(Value
);
334 S
= &Ref
->getSymbol();
/// Write one symbol table entry (nlist) for \p MSD.
///
/// The symbol is first resolved through findAliasedSymbol; IsAlias records
/// whether that produced a different symbol. In the alias-handling statements
/// the aliasee's MachSymbolData is looked up, its SectionIndex is adopted and
/// Symbol is redirected to the aliasee, while Data and OrigSymbol keep
/// referring to the original symbol.
///
/// Type bits: N_INDR for an alias of an undefined symbol, N_UNDF for an
/// undefined symbol, N_SECT in the last visible branch; N_PEXT is added when
/// Data is private-extern, and N_EXT when Data is external or the symbol is a
/// non-alias undefined symbol.
///
/// Address field: the aliasee's StringIndex for an alias of an undefined
/// symbol, getSymbolAddress(OrigSymbol) for a defined symbol, and the common
/// size for a common symbol.
///
/// Visible output: the string index (32 bits), the section index byte, the
/// 16-bit encoded flags, and the address as a 64-bit or 32-bit value.
// NOTE(review): the declaration and emission of the Type byte, the body of
// the isAbsolute() branch, the guard around the alias handling, and the
// condition choosing the 64-bit vs 32-bit address write are not visible in
// this listing.
339 void MachObjectWriter::writeNlist(MachSymbolData
&MSD
,
340 const MCAsmLayout
&Layout
) {
341 const MCSymbol
*Symbol
= MSD
.Symbol
;
342 const MCSymbol
&Data
= *Symbol
;
343 const MCSymbol
*AliasedSymbol
= &findAliasedSymbol(*Symbol
);
344 uint8_t SectionIndex
= MSD
.SectionIndex
;
346 uint64_t Address
= 0;
347 bool IsAlias
= Symbol
!= AliasedSymbol
;
349 const MCSymbol
&OrigSymbol
= *Symbol
;
350 MachSymbolData
*AliaseeInfo
;
352 AliaseeInfo
= findSymbolData(*AliasedSymbol
);
354 SectionIndex
= AliaseeInfo
->SectionIndex
;
355 Symbol
= AliasedSymbol
;
356 // FIXME: Should this update Data as well?
359 // Set the N_TYPE bits. See <mach-o/nlist.h>.
361 // FIXME: Are the prebound or indirect fields possible here?
362 if (IsAlias
&& Symbol
->isUndefined())
363 Type
= MachO::N_INDR
;
364 else if (Symbol
->isUndefined())
365 Type
= MachO::N_UNDF
;
366 else if (Symbol
->isAbsolute())
369 Type
= MachO::N_SECT
;
371 // FIXME: Set STAB bits.
373 if (Data
.isPrivateExtern())
374 Type
|= MachO::N_PEXT
;
377 if (Data
.isExternal() || (!IsAlias
&& Symbol
->isUndefined()))
378 Type
|= MachO::N_EXT
;
380 // Compute the symbol address.
381 if (IsAlias
&& Symbol
->isUndefined())
382 Address
= AliaseeInfo
->StringIndex
;
383 else if (Symbol
->isDefined())
384 Address
= getSymbolAddress(OrigSymbol
, Layout
);
385 else if (Symbol
->isCommon()) {
386 // Common symbols are encoded with the size in the address
387 // field, and their alignment in the flags.
388 Address
= Symbol
->getCommonSize();
391 // struct nlist (12 bytes)
393 W
.write
<uint32_t>(MSD
.StringIndex
);
395 W
.OS
<< char(SectionIndex
);
397 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
// Only an alias whose original symbol is an alt entry is encoded as one.
399 bool EncodeAsAltEntry
=
400 IsAlias
&& cast
<MCSymbolMachO
>(OrigSymbol
).isAltEntry();
401 W
.write
<uint16_t>(cast
<MCSymbolMachO
>(Symbol
)->getEncodedFlags(EncodeAsAltEntry
));
403 W
.write
<uint64_t>(Address
);
405 W
.write
<uint32_t>(Address
);
/// Write a linkedit_data_command-shaped load command: \p Type,
/// sizeof(linkedit_data_command), DataOffset and DataSize, each as a 32-bit
/// word. The closing assert checks the number of bytes written.
// NOTE(review): the DataOffset and DataSize parameter declarations are not
// visible in this listing, although both values are written below.
408 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type
,
411 uint64_t Start
= W
.OS
.tell();
414 W
.write
<uint32_t>(Type
);
415 W
.write
<uint32_t>(sizeof(MachO::linkedit_data_command
));
416 W
.write
<uint32_t>(DataOffset
);
417 W
.write
<uint32_t>(DataSize
);
419 assert(W
.OS
.tell() - Start
== sizeof(MachO::linkedit_data_command
));
422 static unsigned ComputeLinkerOptionsLoadCommandSize(
423 const std::vector
<std::string
> &Options
, bool is64Bit
)
425 unsigned Size
= sizeof(MachO::linker_option_command
);
426 for (const std::string
&Option
: Options
)
427 Size
+= Option
.size() + 1;
428 return alignTo(Size
, is64Bit
? 8 : 4);
/// Write one LC_LINKER_OPTION load command carrying \p Options.
///
/// Emits the command id, the total command size (taken from
/// ComputeLinkerOptionsLoadCommandSize), the number of options, and then each
/// option string followed by a NUL byte. BytesWritten tracks the header plus
/// string bytes so the tail can be padded to an 8-byte (64-bit) or 4-byte
/// boundary. The closing assert checks that exactly Size bytes were written.
// NOTE(review): the call that consumes the offsetToAlignment(...) result is
// cut off in this listing; presumably it writes that many zero bytes.
431 void MachObjectWriter::writeLinkerOptionsLoadCommand(
432 const std::vector
<std::string
> &Options
)
434 unsigned Size
= ComputeLinkerOptionsLoadCommandSize(Options
, is64Bit());
435 uint64_t Start
= W
.OS
.tell();
438 W
.write
<uint32_t>(MachO::LC_LINKER_OPTION
);
439 W
.write
<uint32_t>(Size
);
440 W
.write
<uint32_t>(Options
.size());
441 uint64_t BytesWritten
= sizeof(MachO::linker_option_command
);
442 for (const std::string
&Option
: Options
) {
443 // Write each string, including the null byte.
444 W
.OS
<< Option
<< '\0';
445 BytesWritten
+= Option
.size() + 1;
448 // Pad to a multiple of the pointer size.
450 offsetToAlignment(BytesWritten
, is64Bit() ? Align(8) : Align(4)));
452 assert(W
.OS
.tell() - Start
== Size
);
/// Check whether a fixup target has a form this writer supports. The one
/// condition tested is a target with a B symbol but no A symbol (-RHS + cst),
/// which the comment below describes as unsupported.
// NOTE(review): the return statements are not visible in this listing;
// presumably false for that form and true otherwise -- confirm.
455 static bool isFixupTargetValid(const MCValue
&Target
) {
456 // Target is (LHS - RHS + cst).
457 // We don't support the form where LHS is null: -RHS + cst
458 if (!Target
.getSymA() && Target
.getSymB())
/// Record a relocation for \p Fixup. A target rejected by isFixupTargetValid
/// is reported through the assembler context as "unsupported relocation
/// expression" at the fixup's location; otherwise the work is forwarded to
/// the target-specific object writer.
// NOTE(review): the remainder of the error branch and the trailing arguments
// of the forwarded call are not visible in this listing.
463 void MachObjectWriter::recordRelocation(MCAssembler
&Asm
,
464 const MCAsmLayout
&Layout
,
465 const MCFragment
*Fragment
,
466 const MCFixup
&Fixup
, MCValue Target
,
467 uint64_t &FixedValue
) {
468 if (!isFixupTargetValid(Target
)) {
469 Asm
.getContext().reportError(Fixup
.getLoc(),
470 "unsupported relocation expression");
474 TargetObjectWriter
->recordRelocation(this, Asm
, Layout
, Fragment
, Fixup
,
/// Bind the assembler's indirect symbols, in three passes over the indirect
/// symbol list:
///
///  1. Validation: an indirect symbol whose section type is not one of
///     S_NON_LAZY_SYMBOL_POINTERS, S_LAZY_SYMBOL_POINTERS,
///     S_THREAD_LOCAL_VARIABLE_POINTERS or S_SYMBOL_STUBS is a fatal error.
///  2. Non-lazy and thread-local-variable pointer sections: the running
///     indirect index is recorded in IndirectSymBase for the section and the
///     symbol is registered with the assembler.
///  3. Lazy pointer and symbol stub sections: same bookkeeping; in addition,
///     when registerSymbol returns true the symbol is flagged as
///     reference-type "undefined lazy".
// NOTE(review): the statements that skip entries of other section types in
// passes 2 and 3, and any reset of IndirectIndex between the passes, are not
// visible in this listing.
478 void MachObjectWriter::bindIndirectSymbols(MCAssembler
&Asm
) {
479 // This is the point where 'as' creates actual symbols for indirect symbols
480 // (in the following two passes). It would be easier for us to do this sooner
481 // when we see the attribute, but that makes getting the order in the symbol
482 // table much more complicated than it is worth.
484 // FIXME: Revisit this when the dust settles.
486 // Report errors for use of .indirect_symbol not in a symbol pointer section
488 for (IndirectSymbolData
&ISD
: llvm::make_range(Asm
.indirect_symbol_begin(),
489 Asm
.indirect_symbol_end())) {
490 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*ISD
.Section
);
492 if (Section
.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS
&&
493 Section
.getType() != MachO::S_LAZY_SYMBOL_POINTERS
&&
494 Section
.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS
&&
495 Section
.getType() != MachO::S_SYMBOL_STUBS
) {
496 MCSymbol
&Symbol
= *ISD
.Symbol
;
497 report_fatal_error("indirect symbol '" + Symbol
.getName() +
498 "' not in a symbol pointer or stub section");
502 // Bind non-lazy symbol pointers first.
503 unsigned IndirectIndex
= 0;
504 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
505 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
, ++IndirectIndex
) {
506 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
508 if (Section
.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS
&&
509 Section
.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS
)
512 // Initialize the section indirect symbol base, if necessary.
513 IndirectSymBase
.insert(std::make_pair(it
->Section
, IndirectIndex
));
515 Asm
.registerSymbol(*it
->Symbol
);
518 // Then lazy symbol pointers and symbol stubs.
520 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
521 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
, ++IndirectIndex
) {
522 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
524 if (Section
.getType() != MachO::S_LAZY_SYMBOL_POINTERS
&&
525 Section
.getType() != MachO::S_SYMBOL_STUBS
)
528 // Initialize the section indirect symbol base, if necessary.
529 IndirectSymBase
.insert(std::make_pair(it
->Section
, IndirectIndex
));
531 // Set the symbol type to undefined lazy, but only on construction.
533 // FIXME: Do not hardcode.
534 if (Asm
.registerSymbol(*it
->Symbol
))
535 cast
<MCSymbolMachO
>(it
->Symbol
)->setReferenceTypeUndefinedLazy(true);
539 /// computeSymbolTable - Compute the symbol table data
///
/// Steps, in order:
///  - map every section of \p Asm to an index stored as uint8_t (the assert
///    bounds the count at 256);
///  - add the name of every linker-visible symbol to the string table and
///    finalize it;
///  - classify external/undefined symbols: undefined ones go to
///    \p UndefinedSymbolData with section index 0, absolute ones to
///    \p ExternalSymbolData with section index 0, all others to
///    \p ExternalSymbolData with their section's index;
///  - classify the remaining (local) symbols into \p LocalSymbolData the same
///    way;
///  - sort the external and undefined tables;
///  - assign consecutive symbol indices across local, external, undefined;
///  - patch each recorded relocation's r_word1 with its symbol's index and the
///    extern bit, using a layout that depends on the writer's endianness.
// NOTE(review): the initial values of the two Index counters, the declarations
// of MSD, the skip statements after the visibility/locality tests and the
// condition selecting which relocations are patched are not visible in this
// listing.
540 void MachObjectWriter::computeSymbolTable(
541 MCAssembler
&Asm
, std::vector
<MachSymbolData
> &LocalSymbolData
,
542 std::vector
<MachSymbolData
> &ExternalSymbolData
,
543 std::vector
<MachSymbolData
> &UndefinedSymbolData
) {
544 // Build section lookup table.
545 DenseMap
<const MCSection
*, uint8_t> SectionIndexMap
;
547 for (MCAssembler::iterator it
= Asm
.begin(),
548 ie
= Asm
.end(); it
!= ie
; ++it
, ++Index
)
549 SectionIndexMap
[&*it
] = Index
;
550 assert(Index
<= 256 && "Too many sections!");
552 // Build the string table.
553 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
554 if (!Asm
.isSymbolLinkerVisible(Symbol
))
557 StringTable
.add(Symbol
.getName());
559 StringTable
.finalize();
561 // Build the symbol arrays but only for non-local symbols.
563 // The particular order that we collect and then sort the symbols is chosen to
564 // match 'as'. Even though it doesn't matter for correctness, this is
565 // important for letting us diff .o files.
566 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
567 // Ignore non-linker visible symbols.
568 if (!Asm
.isSymbolLinkerVisible(Symbol
))
571 if (!Symbol
.isExternal() && !Symbol
.isUndefined())
575 MSD
.Symbol
= &Symbol
;
576 MSD
.StringIndex
= StringTable
.getOffset(Symbol
.getName());
578 if (Symbol
.isUndefined()) {
579 MSD
.SectionIndex
= 0;
580 UndefinedSymbolData
.push_back(MSD
);
581 } else if (Symbol
.isAbsolute()) {
582 MSD
.SectionIndex
= 0;
583 ExternalSymbolData
.push_back(MSD
);
585 MSD
.SectionIndex
= SectionIndexMap
.lookup(&Symbol
.getSection());
586 assert(MSD
.SectionIndex
&& "Invalid section index!");
587 ExternalSymbolData
.push_back(MSD
);
591 // Now add the data for local symbols.
592 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
593 // Ignore non-linker visible symbols.
594 if (!Asm
.isSymbolLinkerVisible(Symbol
))
597 if (Symbol
.isExternal() || Symbol
.isUndefined())
601 MSD
.Symbol
= &Symbol
;
602 MSD
.StringIndex
= StringTable
.getOffset(Symbol
.getName());
604 if (Symbol
.isAbsolute()) {
605 MSD
.SectionIndex
= 0;
606 LocalSymbolData
.push_back(MSD
);
608 MSD
.SectionIndex
= SectionIndexMap
.lookup(&Symbol
.getSection());
609 assert(MSD
.SectionIndex
&& "Invalid section index!");
610 LocalSymbolData
.push_back(MSD
);
614 // External and undefined symbols are required to be in lexicographic order.
615 llvm::sort(ExternalSymbolData
);
616 llvm::sort(UndefinedSymbolData
);
618 // Set the symbol indices.
620 for (auto *SymbolData
:
621 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
622 for (MachSymbolData
&Entry
: *SymbolData
)
623 Entry
.Symbol
->setIndex(Index
++);
625 for (const MCSection
&Section
: Asm
) {
626 for (RelAndSymbol
&Rel
: Relocations
[&Section
]) {
630 // Set the Index and the IsExtern bit.
631 unsigned Index
= Rel
.Sym
->getIndex();
632 assert(isInt
<24>(Index
));
// Little-endian: keep the top 8 bits, index in the low 24, extern bit at 27.
633 if (W
.Endian
== llvm::endianness::little
)
634 Rel
.MRE
.r_word1
= (Rel
.MRE
.r_word1
& (~0U << 24)) | Index
| (1 << 27);
// Otherwise: keep the low 8 bits, index shifted up by 8, extern bit at 4.
636 Rel
.MRE
.r_word1
= (Rel
.MRE
.r_word1
& 0xff) | Index
<< 8 | (1 << 4);
641 void MachObjectWriter::computeSectionAddresses(const MCAssembler
&Asm
,
642 const MCAsmLayout
&Layout
) {
643 uint64_t StartAddress
= 0;
644 for (const MCSection
*Sec
: Layout
.getSectionOrder()) {
645 StartAddress
= alignTo(StartAddress
, Sec
->getAlign());
646 SectionAddress
[Sec
] = StartAddress
;
647 StartAddress
+= Layout
.getSectionAddressSize(Sec
);
649 // Explicitly pad the section to match the alignment requirements of the
650 // following one. This is for 'gas' compatibility, it shouldn't
651 /// strictly be necessary.
652 StartAddress
+= getPaddingSize(Sec
, Layout
);
656 void MachObjectWriter::executePostLayoutBinding(MCAssembler
&Asm
,
657 const MCAsmLayout
&Layout
) {
658 computeSectionAddresses(Asm
, Layout
);
660 // Create symbol data for any indirect symbols.
661 bindIndirectSymbols(Asm
);
/// Symbol/symbol overload: tests whether either \p A or \p B is a variable
/// symbol, and otherwise defers to the MCObjectWriter base implementation.
// NOTE(review): the rest of the parameter list, the result returned for the
// variable case, the trailing arguments of the forwarded call and the text of
// the FIXME are not visible in this listing.
664 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
665 const MCAssembler
&Asm
, const MCSymbol
&A
, const MCSymbol
&B
,
667 // FIXME: We don't handle things like
670 if (A
.isVariable() || B
.isVariable())
672 return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm
, A
, B
,
/// Symbol/fragment overload: decide whether the difference between \p SymA
/// and a location in fragment \p FB is fully resolved at assembly time.
///
/// \p SymA is first resolved through findAliasedSymbol. When isX86_64() is
/// false, a compound test is applied: the symbol not being in a section, the
/// two sections differing, or (with subsections-via-symbols enabled) a
/// non-temporary symbol whose atom differs from \p FB's atom. The last visible
/// test compares the atom of the symbol's fragment with the atom of \p FB;
/// per the comment, equal atoms are guaranteed to have the same address.
// NOTE(review): every return statement, plus the section-equality and
// null-fragment checks announced by the comments below, is not visible in
// this listing, so the outcome of each branch cannot be confirmed from here.
676 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
677 const MCAssembler
&Asm
, const MCSymbol
&SymA
, const MCFragment
&FB
,
678 bool InSet
, bool IsPCRel
) const {
682 // The effective address is
683 // addr(atom(A)) + offset(A)
684 // - addr(atom(B)) - offset(B)
685 // and the offsets are not relocatable, so the fixup is fully resolved when
686 // addr(atom(A)) - addr(atom(B)) == 0.
687 const MCSymbol
&SA
= findAliasedSymbol(SymA
);
688 const MCSection
&SecA
= SA
.getSection();
689 const MCSection
&SecB
= *FB
.getParent();
692 // The simple (Darwin, except on x86_64) way of dealing with this was to
693 // assume that any reference to a temporary symbol *must* be a temporary
694 // symbol in the same atom, unless the sections differ. Therefore, any PCrel
695 // relocation to a temporary symbol (in the same section) is fully
696 // resolved. This also works in conjunction with absolutized .set, which
697 // requires the compiler to use .set to absolutize the differences between
698 // symbols which the compiler knows to be assembly time constants, so we
699 // don't need to worry about considering symbol differences fully resolved.
701 // If the file isn't using sub-sections-via-symbols, we can make the
702 // same assumptions about any symbol that we normally make about
705 bool hasReliableSymbolDifference
= isX86_64();
706 if (!hasReliableSymbolDifference
) {
707 if (!SA
.isInSection() || &SecA
!= &SecB
||
708 (!SA
.isTemporary() && FB
.getAtom() != SA
.getFragment()->getAtom() &&
709 Asm
.getSubsectionsViaSymbols()))
715 // If they are not in the same section, we can't compute the diff.
719 const MCFragment
*FA
= SA
.getFragment();
721 // Bail if the symbol has no fragment.
725 // If the atoms are the same, they are guaranteed to have the same address.
726 if (FA
->getAtom() == FB
.getAtom())
729 // Otherwise, we can't prove this is fully resolved.
/// Map an MCVersionMinType to the matching LC_VERSION_MIN_* load command:
/// OS X -> MACOSX, iOS -> IPHONEOS, tvOS -> TVOS, watchOS -> WATCHOS. Any
/// other value reaches llvm_unreachable.
// NOTE(review): the switch header is not visible in this listing; presumably
// it switches on Type.
733 static MachO::LoadCommandType
getLCFromMCVM(MCVersionMinType Type
) {
735 case MCVM_OSXVersionMin
: return MachO::LC_VERSION_MIN_MACOSX
;
736 case MCVM_IOSVersionMin
: return MachO::LC_VERSION_MIN_IPHONEOS
;
737 case MCVM_TvOSVersionMin
: return MachO::LC_VERSION_MIN_TVOS
;
738 case MCVM_WatchOSVersionMin
: return MachO::LC_VERSION_MIN_WATCHOS
;
740 llvm_unreachable("Invalid mc version min type");
/// Add one relocation against the address-significance section for each
/// symbol returned by getAddrsigSyms().
///
/// The section comes from the context's MCObjectFileInfo. Each relocation's
/// r_word1 holds Log2Size (3 for 64-bit, 2 for 32-bit) shifted to bit 25,
/// combined with GENERIC_RELOC_VANILLA shifted to bit 28.
// NOTE(review): the statement executed for unregistered symbols (presumably a
// skip) and the initialisation of MRE.r_word0 are not visible in this listing.
743 void MachObjectWriter::populateAddrSigSection(MCAssembler
&Asm
) {
744 MCSection
*AddrSigSection
=
745 Asm
.getContext().getObjectFileInfo()->getAddrSigSection();
746 unsigned Log2Size
= is64Bit() ? 3 : 2;
747 for (const MCSymbol
*S
: getAddrsigSyms()) {
748 if (!S
->isRegistered())
750 MachO::any_relocation_info MRE
;
752 MRE
.r_word1
= (Log2Size
<< 25) | (MachO::GENERIC_RELOC_VANILLA
<< 28);
753 addRelocation(S
, AddrSigSection
, MRE
);
757 uint64_t MachObjectWriter::writeObject(MCAssembler
&Asm
,
758 const MCAsmLayout
&Layout
) {
759 uint64_t StartOffset
= W
.OS
.tell();
761 populateAddrSigSection(Asm
);
763 // Compute symbol table information and bind symbol indices.
764 computeSymbolTable(Asm
, LocalSymbolData
, ExternalSymbolData
,
765 UndefinedSymbolData
);
767 if (!Asm
.CGProfile
.empty()) {
768 MCSection
*CGProfileSection
= Asm
.getContext().getMachOSection(
769 "__LLVM", "__cg_profile", 0, SectionKind::getMetadata());
770 MCDataFragment
*Frag
= dyn_cast_or_null
<MCDataFragment
>(
771 &*CGProfileSection
->getFragmentList().begin());
772 assert(Frag
&& "call graph profile section not reserved");
773 Frag
->getContents().clear();
774 raw_svector_ostream
OS(Frag
->getContents());
775 for (const MCAssembler::CGProfileEntry
&CGPE
: Asm
.CGProfile
) {
776 uint32_t FromIndex
= CGPE
.From
->getSymbol().getIndex();
777 uint32_t ToIndex
= CGPE
.To
->getSymbol().getIndex();
778 support::endian::write(OS
, FromIndex
, W
.Endian
);
779 support::endian::write(OS
, ToIndex
, W
.Endian
);
780 support::endian::write(OS
, CGPE
.Count
, W
.Endian
);
784 unsigned NumSections
= Asm
.size();
785 const MCAssembler::VersionInfoType
&VersionInfo
=
786 Layout
.getAssembler().getVersionInfo();
788 // The section data starts after the header, the segment load command (and
789 // section headers) and the symbol table.
790 unsigned NumLoadCommands
= 1;
791 uint64_t LoadCommandsSize
= is64Bit() ?
792 sizeof(MachO::segment_command_64
) + NumSections
* sizeof(MachO::section_64
):
793 sizeof(MachO::segment_command
) + NumSections
* sizeof(MachO::section
);
795 // Add the deployment target version info load command size, if used.
796 if (VersionInfo
.Major
!= 0) {
798 if (VersionInfo
.EmitBuildVersion
)
799 LoadCommandsSize
+= sizeof(MachO::build_version_command
);
801 LoadCommandsSize
+= sizeof(MachO::version_min_command
);
804 const MCAssembler::VersionInfoType
&TargetVariantVersionInfo
=
805 Layout
.getAssembler().getDarwinTargetVariantVersionInfo();
807 // Add the target variant version info load command size, if used.
808 if (TargetVariantVersionInfo
.Major
!= 0) {
810 assert(TargetVariantVersionInfo
.EmitBuildVersion
&&
811 "target variant should use build version");
812 LoadCommandsSize
+= sizeof(MachO::build_version_command
);
815 // Add the data-in-code load command size, if used.
816 unsigned NumDataRegions
= Asm
.getDataRegions().size();
817 if (NumDataRegions
) {
819 LoadCommandsSize
+= sizeof(MachO::linkedit_data_command
);
822 // Add the loh load command size, if used.
823 uint64_t LOHRawSize
= Asm
.getLOHContainer().getEmitSize(*this, Layout
);
824 uint64_t LOHSize
= alignTo(LOHRawSize
, is64Bit() ? 8 : 4);
827 LoadCommandsSize
+= sizeof(MachO::linkedit_data_command
);
830 // Add the symbol table load command sizes, if used.
831 unsigned NumSymbols
= LocalSymbolData
.size() + ExternalSymbolData
.size() +
832 UndefinedSymbolData
.size();
834 NumLoadCommands
+= 2;
835 LoadCommandsSize
+= (sizeof(MachO::symtab_command
) +
836 sizeof(MachO::dysymtab_command
));
839 // Add the linker option load commands sizes.
840 for (const auto &Option
: Asm
.getLinkerOptions()) {
842 LoadCommandsSize
+= ComputeLinkerOptionsLoadCommandSize(Option
, is64Bit());
845 // Compute the total size of the section data, as well as its file size and vm
847 uint64_t SectionDataStart
= (is64Bit() ? sizeof(MachO::mach_header_64
) :
848 sizeof(MachO::mach_header
)) + LoadCommandsSize
;
849 uint64_t SectionDataSize
= 0;
850 uint64_t SectionDataFileSize
= 0;
852 for (const MCSection
&Sec
: Asm
) {
853 uint64_t Address
= getSectionAddress(&Sec
);
854 uint64_t Size
= Layout
.getSectionAddressSize(&Sec
);
855 uint64_t FileSize
= Layout
.getSectionFileSize(&Sec
);
856 FileSize
+= getPaddingSize(&Sec
, Layout
);
858 VMSize
= std::max(VMSize
, Address
+ Size
);
860 if (Sec
.isVirtualSection())
863 SectionDataSize
= std::max(SectionDataSize
, Address
+ Size
);
864 SectionDataFileSize
= std::max(SectionDataFileSize
, Address
+ FileSize
);
867 // The section data is padded to pointer size bytes.
869 // FIXME: Is this machine dependent?
870 unsigned SectionDataPadding
=
871 offsetToAlignment(SectionDataFileSize
, is64Bit() ? Align(8) : Align(4));
872 SectionDataFileSize
+= SectionDataPadding
;
874 // Write the prolog, starting with the header and load command...
875 writeHeader(MachO::MH_OBJECT
, NumLoadCommands
, LoadCommandsSize
,
876 Asm
.getSubsectionsViaSymbols());
878 MachO::VM_PROT_READ
| MachO::VM_PROT_WRITE
| MachO::VM_PROT_EXECUTE
;
879 writeSegmentLoadCommand("", NumSections
, 0, VMSize
, SectionDataStart
,
880 SectionDataSize
, Prot
, Prot
);
882 // ... and then the section headers.
883 uint64_t RelocTableEnd
= SectionDataStart
+ SectionDataFileSize
;
884 for (const MCSection
&Section
: Asm
) {
885 const auto &Sec
= cast
<MCSectionMachO
>(Section
);
886 std::vector
<RelAndSymbol
> &Relocs
= Relocations
[&Sec
];
887 unsigned NumRelocs
= Relocs
.size();
888 uint64_t SectionStart
= SectionDataStart
+ getSectionAddress(&Sec
);
889 unsigned Flags
= Sec
.getTypeAndAttributes();
890 if (Sec
.hasInstructions())
891 Flags
|= MachO::S_ATTR_SOME_INSTRUCTIONS
;
892 writeSection(Layout
, Sec
, getSectionAddress(&Sec
), SectionStart
, Flags
,
893 RelocTableEnd
, NumRelocs
);
894 RelocTableEnd
+= NumRelocs
* sizeof(MachO::any_relocation_info
);
897 // Write out the deployment target information, if it's available.
898 auto EmitDeploymentTargetVersion
=
899 [&](const MCAssembler::VersionInfoType
&VersionInfo
) {
900 auto EncodeVersion
= [](VersionTuple V
) -> uint32_t {
901 assert(!V
.empty() && "empty version");
902 unsigned Update
= V
.getSubminor().value_or(0);
903 unsigned Minor
= V
.getMinor().value_or(0);
904 assert(Update
< 256 && "unencodable update target version");
905 assert(Minor
< 256 && "unencodable minor target version");
906 assert(V
.getMajor() < 65536 && "unencodable major target version");
907 return Update
| (Minor
<< 8) | (V
.getMajor() << 16);
909 uint32_t EncodedVersion
= EncodeVersion(VersionTuple(
910 VersionInfo
.Major
, VersionInfo
.Minor
, VersionInfo
.Update
));
911 uint32_t SDKVersion
= !VersionInfo
.SDKVersion
.empty()
912 ? EncodeVersion(VersionInfo
.SDKVersion
)
914 if (VersionInfo
.EmitBuildVersion
) {
915 // FIXME: Currently empty tools. Add clang version in the future.
916 W
.write
<uint32_t>(MachO::LC_BUILD_VERSION
);
917 W
.write
<uint32_t>(sizeof(MachO::build_version_command
));
918 W
.write
<uint32_t>(VersionInfo
.TypeOrPlatform
.Platform
);
919 W
.write
<uint32_t>(EncodedVersion
);
920 W
.write
<uint32_t>(SDKVersion
);
921 W
.write
<uint32_t>(0); // Empty tools list.
923 MachO::LoadCommandType LCType
=
924 getLCFromMCVM(VersionInfo
.TypeOrPlatform
.Type
);
925 W
.write
<uint32_t>(LCType
);
926 W
.write
<uint32_t>(sizeof(MachO::version_min_command
));
927 W
.write
<uint32_t>(EncodedVersion
);
928 W
.write
<uint32_t>(SDKVersion
);
931 if (VersionInfo
.Major
!= 0)
932 EmitDeploymentTargetVersion(VersionInfo
);
933 if (TargetVariantVersionInfo
.Major
!= 0)
934 EmitDeploymentTargetVersion(TargetVariantVersionInfo
);
936 // Write the data-in-code load command, if used.
937 uint64_t DataInCodeTableEnd
= RelocTableEnd
+ NumDataRegions
* 8;
938 if (NumDataRegions
) {
939 uint64_t DataRegionsOffset
= RelocTableEnd
;
940 uint64_t DataRegionsSize
= NumDataRegions
* 8;
941 writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE
, DataRegionsOffset
,
945 // Write the loh load command, if used.
946 uint64_t LOHTableEnd
= DataInCodeTableEnd
+ LOHSize
;
948 writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT
,
949 DataInCodeTableEnd
, LOHSize
);
951 // Write the symbol table load command, if used.
953 unsigned FirstLocalSymbol
= 0;
954 unsigned NumLocalSymbols
= LocalSymbolData
.size();
955 unsigned FirstExternalSymbol
= FirstLocalSymbol
+ NumLocalSymbols
;
956 unsigned NumExternalSymbols
= ExternalSymbolData
.size();
957 unsigned FirstUndefinedSymbol
= FirstExternalSymbol
+ NumExternalSymbols
;
958 unsigned NumUndefinedSymbols
= UndefinedSymbolData
.size();
959 unsigned NumIndirectSymbols
= Asm
.indirect_symbol_size();
960 unsigned NumSymTabSymbols
=
961 NumLocalSymbols
+ NumExternalSymbols
+ NumUndefinedSymbols
;
962 uint64_t IndirectSymbolSize
= NumIndirectSymbols
* 4;
963 uint64_t IndirectSymbolOffset
= 0;
965 // If used, the indirect symbols are written after the section data.
966 if (NumIndirectSymbols
)
967 IndirectSymbolOffset
= LOHTableEnd
;
969 // The symbol table is written after the indirect symbol data.
970 uint64_t SymbolTableOffset
= LOHTableEnd
+ IndirectSymbolSize
;
972 // The string table is written after symbol table.
973 uint64_t StringTableOffset
=
974 SymbolTableOffset
+ NumSymTabSymbols
* (is64Bit() ?
975 sizeof(MachO::nlist_64
) :
976 sizeof(MachO::nlist
));
977 writeSymtabLoadCommand(SymbolTableOffset
, NumSymTabSymbols
,
978 StringTableOffset
, StringTable
.getSize());
980 writeDysymtabLoadCommand(FirstLocalSymbol
, NumLocalSymbols
,
981 FirstExternalSymbol
, NumExternalSymbols
,
982 FirstUndefinedSymbol
, NumUndefinedSymbols
,
983 IndirectSymbolOffset
, NumIndirectSymbols
);
986 // Write the linker options load commands.
987 for (const auto &Option
: Asm
.getLinkerOptions())
988 writeLinkerOptionsLoadCommand(Option
);
990 // Write the actual section data.
991 for (const MCSection
&Sec
: Asm
) {
992 Asm
.writeSectionData(W
.OS
, &Sec
, Layout
);
994 uint64_t Pad
= getPaddingSize(&Sec
, Layout
);
995 W
.OS
.write_zeros(Pad
);
998 // Write the extra padding.
999 W
.OS
.write_zeros(SectionDataPadding
);
1001 // Write the relocation entries.
1002 for (const MCSection
&Sec
: Asm
) {
1003 // Write the section relocation entries, in reverse order to match 'as'
1004 // (approximately, the exact algorithm is more complicated than this).
1005 std::vector
<RelAndSymbol
> &Relocs
= Relocations
[&Sec
];
1006 for (const RelAndSymbol
&Rel
: llvm::reverse(Relocs
)) {
1007 W
.write
<uint32_t>(Rel
.MRE
.r_word0
);
1008 W
.write
<uint32_t>(Rel
.MRE
.r_word1
);
1012 // Write out the data-in-code region payload, if there is one.
1013 for (MCAssembler::const_data_region_iterator
1014 it
= Asm
.data_region_begin(), ie
= Asm
.data_region_end();
1016 const DataRegionData
*Data
= &(*it
);
1017 uint64_t Start
= getSymbolAddress(*Data
->Start
, Layout
);
1020 End
= getSymbolAddress(*Data
->End
, Layout
);
1022 report_fatal_error("Data region not terminated");
1024 LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data
->Kind
1025 << " start: " << Start
<< "(" << Data
->Start
->getName()
1027 << " end: " << End
<< "(" << Data
->End
->getName() << ")"
1028 << " size: " << End
- Start
<< "\n");
1029 W
.write
<uint32_t>(Start
);
1030 W
.write
<uint16_t>(End
- Start
);
1031 W
.write
<uint16_t>(Data
->Kind
);
1034 // Write out the loh commands, if there is one.
1037 unsigned Start
= W
.OS
.tell();
1039 Asm
.getLOHContainer().emit(*this, Layout
);
1040 // Pad to a multiple of the pointer size.
1042 offsetToAlignment(LOHRawSize
, is64Bit() ? Align(8) : Align(4)));
1043 assert(W
.OS
.tell() - Start
== LOHSize
);
1046 // Write the symbol table data, if used.
1048 // Write the indirect symbol entries.
1049 for (MCAssembler::const_indirect_symbol_iterator
1050 it
= Asm
.indirect_symbol_begin(),
1051 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
) {
1052 // Indirect symbols in the non-lazy symbol pointer section have some
1053 // special handling.
1054 const MCSectionMachO
&Section
=
1055 static_cast<const MCSectionMachO
&>(*it
->Section
);
1056 if (Section
.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS
) {
1057 // If this symbol is defined and internal, mark it as such.
1058 if (it
->Symbol
->isDefined() && !it
->Symbol
->isExternal()) {
1059 uint32_t Flags
= MachO::INDIRECT_SYMBOL_LOCAL
;
1060 if (it
->Symbol
->isAbsolute())
1061 Flags
|= MachO::INDIRECT_SYMBOL_ABS
;
1062 W
.write
<uint32_t>(Flags
);
1067 W
.write
<uint32_t>(it
->Symbol
->getIndex());
1070 // FIXME: Check that offsets match computed ones.
1072 // Write the symbol table entries.
1073 for (auto *SymbolData
:
1074 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
1075 for (MachSymbolData
&Entry
: *SymbolData
)
1076 writeNlist(Entry
, Layout
);
1078 // Write the string table.
1079 StringTable
.write(W
.OS
);
1082 return W
.OS
.tell() - StartOffset
;
1085 std::unique_ptr
<MCObjectWriter
>
1086 llvm::createMachObjectWriter(std::unique_ptr
<MCMachObjectTargetWriter
> MOTW
,
1087 raw_pwrite_stream
&OS
, bool IsLittleEndian
) {
1088 return std::make_unique
<MachObjectWriter
>(std::move(MOTW
), OS
,