1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/BinaryFormat/MachO.h"
12 #include "llvm/MC/MCAsmBackend.h"
13 #include "llvm/MC/MCAsmInfoDarwin.h"
14 #include "llvm/MC/MCAssembler.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCDirectives.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCFixupKindInfo.h"
19 #include "llvm/MC/MCFragment.h"
20 #include "llvm/MC/MCMachObjectWriter.h"
21 #include "llvm/MC/MCObjectFileInfo.h"
22 #include "llvm/MC/MCObjectWriter.h"
23 #include "llvm/MC/MCSection.h"
24 #include "llvm/MC/MCSectionMachO.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCSymbolMachO.h"
27 #include "llvm/MC/MCValue.h"
28 #include "llvm/Support/Alignment.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Support/raw_ostream.h"
43 #define DEBUG_TYPE "mc"
45 void MachObjectWriter::reset() {
47 IndirectSymBase
.clear();
48 IndirectSymbols
.clear();
50 SectionAddress
.clear();
53 LocalSymbolData
.clear();
54 ExternalSymbolData
.clear();
55 UndefinedSymbolData
.clear();
57 VersionInfo
.Major
= 0;
58 VersionInfo
.SDKVersion
= VersionTuple();
59 TargetVariantVersionInfo
.Major
= 0;
60 TargetVariantVersionInfo
.SDKVersion
= VersionTuple();
61 LinkerOptions
.clear();
62 MCObjectWriter::reset();
65 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol
&S
) {
66 // Undefined symbols are always extern.
70 // References to weak definitions require external relocation entries; the
71 // definition may not always be the one in the same object file.
72 if (cast
<MCSymbolMachO
>(S
).isWeakDefinition())
75 // Otherwise, we can use an internal relocation.
79 bool MachObjectWriter::
80 MachSymbolData::operator<(const MachSymbolData
&RHS
) const {
81 return Symbol
->getName() < RHS
.Symbol
->getName();
84 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler
&Asm
, unsigned Kind
) {
85 const MCFixupKindInfo
&FKI
= Asm
.getBackend().getFixupKindInfo(
88 return FKI
.Flags
& MCFixupKindInfo::FKF_IsPCRel
;
92 MachObjectWriter::getFragmentAddress(const MCAssembler
&Asm
,
93 const MCFragment
*Fragment
) const {
94 return getSectionAddress(Fragment
->getParent()) +
95 Asm
.getFragmentOffset(*Fragment
);
98 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol
&S
,
99 const MCAssembler
&Asm
) const {
100 // If this is a variable, then recursively evaluate now.
101 if (S
.isVariable()) {
102 if (const MCConstantExpr
*C
=
103 dyn_cast
<const MCConstantExpr
>(S
.getVariableValue()))
104 return C
->getValue();
107 if (!S
.getVariableValue()->evaluateAsRelocatable(Target
, &Asm
, nullptr))
108 report_fatal_error("unable to evaluate offset for variable '" +
111 // Verify that any used symbols are defined.
112 if (Target
.getSymA() && Target
.getSymA()->getSymbol().isUndefined())
113 report_fatal_error("unable to evaluate offset to undefined symbol '" +
114 Target
.getSymA()->getSymbol().getName() + "'");
115 if (Target
.getSymB() && Target
.getSymB()->getSymbol().isUndefined())
116 report_fatal_error("unable to evaluate offset to undefined symbol '" +
117 Target
.getSymB()->getSymbol().getName() + "'");
119 uint64_t Address
= Target
.getConstant();
120 if (Target
.getSymA())
121 Address
+= getSymbolAddress(Target
.getSymA()->getSymbol(), Asm
);
122 if (Target
.getSymB())
123 Address
+= getSymbolAddress(Target
.getSymB()->getSymbol(), Asm
);
127 return getSectionAddress(S
.getFragment()->getParent()) +
128 Asm
.getSymbolOffset(S
);
131 uint64_t MachObjectWriter::getPaddingSize(const MCAssembler
&Asm
,
132 const MCSection
*Sec
) const {
133 uint64_t EndAddr
= getSectionAddress(Sec
) + Asm
.getSectionAddressSize(*Sec
);
134 unsigned Next
= cast
<MCSectionMachO
>(Sec
)->getLayoutOrder() + 1;
135 if (Next
>= SectionOrder
.size())
138 const MCSection
&NextSec
= *SectionOrder
[Next
];
139 if (NextSec
.isVirtualSection())
141 return offsetToAlignment(EndAddr
, NextSec
.getAlign());
144 static bool isSymbolLinkerVisible(const MCSymbol
&Symbol
) {
145 // Non-temporary labels should always be visible to the linker.
146 if (!Symbol
.isTemporary())
149 if (Symbol
.isUsedInReloc())
155 const MCSymbol
*MachObjectWriter::getAtom(const MCSymbol
&S
) const {
156 // Linker visible symbols define atoms.
157 if (isSymbolLinkerVisible(S
))
160 // Absolute and undefined symbols have no defining atom.
161 if (!S
.isInSection())
164 // Non-linker visible symbols in sections which can't be atomized have no
166 if (!MCAsmInfoDarwin::isSectionAtomizableBySymbols(
167 *S
.getFragment()->getParent()))
170 // Otherwise, return the atom for the containing fragment.
171 return S
.getFragment()->getAtom();
174 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type
,
175 unsigned NumLoadCommands
,
176 unsigned LoadCommandsSize
,
177 bool SubsectionsViaSymbols
) {
180 if (SubsectionsViaSymbols
)
181 Flags
|= MachO::MH_SUBSECTIONS_VIA_SYMBOLS
;
183 // struct mach_header (28 bytes) or
184 // struct mach_header_64 (32 bytes)
186 uint64_t Start
= W
.OS
.tell();
189 W
.write
<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64
: MachO::MH_MAGIC
);
191 W
.write
<uint32_t>(TargetObjectWriter
->getCPUType());
193 uint32_t Cpusubtype
= TargetObjectWriter
->getCPUSubtype();
195 // Promote arm64e subtypes to always be ptrauth-ABI-versioned, at version 0.
196 // We never need to emit unversioned binaries.
197 // And we don't support arbitrary ABI versions (or the kernel flag) yet.
198 if (TargetObjectWriter
->getCPUType() == MachO::CPU_TYPE_ARM64
&&
199 Cpusubtype
== MachO::CPU_SUBTYPE_ARM64E
)
200 Cpusubtype
= MachO::CPU_SUBTYPE_ARM64E_WITH_PTRAUTH_VERSION(
201 /*PtrAuthABIVersion=*/0, /*PtrAuthKernelABIVersion=*/false);
203 W
.write
<uint32_t>(Cpusubtype
);
205 W
.write
<uint32_t>(Type
);
206 W
.write
<uint32_t>(NumLoadCommands
);
207 W
.write
<uint32_t>(LoadCommandsSize
);
208 W
.write
<uint32_t>(Flags
);
210 W
.write
<uint32_t>(0); // reserved
212 assert(W
.OS
.tell() - Start
== (is64Bit() ? sizeof(MachO::mach_header_64
)
213 : sizeof(MachO::mach_header
)));
216 void MachObjectWriter::writeWithPadding(StringRef Str
, uint64_t Size
) {
217 assert(Size
>= Str
.size());
219 W
.OS
.write_zeros(Size
- Str
.size());
222 /// writeSegmentLoadCommand - Write a segment load command.
224 /// \param NumSections The number of sections in this segment.
225 /// \param SectionDataSize The total size of the sections.
226 void MachObjectWriter::writeSegmentLoadCommand(
227 StringRef Name
, unsigned NumSections
, uint64_t VMAddr
, uint64_t VMSize
,
228 uint64_t SectionDataStartOffset
, uint64_t SectionDataSize
, uint32_t MaxProt
,
230 // struct segment_command (56 bytes) or
231 // struct segment_command_64 (72 bytes)
233 uint64_t Start
= W
.OS
.tell();
236 unsigned SegmentLoadCommandSize
=
237 is64Bit() ? sizeof(MachO::segment_command_64
):
238 sizeof(MachO::segment_command
);
239 W
.write
<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64
: MachO::LC_SEGMENT
);
240 W
.write
<uint32_t>(SegmentLoadCommandSize
+
241 NumSections
* (is64Bit() ? sizeof(MachO::section_64
) :
242 sizeof(MachO::section
)));
244 writeWithPadding(Name
, 16);
246 W
.write
<uint64_t>(VMAddr
); // vmaddr
247 W
.write
<uint64_t>(VMSize
); // vmsize
248 W
.write
<uint64_t>(SectionDataStartOffset
); // file offset
249 W
.write
<uint64_t>(SectionDataSize
); // file size
251 W
.write
<uint32_t>(VMAddr
); // vmaddr
252 W
.write
<uint32_t>(VMSize
); // vmsize
253 W
.write
<uint32_t>(SectionDataStartOffset
); // file offset
254 W
.write
<uint32_t>(SectionDataSize
); // file size
257 W
.write
<uint32_t>(MaxProt
);
259 W
.write
<uint32_t>(InitProt
);
260 W
.write
<uint32_t>(NumSections
);
261 W
.write
<uint32_t>(0); // flags
263 assert(W
.OS
.tell() - Start
== SegmentLoadCommandSize
);
266 void MachObjectWriter::writeSection(const MCAssembler
&Asm
,
267 const MCSection
&Sec
, uint64_t VMAddr
,
268 uint64_t FileOffset
, unsigned Flags
,
269 uint64_t RelocationsStart
,
270 unsigned NumRelocations
) {
271 uint64_t SectionSize
= Asm
.getSectionAddressSize(Sec
);
272 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(Sec
);
274 // The offset is unused for virtual sections.
275 if (Section
.isVirtualSection()) {
276 assert(Asm
.getSectionFileSize(Sec
) == 0 && "Invalid file size!");
280 // struct section (68 bytes) or
281 // struct section_64 (80 bytes)
283 uint64_t Start
= W
.OS
.tell();
286 writeWithPadding(Section
.getName(), 16);
287 writeWithPadding(Section
.getSegmentName(), 16);
289 W
.write
<uint64_t>(VMAddr
); // address
290 W
.write
<uint64_t>(SectionSize
); // size
292 W
.write
<uint32_t>(VMAddr
); // address
293 W
.write
<uint32_t>(SectionSize
); // size
295 assert(isUInt
<32>(FileOffset
) && "Cannot encode offset of section");
296 W
.write
<uint32_t>(FileOffset
);
298 W
.write
<uint32_t>(Log2(Section
.getAlign()));
299 assert((!NumRelocations
|| isUInt
<32>(RelocationsStart
)) &&
300 "Cannot encode offset of relocations");
301 W
.write
<uint32_t>(NumRelocations
? RelocationsStart
: 0);
302 W
.write
<uint32_t>(NumRelocations
);
303 W
.write
<uint32_t>(Flags
);
304 W
.write
<uint32_t>(IndirectSymBase
.lookup(&Sec
)); // reserved1
305 W
.write
<uint32_t>(Section
.getStubSize()); // reserved2
307 W
.write
<uint32_t>(0); // reserved3
309 assert(W
.OS
.tell() - Start
==
310 (is64Bit() ? sizeof(MachO::section_64
) : sizeof(MachO::section
)));
313 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset
,
315 uint32_t StringTableOffset
,
316 uint32_t StringTableSize
) {
317 // struct symtab_command (24 bytes)
319 uint64_t Start
= W
.OS
.tell();
322 W
.write
<uint32_t>(MachO::LC_SYMTAB
);
323 W
.write
<uint32_t>(sizeof(MachO::symtab_command
));
324 W
.write
<uint32_t>(SymbolOffset
);
325 W
.write
<uint32_t>(NumSymbols
);
326 W
.write
<uint32_t>(StringTableOffset
);
327 W
.write
<uint32_t>(StringTableSize
);
329 assert(W
.OS
.tell() - Start
== sizeof(MachO::symtab_command
));
332 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol
,
333 uint32_t NumLocalSymbols
,
334 uint32_t FirstExternalSymbol
,
335 uint32_t NumExternalSymbols
,
336 uint32_t FirstUndefinedSymbol
,
337 uint32_t NumUndefinedSymbols
,
338 uint32_t IndirectSymbolOffset
,
339 uint32_t NumIndirectSymbols
) {
340 // struct dysymtab_command (80 bytes)
342 uint64_t Start
= W
.OS
.tell();
345 W
.write
<uint32_t>(MachO::LC_DYSYMTAB
);
346 W
.write
<uint32_t>(sizeof(MachO::dysymtab_command
));
347 W
.write
<uint32_t>(FirstLocalSymbol
);
348 W
.write
<uint32_t>(NumLocalSymbols
);
349 W
.write
<uint32_t>(FirstExternalSymbol
);
350 W
.write
<uint32_t>(NumExternalSymbols
);
351 W
.write
<uint32_t>(FirstUndefinedSymbol
);
352 W
.write
<uint32_t>(NumUndefinedSymbols
);
353 W
.write
<uint32_t>(0); // tocoff
354 W
.write
<uint32_t>(0); // ntoc
355 W
.write
<uint32_t>(0); // modtaboff
356 W
.write
<uint32_t>(0); // nmodtab
357 W
.write
<uint32_t>(0); // extrefsymoff
358 W
.write
<uint32_t>(0); // nextrefsyms
359 W
.write
<uint32_t>(IndirectSymbolOffset
);
360 W
.write
<uint32_t>(NumIndirectSymbols
);
361 W
.write
<uint32_t>(0); // extreloff
362 W
.write
<uint32_t>(0); // nextrel
363 W
.write
<uint32_t>(0); // locreloff
364 W
.write
<uint32_t>(0); // nlocrel
366 assert(W
.OS
.tell() - Start
== sizeof(MachO::dysymtab_command
));
369 MachObjectWriter::MachSymbolData
*
370 MachObjectWriter::findSymbolData(const MCSymbol
&Sym
) {
371 for (auto *SymbolData
:
372 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
373 for (MachSymbolData
&Entry
: *SymbolData
)
374 if (Entry
.Symbol
== &Sym
)
380 const MCSymbol
&MachObjectWriter::findAliasedSymbol(const MCSymbol
&Sym
) const {
381 const MCSymbol
*S
= &Sym
;
382 while (S
->isVariable()) {
383 const MCExpr
*Value
= S
->getVariableValue();
384 const auto *Ref
= dyn_cast
<MCSymbolRefExpr
>(Value
);
387 S
= &Ref
->getSymbol();
392 void MachObjectWriter::writeNlist(MachSymbolData
&MSD
, const MCAssembler
&Asm
) {
393 const MCSymbol
*Symbol
= MSD
.Symbol
;
394 const auto &Data
= cast
<MCSymbolMachO
>(*Symbol
);
395 const MCSymbol
*AliasedSymbol
= &findAliasedSymbol(*Symbol
);
396 uint8_t SectionIndex
= MSD
.SectionIndex
;
398 uint64_t Address
= 0;
399 bool IsAlias
= Symbol
!= AliasedSymbol
;
401 const MCSymbol
&OrigSymbol
= *Symbol
;
402 MachSymbolData
*AliaseeInfo
;
404 AliaseeInfo
= findSymbolData(*AliasedSymbol
);
406 SectionIndex
= AliaseeInfo
->SectionIndex
;
407 Symbol
= AliasedSymbol
;
408 // FIXME: Should this update Data as well?
411 // Set the N_TYPE bits. See <mach-o/nlist.h>.
413 // FIXME: Are the prebound or indirect fields possible here?
414 if (IsAlias
&& Symbol
->isUndefined())
415 Type
= MachO::N_INDR
;
416 else if (Symbol
->isUndefined())
417 Type
= MachO::N_UNDF
;
418 else if (Symbol
->isAbsolute())
421 Type
= MachO::N_SECT
;
423 // FIXME: Set STAB bits.
425 if (Data
.isPrivateExtern())
426 Type
|= MachO::N_PEXT
;
429 if (Data
.isExternal() || (!IsAlias
&& Symbol
->isUndefined()))
430 Type
|= MachO::N_EXT
;
432 // Compute the symbol address.
433 if (IsAlias
&& Symbol
->isUndefined())
434 Address
= AliaseeInfo
->StringIndex
;
435 else if (Symbol
->isDefined())
436 Address
= getSymbolAddress(OrigSymbol
, Asm
);
437 else if (Symbol
->isCommon()) {
438 // Common symbols are encoded with the size in the address
439 // field, and their alignment in the flags.
440 Address
= Symbol
->getCommonSize();
443 // struct nlist (12 bytes)
445 W
.write
<uint32_t>(MSD
.StringIndex
);
447 W
.OS
<< char(SectionIndex
);
449 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
451 bool EncodeAsAltEntry
=
452 IsAlias
&& cast
<MCSymbolMachO
>(OrigSymbol
).isAltEntry();
453 W
.write
<uint16_t>(cast
<MCSymbolMachO
>(Symbol
)->getEncodedFlags(EncodeAsAltEntry
));
455 W
.write
<uint64_t>(Address
);
457 W
.write
<uint32_t>(Address
);
460 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type
,
463 uint64_t Start
= W
.OS
.tell();
466 W
.write
<uint32_t>(Type
);
467 W
.write
<uint32_t>(sizeof(MachO::linkedit_data_command
));
468 W
.write
<uint32_t>(DataOffset
);
469 W
.write
<uint32_t>(DataSize
);
471 assert(W
.OS
.tell() - Start
== sizeof(MachO::linkedit_data_command
));
474 static unsigned ComputeLinkerOptionsLoadCommandSize(
475 const std::vector
<std::string
> &Options
, bool is64Bit
)
477 unsigned Size
= sizeof(MachO::linker_option_command
);
478 for (const std::string
&Option
: Options
)
479 Size
+= Option
.size() + 1;
480 return alignTo(Size
, is64Bit
? 8 : 4);
483 void MachObjectWriter::writeLinkerOptionsLoadCommand(
484 const std::vector
<std::string
> &Options
)
486 unsigned Size
= ComputeLinkerOptionsLoadCommandSize(Options
, is64Bit());
487 uint64_t Start
= W
.OS
.tell();
490 W
.write
<uint32_t>(MachO::LC_LINKER_OPTION
);
491 W
.write
<uint32_t>(Size
);
492 W
.write
<uint32_t>(Options
.size());
493 uint64_t BytesWritten
= sizeof(MachO::linker_option_command
);
494 for (const std::string
&Option
: Options
) {
495 // Write each string, including the null byte.
496 W
.OS
<< Option
<< '\0';
497 BytesWritten
+= Option
.size() + 1;
500 // Pad to a multiple of the pointer size.
502 offsetToAlignment(BytesWritten
, is64Bit() ? Align(8) : Align(4)));
504 assert(W
.OS
.tell() - Start
== Size
);
507 static bool isFixupTargetValid(const MCValue
&Target
) {
508 // Target is (LHS - RHS + cst).
509 // We don't support the form where LHS is null: -RHS + cst
510 if (!Target
.getSymA() && Target
.getSymB())
515 void MachObjectWriter::recordRelocation(MCAssembler
&Asm
,
516 const MCFragment
*Fragment
,
517 const MCFixup
&Fixup
, MCValue Target
,
518 uint64_t &FixedValue
) {
519 if (!isFixupTargetValid(Target
)) {
520 Asm
.getContext().reportError(Fixup
.getLoc(),
521 "unsupported relocation expression");
525 TargetObjectWriter
->recordRelocation(this, Asm
, Fragment
, Fixup
, Target
,
529 void MachObjectWriter::bindIndirectSymbols(MCAssembler
&Asm
) {
530 // This is the point where 'as' creates actual symbols for indirect symbols
531 // (in the following two passes). It would be easier for us to do this sooner
532 // when we see the attribute, but that makes getting the order in the symbol
533 // table much more complicated than it is worth.
535 // FIXME: Revisit this when the dust settles.
537 // Report errors for use of .indirect_symbol not in a symbol pointer section
539 for (IndirectSymbolData
&ISD
: IndirectSymbols
) {
540 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*ISD
.Section
);
542 if (Section
.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS
&&
543 Section
.getType() != MachO::S_LAZY_SYMBOL_POINTERS
&&
544 Section
.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS
&&
545 Section
.getType() != MachO::S_SYMBOL_STUBS
) {
546 MCSymbol
&Symbol
= *ISD
.Symbol
;
547 report_fatal_error("indirect symbol '" + Symbol
.getName() +
548 "' not in a symbol pointer or stub section");
552 // Bind non-lazy symbol pointers first.
553 for (auto [IndirectIndex
, ISD
] : enumerate(IndirectSymbols
)) {
554 const auto &Section
= cast
<MCSectionMachO
>(*ISD
.Section
);
556 if (Section
.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS
&&
557 Section
.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS
)
560 // Initialize the section indirect symbol base, if necessary.
561 IndirectSymBase
.insert(std::make_pair(ISD
.Section
, IndirectIndex
));
563 Asm
.registerSymbol(*ISD
.Symbol
);
566 // Then lazy symbol pointers and symbol stubs.
567 for (auto [IndirectIndex
, ISD
] : enumerate(IndirectSymbols
)) {
568 const auto &Section
= cast
<MCSectionMachO
>(*ISD
.Section
);
570 if (Section
.getType() != MachO::S_LAZY_SYMBOL_POINTERS
&&
571 Section
.getType() != MachO::S_SYMBOL_STUBS
)
574 // Initialize the section indirect symbol base, if necessary.
575 IndirectSymBase
.insert(std::make_pair(ISD
.Section
, IndirectIndex
));
577 // Set the symbol type to undefined lazy, but only on construction.
579 // FIXME: Do not hardcode.
580 if (Asm
.registerSymbol(*ISD
.Symbol
))
581 cast
<MCSymbolMachO
>(ISD
.Symbol
)->setReferenceTypeUndefinedLazy(true);
585 /// computeSymbolTable - Compute the symbol table data
586 void MachObjectWriter::computeSymbolTable(
587 MCAssembler
&Asm
, std::vector
<MachSymbolData
> &LocalSymbolData
,
588 std::vector
<MachSymbolData
> &ExternalSymbolData
,
589 std::vector
<MachSymbolData
> &UndefinedSymbolData
) {
590 // Build section lookup table.
591 DenseMap
<const MCSection
*, uint8_t> SectionIndexMap
;
593 for (MCSection
&Sec
: Asm
)
594 SectionIndexMap
[&Sec
] = Index
++;
595 assert(Index
<= 256 && "Too many sections!");
597 // Build the string table.
598 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
599 if (!cast
<MCSymbolMachO
>(Symbol
).isSymbolLinkerVisible())
602 StringTable
.add(Symbol
.getName());
604 StringTable
.finalize();
606 // Build the symbol arrays but only for non-local symbols.
608 // The particular order that we collect and then sort the symbols is chosen to
609 // match 'as'. Even though it doesn't matter for correctness, this is
610 // important for letting us diff .o files.
611 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
612 // Ignore non-linker visible symbols.
613 if (!cast
<MCSymbolMachO
>(Symbol
).isSymbolLinkerVisible())
616 if (!Symbol
.isExternal() && !Symbol
.isUndefined())
620 MSD
.Symbol
= &Symbol
;
621 MSD
.StringIndex
= StringTable
.getOffset(Symbol
.getName());
623 if (Symbol
.isUndefined()) {
624 MSD
.SectionIndex
= 0;
625 UndefinedSymbolData
.push_back(MSD
);
626 } else if (Symbol
.isAbsolute()) {
627 MSD
.SectionIndex
= 0;
628 ExternalSymbolData
.push_back(MSD
);
630 MSD
.SectionIndex
= SectionIndexMap
.lookup(&Symbol
.getSection());
631 assert(MSD
.SectionIndex
&& "Invalid section index!");
632 ExternalSymbolData
.push_back(MSD
);
636 // Now add the data for local symbols.
637 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
638 // Ignore non-linker visible symbols.
639 if (!cast
<MCSymbolMachO
>(Symbol
).isSymbolLinkerVisible())
642 if (Symbol
.isExternal() || Symbol
.isUndefined())
646 MSD
.Symbol
= &Symbol
;
647 MSD
.StringIndex
= StringTable
.getOffset(Symbol
.getName());
649 if (Symbol
.isAbsolute()) {
650 MSD
.SectionIndex
= 0;
651 LocalSymbolData
.push_back(MSD
);
653 MSD
.SectionIndex
= SectionIndexMap
.lookup(&Symbol
.getSection());
654 assert(MSD
.SectionIndex
&& "Invalid section index!");
655 LocalSymbolData
.push_back(MSD
);
659 // External and undefined symbols are required to be in lexicographic order.
660 llvm::sort(ExternalSymbolData
);
661 llvm::sort(UndefinedSymbolData
);
663 // Set the symbol indices.
665 for (auto *SymbolData
:
666 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
667 for (MachSymbolData
&Entry
: *SymbolData
)
668 Entry
.Symbol
->setIndex(Index
++);
670 for (const MCSection
&Section
: Asm
) {
671 for (RelAndSymbol
&Rel
: Relocations
[&Section
]) {
675 // Set the Index and the IsExtern bit.
676 unsigned Index
= Rel
.Sym
->getIndex();
677 assert(isInt
<24>(Index
));
678 if (W
.Endian
== llvm::endianness::little
)
679 Rel
.MRE
.r_word1
= (Rel
.MRE
.r_word1
& (~0U << 24)) | Index
| (1 << 27);
681 Rel
.MRE
.r_word1
= (Rel
.MRE
.r_word1
& 0xff) | Index
<< 8 | (1 << 4);
686 void MachObjectWriter::computeSectionAddresses(const MCAssembler
&Asm
) {
687 // Assign layout order indices to sections.
689 // Compute the section layout order. Virtual sections must go last.
690 for (MCSection
&Sec
: Asm
) {
691 if (!Sec
.isVirtualSection()) {
692 SectionOrder
.push_back(&Sec
);
693 cast
<MCSectionMachO
>(Sec
).setLayoutOrder(i
++);
696 for (MCSection
&Sec
: Asm
) {
697 if (Sec
.isVirtualSection()) {
698 SectionOrder
.push_back(&Sec
);
699 cast
<MCSectionMachO
>(Sec
).setLayoutOrder(i
++);
703 uint64_t StartAddress
= 0;
704 for (const MCSection
*Sec
: SectionOrder
) {
705 StartAddress
= alignTo(StartAddress
, Sec
->getAlign());
706 SectionAddress
[Sec
] = StartAddress
;
707 StartAddress
+= Asm
.getSectionAddressSize(*Sec
);
709 // Explicitly pad the section to match the alignment requirements of the
710 // following one. This is for 'gas' compatibility, it shouldn't
711 /// strictly be necessary.
712 StartAddress
+= getPaddingSize(Asm
, Sec
);
716 void MachObjectWriter::executePostLayoutBinding(MCAssembler
&Asm
) {
717 computeSectionAddresses(Asm
);
719 // Create symbol data for any indirect symbols.
720 bindIndirectSymbols(Asm
);
723 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
724 const MCAssembler
&Asm
, const MCSymbol
&SymA
, const MCFragment
&FB
,
725 bool InSet
, bool IsPCRel
) const {
729 // The effective address is
730 // addr(atom(A)) + offset(A)
731 // - addr(atom(B)) - offset(B)
732 // and the offsets are not relocatable, so the fixup is fully resolved when
733 // addr(atom(A)) - addr(atom(B)) == 0.
734 const MCSymbol
&SA
= findAliasedSymbol(SymA
);
735 const MCSection
&SecA
= SA
.getSection();
736 const MCSection
&SecB
= *FB
.getParent();
739 // The simple (Darwin, except on x86_64) way of dealing with this was to
740 // assume that any reference to a temporary symbol *must* be a temporary
741 // symbol in the same atom, unless the sections differ. Therefore, any PCrel
742 // relocation to a temporary symbol (in the same section) is fully
743 // resolved. This also works in conjunction with absolutized .set, which
744 // requires the compiler to use .set to absolutize the differences between
745 // symbols which the compiler knows to be assembly time constants, so we
746 // don't need to worry about considering symbol differences fully resolved.
748 // If the file isn't using sub-sections-via-symbols, we can make the
749 // same assumptions about any symbol that we normally make about
752 bool hasReliableSymbolDifference
= isX86_64();
753 if (!hasReliableSymbolDifference
) {
754 if (!SA
.isInSection() || &SecA
!= &SecB
||
755 (!SA
.isTemporary() && FB
.getAtom() != SA
.getFragment()->getAtom() &&
756 SubsectionsViaSymbols
))
762 // If they are not in the same section, we can't compute the diff.
766 // If the atoms are the same, they are guaranteed to have the same address.
767 return SA
.getFragment()->getAtom() == FB
.getAtom();
770 static MachO::LoadCommandType
getLCFromMCVM(MCVersionMinType Type
) {
772 case MCVM_OSXVersionMin
: return MachO::LC_VERSION_MIN_MACOSX
;
773 case MCVM_IOSVersionMin
: return MachO::LC_VERSION_MIN_IPHONEOS
;
774 case MCVM_TvOSVersionMin
: return MachO::LC_VERSION_MIN_TVOS
;
775 case MCVM_WatchOSVersionMin
: return MachO::LC_VERSION_MIN_WATCHOS
;
777 llvm_unreachable("Invalid mc version min type");
780 void MachObjectWriter::populateAddrSigSection(MCAssembler
&Asm
) {
781 MCSection
*AddrSigSection
=
782 Asm
.getContext().getObjectFileInfo()->getAddrSigSection();
783 unsigned Log2Size
= is64Bit() ? 3 : 2;
784 for (const MCSymbol
*S
: getAddrsigSyms()) {
785 if (!S
->isRegistered())
787 MachO::any_relocation_info MRE
;
789 MRE
.r_word1
= (Log2Size
<< 25) | (MachO::GENERIC_RELOC_VANILLA
<< 28);
790 addRelocation(S
, AddrSigSection
, MRE
);
794 uint64_t MachObjectWriter::writeObject(MCAssembler
&Asm
) {
795 uint64_t StartOffset
= W
.OS
.tell();
796 auto NumBytesWritten
= [&] { return W
.OS
.tell() - StartOffset
; };
798 populateAddrSigSection(Asm
);
800 // Compute symbol table information and bind symbol indices.
801 computeSymbolTable(Asm
, LocalSymbolData
, ExternalSymbolData
,
802 UndefinedSymbolData
);
804 if (!CGProfile
.empty()) {
805 MCSection
*CGProfileSection
= Asm
.getContext().getMachOSection(
806 "__LLVM", "__cg_profile", 0, SectionKind::getMetadata());
807 auto &Frag
= cast
<MCDataFragment
>(*CGProfileSection
->begin());
808 Frag
.getContents().clear();
809 raw_svector_ostream
OS(Frag
.getContents());
810 for (const MCObjectWriter::CGProfileEntry
&CGPE
: CGProfile
) {
811 uint32_t FromIndex
= CGPE
.From
->getSymbol().getIndex();
812 uint32_t ToIndex
= CGPE
.To
->getSymbol().getIndex();
813 support::endian::write(OS
, FromIndex
, W
.Endian
);
814 support::endian::write(OS
, ToIndex
, W
.Endian
);
815 support::endian::write(OS
, CGPE
.Count
, W
.Endian
);
819 unsigned NumSections
= Asm
.end() - Asm
.begin();
821 // The section data starts after the header, the segment load command (and
822 // section headers) and the symbol table.
823 unsigned NumLoadCommands
= 1;
824 uint64_t LoadCommandsSize
= is64Bit() ?
825 sizeof(MachO::segment_command_64
) + NumSections
* sizeof(MachO::section_64
):
826 sizeof(MachO::segment_command
) + NumSections
* sizeof(MachO::section
);
828 // Add the deployment target version info load command size, if used.
829 if (VersionInfo
.Major
!= 0) {
831 if (VersionInfo
.EmitBuildVersion
)
832 LoadCommandsSize
+= sizeof(MachO::build_version_command
);
834 LoadCommandsSize
+= sizeof(MachO::version_min_command
);
837 // Add the target variant version info load command size, if used.
838 if (TargetVariantVersionInfo
.Major
!= 0) {
840 assert(TargetVariantVersionInfo
.EmitBuildVersion
&&
841 "target variant should use build version");
842 LoadCommandsSize
+= sizeof(MachO::build_version_command
);
845 // Add the data-in-code load command size, if used.
846 unsigned NumDataRegions
= DataRegions
.size();
847 if (NumDataRegions
) {
849 LoadCommandsSize
+= sizeof(MachO::linkedit_data_command
);
852 // Add the loh load command size, if used.
853 uint64_t LOHRawSize
= LOHContainer
.getEmitSize(Asm
, *this);
854 uint64_t LOHSize
= alignTo(LOHRawSize
, is64Bit() ? 8 : 4);
857 LoadCommandsSize
+= sizeof(MachO::linkedit_data_command
);
860 // Add the symbol table load command sizes, if used.
861 unsigned NumSymbols
= LocalSymbolData
.size() + ExternalSymbolData
.size() +
862 UndefinedSymbolData
.size();
864 NumLoadCommands
+= 2;
865 LoadCommandsSize
+= (sizeof(MachO::symtab_command
) +
866 sizeof(MachO::dysymtab_command
));
869 // Add the linker option load commands sizes.
870 for (const auto &Option
: LinkerOptions
) {
872 LoadCommandsSize
+= ComputeLinkerOptionsLoadCommandSize(Option
, is64Bit());
875 // Compute the total size of the section data, as well as its file size and vm
877 uint64_t SectionDataStart
= (is64Bit() ? sizeof(MachO::mach_header_64
) :
878 sizeof(MachO::mach_header
)) + LoadCommandsSize
;
879 uint64_t SectionDataSize
= 0;
880 uint64_t SectionDataFileSize
= 0;
882 for (const MCSection
&Sec
: Asm
) {
883 uint64_t Address
= getSectionAddress(&Sec
);
884 uint64_t Size
= Asm
.getSectionAddressSize(Sec
);
885 uint64_t FileSize
= Asm
.getSectionFileSize(Sec
);
886 FileSize
+= getPaddingSize(Asm
, &Sec
);
888 VMSize
= std::max(VMSize
, Address
+ Size
);
890 if (Sec
.isVirtualSection())
893 SectionDataSize
= std::max(SectionDataSize
, Address
+ Size
);
894 SectionDataFileSize
= std::max(SectionDataFileSize
, Address
+ FileSize
);
897 // The section data is padded to pointer size bytes.
899 // FIXME: Is this machine dependent?
900 unsigned SectionDataPadding
=
901 offsetToAlignment(SectionDataFileSize
, is64Bit() ? Align(8) : Align(4));
902 SectionDataFileSize
+= SectionDataPadding
;
904 // Write the prolog, starting with the header and load command...
905 writeHeader(MachO::MH_OBJECT
, NumLoadCommands
, LoadCommandsSize
,
906 SubsectionsViaSymbols
);
908 MachO::VM_PROT_READ
| MachO::VM_PROT_WRITE
| MachO::VM_PROT_EXECUTE
;
909 writeSegmentLoadCommand("", NumSections
, 0, VMSize
, SectionDataStart
,
910 SectionDataSize
, Prot
, Prot
);
912 // ... and then the section headers.
913 uint64_t RelocTableEnd
= SectionDataStart
+ SectionDataFileSize
;
914 for (const MCSection
&Section
: Asm
) {
915 const auto &Sec
= cast
<MCSectionMachO
>(Section
);
916 std::vector
<RelAndSymbol
> &Relocs
= Relocations
[&Sec
];
917 unsigned NumRelocs
= Relocs
.size();
918 uint64_t SectionStart
= SectionDataStart
+ getSectionAddress(&Sec
);
919 unsigned Flags
= Sec
.getTypeAndAttributes();
920 if (Sec
.hasInstructions())
921 Flags
|= MachO::S_ATTR_SOME_INSTRUCTIONS
;
922 if (!cast
<MCSectionMachO
>(Sec
).isVirtualSection() &&
923 !isUInt
<32>(SectionStart
)) {
924 Asm
.getContext().reportError(
925 SMLoc(), "cannot encode offset of section; object file too large");
926 return NumBytesWritten();
928 if (NumRelocs
&& !isUInt
<32>(RelocTableEnd
)) {
929 Asm
.getContext().reportError(
931 "cannot encode offset of relocations; object file too large");
932 return NumBytesWritten();
934 writeSection(Asm
, Sec
, getSectionAddress(&Sec
), SectionStart
, Flags
,
935 RelocTableEnd
, NumRelocs
);
936 RelocTableEnd
+= NumRelocs
* sizeof(MachO::any_relocation_info
);
939 // Write out the deployment target information, if it's available.
940 auto EmitDeploymentTargetVersion
=
941 [&](const VersionInfoType
&VersionInfo
) {
942 auto EncodeVersion
= [](VersionTuple V
) -> uint32_t {
943 assert(!V
.empty() && "empty version");
944 unsigned Update
= V
.getSubminor().value_or(0);
945 unsigned Minor
= V
.getMinor().value_or(0);
946 assert(Update
< 256 && "unencodable update target version");
947 assert(Minor
< 256 && "unencodable minor target version");
948 assert(V
.getMajor() < 65536 && "unencodable major target version");
949 return Update
| (Minor
<< 8) | (V
.getMajor() << 16);
951 uint32_t EncodedVersion
= EncodeVersion(VersionTuple(
952 VersionInfo
.Major
, VersionInfo
.Minor
, VersionInfo
.Update
));
953 uint32_t SDKVersion
= !VersionInfo
.SDKVersion
.empty()
954 ? EncodeVersion(VersionInfo
.SDKVersion
)
956 if (VersionInfo
.EmitBuildVersion
) {
957 // FIXME: Currently empty tools. Add clang version in the future.
958 W
.write
<uint32_t>(MachO::LC_BUILD_VERSION
);
959 W
.write
<uint32_t>(sizeof(MachO::build_version_command
));
960 W
.write
<uint32_t>(VersionInfo
.TypeOrPlatform
.Platform
);
961 W
.write
<uint32_t>(EncodedVersion
);
962 W
.write
<uint32_t>(SDKVersion
);
963 W
.write
<uint32_t>(0); // Empty tools list.
965 MachO::LoadCommandType LCType
=
966 getLCFromMCVM(VersionInfo
.TypeOrPlatform
.Type
);
967 W
.write
<uint32_t>(LCType
);
968 W
.write
<uint32_t>(sizeof(MachO::version_min_command
));
969 W
.write
<uint32_t>(EncodedVersion
);
970 W
.write
<uint32_t>(SDKVersion
);
973 if (VersionInfo
.Major
!= 0)
974 EmitDeploymentTargetVersion(VersionInfo
);
975 if (TargetVariantVersionInfo
.Major
!= 0)
976 EmitDeploymentTargetVersion(TargetVariantVersionInfo
);
978 // Write the data-in-code load command, if used.
979 uint64_t DataInCodeTableEnd
= RelocTableEnd
+ NumDataRegions
* 8;
980 if (NumDataRegions
) {
981 uint64_t DataRegionsOffset
= RelocTableEnd
;
982 uint64_t DataRegionsSize
= NumDataRegions
* 8;
983 writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE
, DataRegionsOffset
,
987 // Write the loh load command, if used.
988 uint64_t LOHTableEnd
= DataInCodeTableEnd
+ LOHSize
;
990 writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT
,
991 DataInCodeTableEnd
, LOHSize
);
993 // Write the symbol table load command, if used.
995 unsigned FirstLocalSymbol
= 0;
996 unsigned NumLocalSymbols
= LocalSymbolData
.size();
997 unsigned FirstExternalSymbol
= FirstLocalSymbol
+ NumLocalSymbols
;
998 unsigned NumExternalSymbols
= ExternalSymbolData
.size();
999 unsigned FirstUndefinedSymbol
= FirstExternalSymbol
+ NumExternalSymbols
;
1000 unsigned NumUndefinedSymbols
= UndefinedSymbolData
.size();
1001 unsigned NumIndirectSymbols
= IndirectSymbols
.size();
1002 unsigned NumSymTabSymbols
=
1003 NumLocalSymbols
+ NumExternalSymbols
+ NumUndefinedSymbols
;
1004 uint64_t IndirectSymbolSize
= NumIndirectSymbols
* 4;
1005 uint64_t IndirectSymbolOffset
= 0;
1007 // If used, the indirect symbols are written immediately after the LOH table.
1008 if (NumIndirectSymbols
)
1009 IndirectSymbolOffset
= LOHTableEnd
;
1011 // The symbol table is written after the indirect symbol data.
1012 uint64_t SymbolTableOffset
= LOHTableEnd
+ IndirectSymbolSize
;
1014 // The string table is written after symbol table.
1015 uint64_t StringTableOffset
=
1016 SymbolTableOffset
+ NumSymTabSymbols
* (is64Bit() ?
1017 sizeof(MachO::nlist_64
) :
1018 sizeof(MachO::nlist
));
1019 writeSymtabLoadCommand(SymbolTableOffset
, NumSymTabSymbols
,
1020 StringTableOffset
, StringTable
.getSize());
1022 writeDysymtabLoadCommand(FirstLocalSymbol
, NumLocalSymbols
,
1023 FirstExternalSymbol
, NumExternalSymbols
,
1024 FirstUndefinedSymbol
, NumUndefinedSymbols
,
1025 IndirectSymbolOffset
, NumIndirectSymbols
);
1028 // Write the linker options load commands.
1029 for (const auto &Option
: LinkerOptions
)
1030 writeLinkerOptionsLoadCommand(Option
);
1032 // Write the actual section data.
1033 for (const MCSection
&Sec
: Asm
) {
1034 Asm
.writeSectionData(W
.OS
, &Sec
);
1036 uint64_t Pad
= getPaddingSize(Asm
, &Sec
);
1037 W
.OS
.write_zeros(Pad
);
1040 // Write the extra padding.
1041 W
.OS
.write_zeros(SectionDataPadding
);
1043 // Write the relocation entries.
1044 for (const MCSection
&Sec
: Asm
) {
1045 // Write the section relocation entries, in reverse order to match 'as'
1046 // (approximately, the exact algorithm is more complicated than this).
1047 std::vector
<RelAndSymbol
> &Relocs
= Relocations
[&Sec
];
1048 for (const RelAndSymbol
&Rel
: llvm::reverse(Relocs
)) {
1049 W
.write
<uint32_t>(Rel
.MRE
.r_word0
);
1050 W
.write
<uint32_t>(Rel
.MRE
.r_word1
);
1054 // Write out the data-in-code region payload, if there is one.
1055 for (DataRegionData Data
: DataRegions
) {
1056 uint64_t Start
= getSymbolAddress(*Data
.Start
, Asm
);
1059 End
= getSymbolAddress(*Data
.End
, Asm
);
1061 report_fatal_error("Data region not terminated");
1063 LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data
.Kind
1064 << " start: " << Start
<< "(" << Data
.Start
->getName()
1065 << ")" << " end: " << End
<< "(" << Data
.End
->getName()
1066 << ")" << " size: " << End
- Start
<< "\n");
1067 W
.write
<uint32_t>(Start
);
1068 W
.write
<uint16_t>(End
- Start
);
1069 W
.write
<uint16_t>(Data
.Kind
);
1072 // Write out the linker optimization hints (LOH), if there are any.
1075 unsigned Start
= W
.OS
.tell();
1077 LOHContainer
.emit(Asm
, *this);
1078 // Pad to a multiple of the pointer size.
1080 offsetToAlignment(LOHRawSize
, is64Bit() ? Align(8) : Align(4)));
1081 assert(W
.OS
.tell() - Start
== LOHSize
);
1084 // Write the symbol table data, if used.
1086 // Write the indirect symbol entries.
1087 for (auto &ISD
: IndirectSymbols
) {
1088 // Indirect symbols in the non-lazy symbol pointer section have some
1089 // special handling.
1090 const MCSectionMachO
&Section
=
1091 static_cast<const MCSectionMachO
&>(*ISD
.Section
);
1092 if (Section
.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS
) {
1093 // If this symbol is defined and internal, mark it as such.
1094 if (ISD
.Symbol
->isDefined() && !ISD
.Symbol
->isExternal()) {
1095 uint32_t Flags
= MachO::INDIRECT_SYMBOL_LOCAL
;
1096 if (ISD
.Symbol
->isAbsolute())
1097 Flags
|= MachO::INDIRECT_SYMBOL_ABS
;
1098 W
.write
<uint32_t>(Flags
);
1103 W
.write
<uint32_t>(ISD
.Symbol
->getIndex());
1106 // FIXME: Check that offsets match computed ones.
1108 // Write the symbol table entries.
1109 for (auto *SymbolData
:
1110 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
1111 for (MachSymbolData
&Entry
: *SymbolData
)
1112 writeNlist(Entry
, Asm
);
1114 // Write the string table.
1115 StringTable
.write(W
.OS
);
1118 return NumBytesWritten();