1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCDirectives.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCFixupKindInfo.h"
19 #include "llvm/MC/MCFragment.h"
20 #include "llvm/MC/MCMachObjectWriter.h"
21 #include "llvm/MC/MCObjectWriter.h"
22 #include "llvm/MC/MCSection.h"
23 #include "llvm/MC/MCSectionMachO.h"
24 #include "llvm/MC/MCSymbol.h"
25 #include "llvm/MC/MCSymbolMachO.h"
26 #include "llvm/MC/MCValue.h"
27 #include "llvm/Support/Casting.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Support/raw_ostream.h"
41 #define DEBUG_TYPE "mc"
/// Clear the writer's symbol bookkeeping: the indirect-symbol base table and
/// the local, external and undefined symbol lists are emptied, then
/// MCObjectWriter::reset() is called.
43 void MachObjectWriter::reset() {
45 IndirectSymBase
.clear();
47 LocalSymbolData
.clear();
48 ExternalSymbolData
.clear();
49 UndefinedSymbolData
.clear();
50 MCObjectWriter::reset();
/// Decide whether a reference to \p S needs an external relocation entry.
///
/// The cases are considered in order: undefined symbols, weak definitions
/// (\p S is queried as an MCSymbolMachO), and finally everything else, which
/// can use an internal relocation.
/// NOTE(review): the return statement of each case is not visible in this
/// listing; confirm the results against the full file.
53 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol
&S
) {
54 // Undefined symbols are always extern.
58 // References to weak definitions require external relocation entries; the
59 // definition may not always be the one in the same object file.
60 if (cast
<MCSymbolMachO
>(S
).isWeakDefinition())
63 // Otherwise, we can use an internal relocation.
/// Order symbol data entries by the name of the symbol each entry refers to.
/// computeSymbolTable() sorts the external and undefined symbol lists with
/// llvm::sort, which uses this comparison.
67 bool MachObjectWriter::
68 MachSymbolData::operator<(const MachSymbolData
&RHS
) const {
69 return Symbol
->getName() < RHS
.Symbol
->getName();
/// Look up fixup kind info through the backend of \p Asm and report whether
/// its flags include MCFixupKindInfo::FKF_IsPCRel.
/// NOTE(review): the argument passed to getFixupKindInfo() is not visible in
/// this listing; presumably it is derived from \p Kind.
72 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler
&Asm
, unsigned Kind
) {
73 const MCFixupKindInfo
&FKI
= Asm
.getBackend().getFixupKindInfo(
// A non-zero masked value converts to true.
76 return FKI
.Flags
& MCFixupKindInfo::FKF_IsPCRel
;
/// Return the address of \p Fragment: the address of the section that
/// contains it plus the fragment's offset as reported by \p Layout.
79 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment
*Fragment
,
80 const MCAsmLayout
&Layout
) const {
81 return getSectionAddress(Fragment
->getParent()) +
82 Layout
.getFragmentOffset(Fragment
);
/// Compute the address of symbol \p S under \p Layout.
///
/// Variable symbols are evaluated first: a value that is an MCConstantExpr is
/// handled directly; any other value is evaluated as a relocatable
/// expression, every symbol the result refers to must be defined (a fatal
/// error is reported otherwise), and the address is assembled from the
/// constant term and the recursively computed addresses of those symbols.
/// For the remaining symbols the result is the address of the symbol's
/// section plus the symbol's offset in the layout.
/// NOTE(review): the `isVariable()` guard, the declaration of `Target` and
/// the returns of the variable path are not visible in this listing.
85 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol
&S
,
86 const MCAsmLayout
&Layout
) const {
87 // If this is a variable, then recursively evaluate now.
89 if (const MCConstantExpr
*C
=
90 dyn_cast
<const MCConstantExpr
>(S
.getVariableValue()))
94 if (!S
.getVariableValue()->evaluateAsRelocatable(Target
, &Layout
, nullptr))
95 report_fatal_error("unable to evaluate offset for variable '" +
98 // Verify that any used symbols are defined.
99 if (Target
.getSymA() && Target
.getSymA()->getSymbol().isUndefined())
100 report_fatal_error("unable to evaluate offset to undefined symbol '" +
101 Target
.getSymA()->getSymbol().getName() + "'");
102 if (Target
.getSymB() && Target
.getSymB()->getSymbol().isUndefined())
103 report_fatal_error("unable to evaluate offset to undefined symbol '" +
104 Target
.getSymB()->getSymbol().getName() + "'");
// Start from the constant term and add the address of each referenced symbol.
106 uint64_t Address
= Target
.getConstant();
107 if (Target
.getSymA())
108 Address
+= getSymbolAddress(Target
.getSymA()->getSymbol(), Layout
);
// NOTE(review): the SymB term is added exactly like SymA. If the evaluated
// value denotes SymA - SymB + constant, a subtraction would be expected
// here -- confirm the intended semantics.
109 if (Target
.getSymB())
110 Address
+= getSymbolAddress(Target
.getSymB()->getSymbol(), Layout
);
// Plain symbol: section address plus the symbol's offset within the layout.
114 return getSectionAddress(S
.getFragment()->getParent()) +
115 Layout
.getSymbolOffset(S
);
/// Compute how many bytes must follow \p Sec so that its end address
/// satisfies the alignment of the section that comes next in the layout
/// order.
///
/// Two cases are tested before the alignment computation: \p Sec being the
/// last section in the layout order, and the next section being virtual.
/// NOTE(review): the values returned for those two cases are not visible in
/// this listing; presumably no padding is needed.
118 uint64_t MachObjectWriter::getPaddingSize(const MCSection
*Sec
,
119 const MCAsmLayout
&Layout
) const {
// Address of the first byte past the end of Sec.
120 uint64_t EndAddr
= getSectionAddress(Sec
) + Layout
.getSectionAddressSize(Sec
);
121 unsigned Next
= Sec
->getLayoutOrder() + 1;
// Sec is the last section in the layout order.
122 if (Next
>= Layout
.getSectionOrder().size())
125 const MCSection
&NextSec
= *Layout
.getSectionOrder()[Next
];
126 if (NextSec
.isVirtualSection())
128 return OffsetToAlignment(EndAddr
, NextSec
.getAlignment());
/// Write the Mach-O file header (struct mach_header or mach_header_64).
///
/// \param Type The header file type value written after the CPU fields.
/// \param NumLoadCommands The number of load commands that follow the header.
/// \param LoadCommandsSize The total size in bytes of those load commands.
/// \param SubsectionsViaSymbols When true, MH_SUBSECTIONS_VIA_SYMBOLS is set
/// in the header flags.
///
/// The trailing assert checks that the number of bytes written equals the
/// size of the header struct selected by is64Bit().
131 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type
,
132 unsigned NumLoadCommands
,
133 unsigned LoadCommandsSize
,
134 bool SubsectionsViaSymbols
) {
137 if (SubsectionsViaSymbols
)
138 Flags
|= MachO::MH_SUBSECTIONS_VIA_SYMBOLS
;
140 // struct mach_header (28 bytes) or
141 // struct mach_header_64 (32 bytes)
// Remember the stream position so the size written can be asserted below.
143 uint64_t Start
= W
.OS
.tell();
// Magic number, chosen by pointer width.
146 W
.write
<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64
: MachO::MH_MAGIC
);
// CPU type and subtype come from the target-specific object writer.
148 W
.write
<uint32_t>(TargetObjectWriter
->getCPUType());
149 W
.write
<uint32_t>(TargetObjectWriter
->getCPUSubtype());
151 W
.write
<uint32_t>(Type
);
152 W
.write
<uint32_t>(NumLoadCommands
);
153 W
.write
<uint32_t>(LoadCommandsSize
);
154 W
.write
<uint32_t>(Flags
);
// NOTE(review): per the sizes quoted above only the 64-bit header has room
// for this extra word; the guarding condition is not visible in this listing.
156 W
.write
<uint32_t>(0); // reserved
158 assert(W
.OS
.tell() - Start
== (is64Bit() ? sizeof(MachO::mach_header_64
)
159 : sizeof(MachO::mach_header
)));
/// Fill out a fixed-width field of \p Size bytes holding \p Str: the bytes
/// not occupied by the string are written as zeros.
///
/// \p Str must not be longer than \p Size (asserted).
/// NOTE(review): the statement that writes \p Str itself is not visible in
/// this listing.
162 void MachObjectWriter::writeWithPadding(StringRef Str
, uint64_t Size
) {
163 assert(Size
>= Str
.size());
165 W
.OS
.write_zeros(Size
- Str
.size());
168 /// writeSegmentLoadCommand - Write a segment load command.
170 /// \param NumSections The number of sections in this segment.
171 /// \param SectionDataSize The total size of the sections.
///
/// \param Name The segment name, written as a 16-byte zero-padded field.
/// \param VMAddr Written as the segment's vmaddr.
/// \param VMSize Written as the segment's vmsize.
/// \param SectionDataStartOffset Written as the segment's file offset.
/// \param MaxProt Written after the address and size fields, followed by
/// InitProt.
///
/// The command size field covers the segment command struct plus one section
/// header per section. The four address/size fields appear twice below, once
/// as 64-bit and once as 32-bit writes, matching the two struct layouts.
/// NOTE(review): the end of the parameter list (InitProt) and the branch that
/// selects between the two widths are not visible in this listing.
172 void MachObjectWriter::writeSegmentLoadCommand(
173 StringRef Name
, unsigned NumSections
, uint64_t VMAddr
, uint64_t VMSize
,
174 uint64_t SectionDataStartOffset
, uint64_t SectionDataSize
, uint32_t MaxProt
,
176 // struct segment_command (56 bytes) or
177 // struct segment_command_64 (72 bytes)
// Remember the stream position so the size written can be asserted below.
179 uint64_t Start
= W
.OS
.tell();
182 unsigned SegmentLoadCommandSize
=
183 is64Bit() ? sizeof(MachO::segment_command_64
):
184 sizeof(MachO::segment_command
);
185 W
.write
<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64
: MachO::LC_SEGMENT
);
// Command size: the segment command itself plus all of its section headers.
186 W
.write
<uint32_t>(SegmentLoadCommandSize
+
187 NumSections
* (is64Bit() ? sizeof(MachO::section_64
) :
188 sizeof(MachO::section
)));
190 writeWithPadding(Name
, 16);
// 64-bit field widths.
192 W
.write
<uint64_t>(VMAddr
); // vmaddr
193 W
.write
<uint64_t>(VMSize
); // vmsize
194 W
.write
<uint64_t>(SectionDataStartOffset
); // file offset
195 W
.write
<uint64_t>(SectionDataSize
); // file size
// 32-bit field widths.
197 W
.write
<uint32_t>(VMAddr
); // vmaddr
198 W
.write
<uint32_t>(VMSize
); // vmsize
199 W
.write
<uint32_t>(SectionDataStartOffset
); // file offset
200 W
.write
<uint32_t>(SectionDataSize
); // file size
203 W
.write
<uint32_t>(MaxProt
);
205 W
.write
<uint32_t>(InitProt
);
206 W
.write
<uint32_t>(NumSections
);
207 W
.write
<uint32_t>(0); // flags
// Only the segment command struct is written here, not the section headers.
209 assert(W
.OS
.tell() - Start
== SegmentLoadCommandSize
);
/// Write one section header (struct section or section_64) for \p Sec.
///
/// \param Layout Supplies the section's address size (and, in an assert, its
/// file size).
/// \param Sec The section to describe; it is cast to MCSectionMachO.
/// \param VMAddr Written as the section address.
/// \param FileOffset Written as the section's file offset.
/// \param Flags Written as the section flags word.
/// \param RelocationsStart Written as the relocation offset, but only when
/// \p NumRelocations is non-zero; otherwise 0 is written.
/// \param NumRelocations Written as the relocation count.
///
/// The address and size appear twice below, once as 64-bit and once as
/// 32-bit writes, matching the two struct layouts.
/// NOTE(review): the branch selecting the width, the remainder of the
/// virtual-section block and the guard for reserved3 are not visible in this
/// listing.
212 void MachObjectWriter::writeSection(const MCAsmLayout
&Layout
,
213 const MCSection
&Sec
, uint64_t VMAddr
,
214 uint64_t FileOffset
, unsigned Flags
,
215 uint64_t RelocationsStart
,
216 unsigned NumRelocations
) {
217 uint64_t SectionSize
= Layout
.getSectionAddressSize(&Sec
);
218 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(Sec
);
220 // The offset is unused for virtual sections.
221 if (Section
.isVirtualSection()) {
222 assert(Layout
.getSectionFileSize(&Sec
) == 0 && "Invalid file size!");
226 // struct section (68 bytes) or
227 // struct section_64 (80 bytes)
// Remember the stream position so the size written can be asserted below.
229 uint64_t Start
= W
.OS
.tell();
// Section and segment names are fixed 16-byte, zero-padded fields.
232 writeWithPadding(Section
.getSectionName(), 16);
233 writeWithPadding(Section
.getSegmentName(), 16);
// 64-bit field widths.
235 W
.write
<uint64_t>(VMAddr
); // address
236 W
.write
<uint64_t>(SectionSize
); // size
// 32-bit field widths.
238 W
.write
<uint32_t>(VMAddr
); // address
239 W
.write
<uint32_t>(SectionSize
); // size
241 W
.write
<uint32_t>(FileOffset
);
// The alignment is stored as its base-2 logarithm, so it must be a power of
// two.
243 assert(isPowerOf2_32(Section
.getAlignment()) && "Invalid alignment!");
244 W
.write
<uint32_t>(Log2_32(Section
.getAlignment()));
// A section without relocations gets a relocation offset of 0.
245 W
.write
<uint32_t>(NumRelocations
? RelocationsStart
: 0);
246 W
.write
<uint32_t>(NumRelocations
);
247 W
.write
<uint32_t>(Flags
);
// reserved1 carries the value recorded for this section in IndirectSymBase
// (filled in by bindIndirectSymbols()).
248 W
.write
<uint32_t>(IndirectSymBase
.lookup(&Sec
)); // reserved1
249 W
.write
<uint32_t>(Section
.getStubSize()); // reserved2
251 W
.write
<uint32_t>(0); // reserved3
253 assert(W
.OS
.tell() - Start
==
254 (is64Bit() ? sizeof(MachO::section_64
) : sizeof(MachO::section
)));
/// Write an LC_SYMTAB load command (struct symtab_command).
///
/// \param SymbolOffset Written as the symbol table offset.
/// \param StringTableOffset Written as the string table offset.
/// \param StringTableSize Written as the string table size.
///
/// The symbol count (NumSymbols) is written between the symbol offset and
/// the string table offset.
/// NOTE(review): the NumSymbols parameter line is not visible in this
/// listing.
257 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset
,
259 uint32_t StringTableOffset
,
260 uint32_t StringTableSize
) {
261 // struct symtab_command (24 bytes)
// Remember the stream position so the size written can be asserted below.
263 uint64_t Start
= W
.OS
.tell();
// Command identifier followed by the command's own size.
266 W
.write
<uint32_t>(MachO::LC_SYMTAB
);
267 W
.write
<uint32_t>(sizeof(MachO::symtab_command
));
268 W
.write
<uint32_t>(SymbolOffset
);
269 W
.write
<uint32_t>(NumSymbols
);
270 W
.write
<uint32_t>(StringTableOffset
);
271 W
.write
<uint32_t>(StringTableSize
);
273 assert(W
.OS
.tell() - Start
== sizeof(MachO::symtab_command
));
/// Write an LC_DYSYMTAB load command (struct dysymtab_command).
///
/// The first-index/count pairs for the local, external and undefined symbol
/// groups and the indirect symbol table offset/count are written from the
/// parameters; every other field of the command (table of contents, module
/// table, external reference table, external and local relocations) is
/// written as zero.
276 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol
,
277 uint32_t NumLocalSymbols
,
278 uint32_t FirstExternalSymbol
,
279 uint32_t NumExternalSymbols
,
280 uint32_t FirstUndefinedSymbol
,
281 uint32_t NumUndefinedSymbols
,
282 uint32_t IndirectSymbolOffset
,
283 uint32_t NumIndirectSymbols
) {
284 // struct dysymtab_command (80 bytes)
// Remember the stream position so the size written can be asserted below.
286 uint64_t Start
= W
.OS
.tell();
// Command identifier followed by the command's own size.
289 W
.write
<uint32_t>(MachO::LC_DYSYMTAB
);
290 W
.write
<uint32_t>(sizeof(MachO::dysymtab_command
));
// Index of the first symbol and symbol count for each of the three groups.
291 W
.write
<uint32_t>(FirstLocalSymbol
);
292 W
.write
<uint32_t>(NumLocalSymbols
);
293 W
.write
<uint32_t>(FirstExternalSymbol
);
294 W
.write
<uint32_t>(NumExternalSymbols
);
295 W
.write
<uint32_t>(FirstUndefinedSymbol
);
296 W
.write
<uint32_t>(NumUndefinedSymbols
);
297 W
.write
<uint32_t>(0); // tocoff
298 W
.write
<uint32_t>(0); // ntoc
299 W
.write
<uint32_t>(0); // modtaboff
300 W
.write
<uint32_t>(0); // nmodtab
301 W
.write
<uint32_t>(0); // extrefsymoff
302 W
.write
<uint32_t>(0); // nextrefsyms
303 W
.write
<uint32_t>(IndirectSymbolOffset
);
304 W
.write
<uint32_t>(NumIndirectSymbols
);
305 W
.write
<uint32_t>(0); // extreloff
306 W
.write
<uint32_t>(0); // nextrel
307 W
.write
<uint32_t>(0); // locreloff
308 W
.write
<uint32_t>(0); // nlocrel
310 assert(W
.OS
.tell() - Start
== sizeof(MachO::dysymtab_command
));
/// Search the local, external and undefined symbol lists, in that order, for
/// the entry whose Symbol pointer is the address of \p Sym.
/// NOTE(review): the return statements (match found / nothing found) are not
/// visible in this listing; writeNlist() uses the result as a pointer.
313 MachObjectWriter::MachSymbolData
*
314 MachObjectWriter::findSymbolData(const MCSymbol
&Sym
) {
315 for (auto *SymbolData
:
316 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
317 for (MachSymbolData
&Entry
: *SymbolData
)
// Entries are matched by symbol identity (pointer comparison), not by name.
318 if (Entry
.Symbol
== &Sym
)
/// Follow a chain of variable symbols starting at \p Sym: while the current
/// symbol is a variable, its value is inspected as an MCSymbolRefExpr and
/// the walk continues with the symbol that expression refers to.
/// NOTE(review): dyn_cast can yield null when the value is not a plain
/// symbol reference; the handling of that case and the final return are not
/// visible in this listing.
324 const MCSymbol
&MachObjectWriter::findAliasedSymbol(const MCSymbol
&Sym
) const {
325 const MCSymbol
*S
= &Sym
;
326 while (S
->isVariable()) {
327 const MCExpr
*Value
= S
->getVariableValue();
328 const auto *Ref
= dyn_cast
<MCSymbolRefExpr
>(Value
);
// Step to the symbol this variable refers to.
331 S
= &Ref
->getSymbol();
/// Write one symbol table entry (nlist) for \p MSD.
///
/// The symbol is first resolved through findAliasedSymbol(). For an alias
/// the section index is taken from the aliasee's symbol data and the type
/// and address computations use the aliasee, while the visibility bits
/// (private extern / external) still come from the original symbol. The
/// entry consists of the string index, a section index byte, 16 bits of
/// encoded flags and the address.
/// NOTE(review): the declaration of `Type`, the write of the type byte, the
/// guards around the alias block and the branch choosing between the 64-bit
/// and 32-bit address writes are not visible in this listing.
336 void MachObjectWriter::writeNlist(MachSymbolData
&MSD
,
337 const MCAsmLayout
&Layout
) {
338 const MCSymbol
*Symbol
= MSD
.Symbol
;
339 const MCSymbol
&Data
= *Symbol
;
340 const MCSymbol
*AliasedSymbol
= &findAliasedSymbol(*Symbol
);
341 uint8_t SectionIndex
= MSD
.SectionIndex
;
343 uint64_t Address
= 0;
// The entry is an alias when resolving it led to a different symbol.
344 bool IsAlias
= Symbol
!= AliasedSymbol
;
// Keep a reference to the symbol as named by MSD; Symbol may be redirected
// to the aliasee below.
346 const MCSymbol
&OrigSymbol
= *Symbol
;
// NOTE(review): declared without an initializer and dereferenced below;
// confirm it is always assigned (and non-null) before each use.
347 MachSymbolData
*AliaseeInfo
;
349 AliaseeInfo
= findSymbolData(*AliasedSymbol
);
351 SectionIndex
= AliaseeInfo
->SectionIndex
;
352 Symbol
= AliasedSymbol
;
353 // FIXME: Should this update Data as well?
356 // Set the N_TYPE bits. See <mach-o/nlist.h>.
358 // FIXME: Are the prebound or indirect fields possible here?
359 if (IsAlias
&& Symbol
->isUndefined())
360 Type
= MachO::N_INDR
;
361 else if (Symbol
->isUndefined())
362 Type
= MachO::N_UNDF
;
363 else if (Symbol
->isAbsolute())
366 Type
= MachO::N_SECT
;
368 // FIXME: Set STAB bits.
370 if (Data
.isPrivateExtern())
371 Type
|= MachO::N_PEXT
;
// An undefined symbol that is not an alias is marked external as well.
374 if (Data
.isExternal() || (!IsAlias
&& Symbol
->isUndefined()))
375 Type
|= MachO::N_EXT
;
377 // Compute the symbol address.
// For an alias of an undefined symbol the address field holds the aliasee's
// string table index.
378 if (IsAlias
&& Symbol
->isUndefined())
379 Address
= AliaseeInfo
->StringIndex
;
380 else if (Symbol
->isDefined())
381 Address
= getSymbolAddress(OrigSymbol
, Layout
);
382 else if (Symbol
->isCommon()) {
383 // Common symbols are encoded with the size in the address
384 // field, and their alignment in the flags.
385 Address
= Symbol
->getCommonSize();
388 // struct nlist (12 bytes)
390 W
.write
<uint32_t>(MSD
.StringIndex
);
392 W
.OS
<< char(SectionIndex
);
394 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
// The alt-entry encoding is requested only for aliases whose original
// symbol is an alt entry.
396 bool EncodeAsAltEntry
=
397 IsAlias
&& cast
<MCSymbolMachO
>(OrigSymbol
).isAltEntry();
398 W
.write
<uint16_t>(cast
<MCSymbolMachO
>(Symbol
)->getEncodedFlags(EncodeAsAltEntry
));
// Address field, 64-bit form.
400 W
.write
<uint64_t>(Address
);
// Address field, 32-bit form.
402 W
.write
<uint32_t>(Address
);
/// Write a load command in the linkedit_data_command layout: the command
/// \p Type, the command size, then a data offset and a data size.
/// writeObject() uses it for LC_DATA_IN_CODE and
/// LC_LINKER_OPTIMIZATION_HINT.
/// NOTE(review): the DataOffset and DataSize parameter lines are not visible
/// in this listing.
405 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type
,
// Remember the stream position so the size written can be asserted below.
408 uint64_t Start
= W
.OS
.tell();
411 W
.write
<uint32_t>(Type
);
412 W
.write
<uint32_t>(sizeof(MachO::linkedit_data_command
));
413 W
.write
<uint32_t>(DataOffset
);
414 W
.write
<uint32_t>(DataSize
);
416 assert(W
.OS
.tell() - Start
== sizeof(MachO::linkedit_data_command
));
/// Compute the size in bytes of a linker option load command holding
/// \p Options: the fixed linker_option_command struct, plus every option
/// string with one extra byte each, rounded up to a multiple of 8 when
/// \p is64Bit is true and of 4 otherwise.
419 static unsigned ComputeLinkerOptionsLoadCommandSize(
420 const std::vector
<std::string
> &Options
, bool is64Bit
)
422 unsigned Size
= sizeof(MachO::linker_option_command
);
423 for (const std::string
&Option
: Options
)
// The + 1 accounts for the null byte written after each option.
424 Size
+= Option
.size() + 1;
425 return alignTo(Size
, is64Bit
? 8 : 4);
/// Write one LC_LINKER_OPTION load command: the command identifier, the
/// total command size, the number of options, each option as a
/// null-terminated string, and zero padding up to a multiple of the pointer
/// size (8 bytes for 64-bit, 4 otherwise).
///
/// The trailing assert checks the bytes written against the size computed by
/// ComputeLinkerOptionsLoadCommandSize().
428 void MachObjectWriter::writeLinkerOptionsLoadCommand(
429 const std::vector
<std::string
> &Options
)
431 unsigned Size
= ComputeLinkerOptionsLoadCommandSize(Options
, is64Bit());
432 uint64_t Start
= W
.OS
.tell();
435 W
.write
<uint32_t>(MachO::LC_LINKER_OPTION
);
436 W
.write
<uint32_t>(Size
);
437 W
.write
<uint32_t>(Options
.size());
// Track the unpadded length so the amount of padding can be derived below.
438 uint64_t BytesWritten
= sizeof(MachO::linker_option_command
);
439 for (const std::string
&Option
: Options
) {
440 // Write each string, including the null byte.
441 W
.OS
<< Option
<< '\0';
442 BytesWritten
+= Option
.size() + 1;
445 // Pad to a multiple of the pointer size.
446 W
.OS
.write_zeros(OffsetToAlignment(BytesWritten
, is64Bit() ? 8 : 4));
448 assert(W
.OS
.tell() - Start
== Size
);
/// Forward relocation recording to the target-specific object writer,
/// passing this writer together with the assembler, layout, fragment and
/// fixup.
/// NOTE(review): the remaining call arguments are not visible in this
/// listing; presumably \p Target and \p FixedValue.
451 void MachObjectWriter::recordRelocation(MCAssembler
&Asm
,
452 const MCAsmLayout
&Layout
,
453 const MCFragment
*Fragment
,
454 const MCFixup
&Fixup
, MCValue Target
,
455 uint64_t &FixedValue
) {
456 TargetObjectWriter
->recordRelocation(this, Asm
, Layout
, Fragment
, Fixup
,
/// Validate and bind the assembler's indirect symbols.
///
/// Three passes are made over the indirect symbol list of \p Asm:
///  1. every entry must sit in a non-lazy symbol pointer, lazy symbol
///     pointer, thread-local variable pointer or symbol stub section,
///     otherwise a fatal error is reported;
///  2. entries in non-lazy and thread-local variable pointer sections are
///     processed;
///  3. entries in lazy symbol pointer and symbol stub sections are
///     processed, and their symbols may be flagged as lazily referenced.
/// In passes 2 and 3 the entry's running position in the list is inserted
/// into IndirectSymBase for its section, and the symbol is registered with
/// the assembler.
/// NOTE(review): the statements that skip non-matching entries in passes 2
/// and 3, the reset of IndirectIndex before pass 3, and the declaration and
/// test of `Created` are not visible in this listing.
460 void MachObjectWriter::bindIndirectSymbols(MCAssembler
&Asm
) {
461 // This is the point where 'as' creates actual symbols for indirect symbols
462 // (in the following two passes). It would be easier for us to do this sooner
463 // when we see the attribute, but that makes getting the order in the symbol
464 // table much more complicated than it is worth.
466 // FIXME: Revisit this when the dust settles.
468 // Report errors for use of .indirect_symbol not in a symbol pointer section
470 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
471 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
) {
472 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
474 if (Section
.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS
&&
475 Section
.getType() != MachO::S_LAZY_SYMBOL_POINTERS
&&
476 Section
.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS
&&
477 Section
.getType() != MachO::S_SYMBOL_STUBS
) {
478 MCSymbol
&Symbol
= *it
->Symbol
;
479 report_fatal_error("indirect symbol '" + Symbol
.getName() +
480 "' not in a symbol pointer or stub section");
484 // Bind non-lazy symbol pointers first.
// IndirectIndex advances with the iterator, so it is the entry's position in
// the indirect symbol list.
485 unsigned IndirectIndex
= 0;
486 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
487 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
, ++IndirectIndex
) {
488 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
490 if (Section
.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS
&&
491 Section
.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS
)
494 // Initialize the section indirect symbol base, if necessary.
495 IndirectSymBase
.insert(std::make_pair(it
->Section
, IndirectIndex
));
497 Asm
.registerSymbol(*it
->Symbol
);
500 // Then lazy symbol pointers and symbol stubs.
502 for (MCAssembler::indirect_symbol_iterator it
= Asm
.indirect_symbol_begin(),
503 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
, ++IndirectIndex
) {
504 const MCSectionMachO
&Section
= cast
<MCSectionMachO
>(*it
->Section
);
506 if (Section
.getType() != MachO::S_LAZY_SYMBOL_POINTERS
&&
507 Section
.getType() != MachO::S_SYMBOL_STUBS
)
510 // Initialize the section indirect symbol base, if necessary.
511 IndirectSymBase
.insert(std::make_pair(it
->Section
, IndirectIndex
));
513 // Set the symbol type to undefined lazy, but only on construction.
515 // FIXME: Do not hardcode.
// registerSymbol() receives a pointer to Created as an out-argument.
517 Asm
.registerSymbol(*it
->Symbol
, &Created
);
519 cast
<MCSymbolMachO
>(it
->Symbol
)->setReferenceTypeUndefinedLazy(true);
523 /// computeSymbolTable - Compute the symbol table data
///
/// Steps, in order:
///  - map every section of \p Asm to an index;
///  - add the names of the symbols to the string table and finalize it;
///  - collect the external and undefined symbols into
///    \p ExternalSymbolData / \p UndefinedSymbolData;
///  - collect the remaining symbols into \p LocalSymbolData;
///  - sort the external and undefined lists;
///  - number all symbols consecutively across the local, external and
///    undefined lists, in that order;
///  - patch the symbol index and the IsExtern bit into recorded relocations.
/// Undefined and absolute symbols get section index 0; all others use the
/// index of their section, which is asserted to be non-zero.
/// NOTE(review): the `continue` statements after the filtering conditions,
/// the declarations of `Index` and `MSD`, and the guard that selects which
/// relocations are patched are not visible in this listing.
524 void MachObjectWriter::computeSymbolTable(
525 MCAssembler
&Asm
, std::vector
<MachSymbolData
> &LocalSymbolData
,
526 std::vector
<MachSymbolData
> &ExternalSymbolData
,
527 std::vector
<MachSymbolData
> &UndefinedSymbolData
) {
528 // Build section lookup table.
529 DenseMap
<const MCSection
*, uint8_t> SectionIndexMap
;
531 for (MCAssembler::iterator it
= Asm
.begin(),
532 ie
= Asm
.end(); it
!= ie
; ++it
, ++Index
)
533 SectionIndexMap
[&*it
] = Index
;
534 assert(Index
<= 256 && "Too many sections!");
536 // Build the string table.
537 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
538 if (!Asm
.isSymbolLinkerVisible(Symbol
))
541 StringTable
.add(Symbol
.getName());
543 StringTable
.finalize();
545 // Build the symbol arrays but only for non-local symbols.
547 // The particular order that we collect and then sort the symbols is chosen to
548 // match 'as'. Even though it doesn't matter for correctness, this is
549 // important for letting us diff .o files.
550 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
551 // Ignore non-linker visible symbols.
552 if (!Asm
.isSymbolLinkerVisible(Symbol
))
// Symbols that are neither external nor undefined are handled by the local
// symbol loop further down.
555 if (!Symbol
.isExternal() && !Symbol
.isUndefined())
559 MSD
.Symbol
= &Symbol
;
560 MSD
.StringIndex
= StringTable
.getOffset(Symbol
.getName());
562 if (Symbol
.isUndefined()) {
563 MSD
.SectionIndex
= 0;
564 UndefinedSymbolData
.push_back(MSD
);
565 } else if (Symbol
.isAbsolute()) {
566 MSD
.SectionIndex
= 0;
567 ExternalSymbolData
.push_back(MSD
);
569 MSD
.SectionIndex
= SectionIndexMap
.lookup(&Symbol
.getSection());
570 assert(MSD
.SectionIndex
&& "Invalid section index!");
571 ExternalSymbolData
.push_back(MSD
);
575 // Now add the data for local symbols.
576 for (const MCSymbol
&Symbol
: Asm
.symbols()) {
577 // Ignore non-linker visible symbols.
578 if (!Asm
.isSymbolLinkerVisible(Symbol
))
581 if (Symbol
.isExternal() || Symbol
.isUndefined())
585 MSD
.Symbol
= &Symbol
;
586 MSD
.StringIndex
= StringTable
.getOffset(Symbol
.getName());
588 if (Symbol
.isAbsolute()) {
589 MSD
.SectionIndex
= 0;
590 LocalSymbolData
.push_back(MSD
);
592 MSD
.SectionIndex
= SectionIndexMap
.lookup(&Symbol
.getSection());
593 assert(MSD
.SectionIndex
&& "Invalid section index!");
594 LocalSymbolData
.push_back(MSD
);
598 // External and undefined symbols are required to be in lexicographic order.
599 llvm::sort(ExternalSymbolData
);
600 llvm::sort(UndefinedSymbolData
);
602 // Set the symbol indices.
604 for (auto *SymbolData
:
605 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
606 for (MachSymbolData
&Entry
: *SymbolData
)
607 Entry
.Symbol
->setIndex(Index
++);
// Patch the final symbol indices into the relocations recorded per section.
609 for (const MCSection
&Section
: Asm
) {
610 for (RelAndSymbol
&Rel
: Relocations
[&Section
]) {
614 // Set the Index and the IsExtern bit.
615 unsigned Index
= Rel
.Sym
->getIndex();
616 assert(isInt
<24>(Index
));
// Little-endian form: keep the top 8 bits of r_word1, store the index in the
// low 24 bits and set bit 27.
617 if (W
.Endian
== support::little
)
618 Rel
.MRE
.r_word1
= (Rel
.MRE
.r_word1
& (~0U << 24)) | Index
| (1 << 27);
// Other form: keep the low 8 bits of r_word1, store the index in the upper
// 24 bits and set bit 4.
620 Rel
.MRE
.r_word1
= (Rel
.MRE
.r_word1
& 0xff) | Index
<< 8 | (1 << 4);
/// Assign an address to every section in layout order and record it in
/// SectionAddress.
///
/// Starting at address 0, each section is placed at the next address that
/// satisfies its own alignment; the running address then advances by the
/// section's address size plus the padding reported by getPaddingSize().
625 void MachObjectWriter::computeSectionAddresses(const MCAssembler
&Asm
,
626 const MCAsmLayout
&Layout
) {
627 uint64_t StartAddress
= 0;
628 for (const MCSection
*Sec
: Layout
.getSectionOrder()) {
// Round the running address up to this section's alignment.
629 StartAddress
= alignTo(StartAddress
, Sec
->getAlignment());
630 SectionAddress
[Sec
] = StartAddress
;
631 StartAddress
+= Layout
.getSectionAddressSize(Sec
);
633 // Explicitly pad the section to match the alignment requirements of the
634 // following one. This is for 'gas' compatibility, it shouldn't
635 // strictly be necessary.
636 StartAddress
+= getPaddingSize(Sec
, Layout
);
/// Post-layout hook: compute the section addresses for \p Layout, then bind
/// the indirect symbols of \p Asm.
640 void MachObjectWriter::executePostLayoutBinding(MCAssembler
&Asm
,
641 const MCAsmLayout
&Layout
) {
642 computeSectionAddresses(Asm
, Layout
);
644 // Create symbol data for any indirect symbols.
645 bindIndirectSymbols(Asm
);
/// Symbol/symbol overload: variable symbols are special-cased, every other
/// pair is delegated to
/// MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl().
/// NOTE(review): the end of the parameter list, the body of the FIXME
/// comment, the result for variable symbols and the tail of the delegated
/// call are not visible in this listing.
648 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
649 const MCAssembler
&Asm
, const MCSymbol
&A
, const MCSymbol
&B
,
651 // FIXME: We don't handle things like
// Either operand being a variable takes the special-case path.
654 if (A
.isVariable() || B
.isVariable())
656 return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm
, A
, B
,
/// Symbol/fragment overload: decide whether the difference between symbol
/// \p SymA and a location in fragment \p FB is fully resolved.
///
/// \p SymA is first resolved through findAliasedSymbol(). When the target is
/// not x86_64 the "simple" Darwin rule described below applies; on x86_64 a
/// PC-relative special case is checked instead. Later checks compare the
/// sections and then the atoms of the two fragments.
/// NOTE(review): the return statements of each branch, the early-exit
/// conditions near the top and the `IsPCRel` test belonging to the x86_64
/// special case are not visible in this listing.
660 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
661 const MCAssembler
&Asm
, const MCSymbol
&SymA
, const MCFragment
&FB
,
662 bool InSet
, bool IsPCRel
) const {
666 // The effective address is
667 // addr(atom(A)) + offset(A)
668 // - addr(atom(B)) - offset(B)
669 // and the offsets are not relocatable, so the fixup is fully resolved when
670 // addr(atom(A)) - addr(atom(B)) == 0.
671 const MCSymbol
&SA
= findAliasedSymbol(SymA
);
672 const MCSection
&SecA
= SA
.getSection();
673 const MCSection
&SecB
= *FB
.getParent();
676 // The simple (Darwin, except on x86_64) way of dealing with this was to
677 // assume that any reference to a temporary symbol *must* be a temporary
678 // symbol in the same atom, unless the sections differ. Therefore, any PCrel
679 // relocation to a temporary symbol (in the same section) is fully
680 // resolved. This also works in conjunction with absolutized .set, which
681 // requires the compiler to use .set to absolutize the differences between
682 // symbols which the compiler knows to be assembly time constants, so we
683 // don't need to worry about considering symbol differences fully resolved.
685 // If the file isn't using sub-sections-via-symbols, we can make the
686 // same assumptions about any symbol that we normally make about
689 bool hasReliableSymbolDifference
= isX86_64();
690 if (!hasReliableSymbolDifference
) {
// Condition: SA has no section, or the sections differ, or (with
// subsections-via-symbols enabled) a non-temporary SA lives in a different
// atom than FB.
691 if (!SA
.isInSection() || &SecA
!= &SecB
||
692 (!SA
.isTemporary() && FB
.getAtom() != SA
.getFragment()->getAtom() &&
693 Asm
.getSubsectionsViaSymbols()))
697 // For Darwin x86_64, there is one special case when the reference IsPCRel.
698 // If the fragment with the reference does not have a base symbol but meets
699 // the simple way of dealing with this, in that it is a temporary symbol in
700 // the same atom then it is assumed to be fully resolved. This is needed so
701 // a relocation entry is not created and so the static linker does not
702 // mess up the reference later.
703 else if(!FB
.getAtom() &&
704 SA
.isTemporary() && SA
.isInSection() && &SecA
== &SecB
){
709 // If they are not in the same section, we can't compute the diff.
713 const MCFragment
*FA
= SA
.getFragment();
715 // Bail if the symbol has no fragment.
719 // If the atoms are the same, they are guaranteed to have the same address.
720 if (FA
->getAtom() == FB
.getAtom())
723 // Otherwise, we can't prove this is fully resolved.
/// Map an MCVersionMinType value to the matching LC_VERSION_MIN_* load
/// command (macOS, iOS, tvOS, watchOS). Falling out of the mapping hits
/// llvm_unreachable.
727 static MachO::LoadCommandType
getLCFromMCVM(MCVersionMinType Type
) {
729 case MCVM_OSXVersionMin
: return MachO::LC_VERSION_MIN_MACOSX
;
730 case MCVM_IOSVersionMin
: return MachO::LC_VERSION_MIN_IPHONEOS
;
731 case MCVM_TvOSVersionMin
: return MachO::LC_VERSION_MIN_TVOS
;
732 case MCVM_WatchOSVersionMin
: return MachO::LC_VERSION_MIN_WATCHOS
;
734 llvm_unreachable("Invalid mc version min type");
/// Write the complete object file to the output stream and return the
/// number of bytes written.
///
/// The symbol table is computed first. The function then works in two
/// phases. Sizing: the number and total size of the load commands and the
/// extent of the section data are computed, which fixes the file offsets of
/// everything that follows. Emission, in this order: header, segment load
/// command, section headers, deployment target command, data-in-code and
/// linker-optimization-hint commands, symtab/dysymtab commands, linker
/// option commands, section contents with inter-section padding, relocation
/// entries, data-in-code entries, linker optimization hints, indirect
/// symbol entries, symbol table entries and the string table.
/// NOTE(review): many guards (`if (LOHSize)`, `if (NumSymbols)`, `else`
/// branches, increments of NumLoadCommands) and a few declarations (VMSize,
/// Prot, End, the data-region loop condition) are not visible in this
/// listing.
737 uint64_t MachObjectWriter::writeObject(MCAssembler
&Asm
,
738 const MCAsmLayout
&Layout
) {
// Recorded so the total number of bytes written can be returned at the end.
739 uint64_t StartOffset
= W
.OS
.tell();
741 // Compute symbol table information and bind symbol indices.
742 computeSymbolTable(Asm
, LocalSymbolData
, ExternalSymbolData
,
743 UndefinedSymbolData
);
745 unsigned NumSections
= Asm
.size();
746 const MCAssembler::VersionInfoType
&VersionInfo
=
747 Layout
.getAssembler().getVersionInfo();
749 // The section data starts after the header, the segment load command (and
750 // section headers) and the symbol table.
// The count starts at 1 for the single segment load command.
751 unsigned NumLoadCommands
= 1;
752 uint64_t LoadCommandsSize
= is64Bit() ?
753 sizeof(MachO::segment_command_64
) + NumSections
* sizeof(MachO::section_64
):
754 sizeof(MachO::segment_command
) + NumSections
* sizeof(MachO::section
);
756 // Add the deployment target version info load command size, if used.
757 if (VersionInfo
.Major
!= 0) {
759 if (VersionInfo
.EmitBuildVersion
)
760 LoadCommandsSize
+= sizeof(MachO::build_version_command
);
762 LoadCommandsSize
+= sizeof(MachO::version_min_command
);
765 // Add the data-in-code load command size, if used.
766 unsigned NumDataRegions
= Asm
.getDataRegions().size();
767 if (NumDataRegions
) {
769 LoadCommandsSize
+= sizeof(MachO::linkedit_data_command
);
772 // Add the loh load command size, if used.
773 uint64_t LOHRawSize
= Asm
.getLOHContainer().getEmitSize(*this, Layout
);
// The hint data is padded to a multiple of the pointer size.
774 uint64_t LOHSize
= alignTo(LOHRawSize
, is64Bit() ? 8 : 4);
777 LoadCommandsSize
+= sizeof(MachO::linkedit_data_command
);
780 // Add the symbol table load command sizes, if used.
781 unsigned NumSymbols
= LocalSymbolData
.size() + ExternalSymbolData
.size() +
782 UndefinedSymbolData
.size();
// Two commands: symtab and dysymtab.
784 NumLoadCommands
+= 2;
785 LoadCommandsSize
+= (sizeof(MachO::symtab_command
) +
786 sizeof(MachO::dysymtab_command
));
789 // Add the linker option load commands sizes.
790 for (const auto &Option
: Asm
.getLinkerOptions()) {
792 LoadCommandsSize
+= ComputeLinkerOptionsLoadCommandSize(Option
, is64Bit());
795 // Compute the total size of the section data, as well as its file size and vm
// Section data begins right after the header and all load commands.
797 uint64_t SectionDataStart
= (is64Bit() ? sizeof(MachO::mach_header_64
) :
798 sizeof(MachO::mach_header
)) + LoadCommandsSize
;
799 uint64_t SectionDataSize
= 0;
800 uint64_t SectionDataFileSize
= 0;
802 for (const MCSection
&Sec
: Asm
) {
803 uint64_t Address
= getSectionAddress(&Sec
);
804 uint64_t Size
= Layout
.getSectionAddressSize(&Sec
);
805 uint64_t FileSize
= Layout
.getSectionFileSize(&Sec
);
806 FileSize
+= getPaddingSize(&Sec
, Layout
);
// Each total is the highest end address seen so far.
808 VMSize
= std::max(VMSize
, Address
+ Size
);
810 if (Sec
.isVirtualSection())
813 SectionDataSize
= std::max(SectionDataSize
, Address
+ Size
);
814 SectionDataFileSize
= std::max(SectionDataFileSize
, Address
+ FileSize
);
817 // The section data is padded to 4 bytes.
819 // FIXME: Is this machine dependent?
820 unsigned SectionDataPadding
= OffsetToAlignment(SectionDataFileSize
, 4);
821 SectionDataFileSize
+= SectionDataPadding
;
823 // Write the prolog, starting with the header and load command...
824 writeHeader(MachO::MH_OBJECT
, NumLoadCommands
, LoadCommandsSize
,
825 Asm
.getSubsectionsViaSymbols());
827 MachO::VM_PROT_READ
| MachO::VM_PROT_WRITE
| MachO::VM_PROT_EXECUTE
;
// One unnamed segment holds every section; the same protection value is
// passed for both protection arguments.
828 writeSegmentLoadCommand("", NumSections
, 0, VMSize
, SectionDataStart
,
829 SectionDataSize
, Prot
, Prot
);
831 // ... and then the section headers.
// Relocation entries are laid out directly after the padded section data;
// this offset advances past each section's relocations in turn.
832 uint64_t RelocTableEnd
= SectionDataStart
+ SectionDataFileSize
;
833 for (const MCSection
&Section
: Asm
) {
834 const auto &Sec
= cast
<MCSectionMachO
>(Section
);
835 std::vector
<RelAndSymbol
> &Relocs
= Relocations
[&Sec
];
836 unsigned NumRelocs
= Relocs
.size();
837 uint64_t SectionStart
= SectionDataStart
+ getSectionAddress(&Sec
);
838 unsigned Flags
= Sec
.getTypeAndAttributes();
839 if (Sec
.hasInstructions())
840 Flags
|= MachO::S_ATTR_SOME_INSTRUCTIONS
;
841 writeSection(Layout
, Sec
, getSectionAddress(&Sec
), SectionStart
, Flags
,
842 RelocTableEnd
, NumRelocs
);
843 RelocTableEnd
+= NumRelocs
* sizeof(MachO::any_relocation_info
);
846 // Write out the deployment target information, if it's available.
847 if (VersionInfo
.Major
!= 0) {
// Packs a version into 32 bits: update in bits 0-7, minor in bits 8-15 and
// major from bit 16 upwards.
848 auto EncodeVersion
= [](VersionTuple V
) -> uint32_t {
849 assert(!V
.empty() && "empty version");
850 unsigned Update
= V
.getSubminor() ? *V
.getSubminor() : 0;
851 unsigned Minor
= V
.getMinor() ? *V
.getMinor() : 0;
852 assert(Update
< 256 && "unencodable update target version");
853 assert(Minor
< 256 && "unencodable minor target version");
854 assert(V
.getMajor() < 65536 && "unencodable major target version");
855 return Update
| (Minor
<< 8) | (V
.getMajor() << 16);
857 uint32_t EncodedVersion
= EncodeVersion(
858 VersionTuple(VersionInfo
.Major
, VersionInfo
.Minor
, VersionInfo
.Update
));
859 uint32_t SDKVersion
= !VersionInfo
.SDKVersion
.empty()
860 ? EncodeVersion(VersionInfo
.SDKVersion
)
862 if (VersionInfo
.EmitBuildVersion
) {
863 // FIXME: Currently empty tools. Add clang version in the future.
864 W
.write
<uint32_t>(MachO::LC_BUILD_VERSION
);
865 W
.write
<uint32_t>(sizeof(MachO::build_version_command
));
866 W
.write
<uint32_t>(VersionInfo
.TypeOrPlatform
.Platform
);
867 W
.write
<uint32_t>(EncodedVersion
);
868 W
.write
<uint32_t>(SDKVersion
);
869 W
.write
<uint32_t>(0); // Empty tools list.
// Version-min form: the load command type depends on the target OS.
871 MachO::LoadCommandType LCType
872 = getLCFromMCVM(VersionInfo
.TypeOrPlatform
.Type
);
873 W
.write
<uint32_t>(LCType
);
874 W
.write
<uint32_t>(sizeof(MachO::version_min_command
));
875 W
.write
<uint32_t>(EncodedVersion
);
876 W
.write
<uint32_t>(SDKVersion
);
880 // Write the data-in-code load command, if used.
// Each data region entry occupies 8 bytes: one 32-bit and two 16-bit values,
// as written in the data-in-code payload loop below.
881 uint64_t DataInCodeTableEnd
= RelocTableEnd
+ NumDataRegions
* 8;
882 if (NumDataRegions
) {
883 uint64_t DataRegionsOffset
= RelocTableEnd
;
884 uint64_t DataRegionsSize
= NumDataRegions
* 8;
885 writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE
, DataRegionsOffset
,
889 // Write the loh load command, if used.
890 uint64_t LOHTableEnd
= DataInCodeTableEnd
+ LOHSize
;
892 writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT
,
893 DataInCodeTableEnd
, LOHSize
);
895 // Write the symbol table load command, if used.
// The three symbol groups are numbered consecutively: local, external,
// undefined.
897 unsigned FirstLocalSymbol
= 0;
898 unsigned NumLocalSymbols
= LocalSymbolData
.size();
899 unsigned FirstExternalSymbol
= FirstLocalSymbol
+ NumLocalSymbols
;
900 unsigned NumExternalSymbols
= ExternalSymbolData
.size();
901 unsigned FirstUndefinedSymbol
= FirstExternalSymbol
+ NumExternalSymbols
;
902 unsigned NumUndefinedSymbols
= UndefinedSymbolData
.size();
903 unsigned NumIndirectSymbols
= Asm
.indirect_symbol_size();
904 unsigned NumSymTabSymbols
=
905 NumLocalSymbols
+ NumExternalSymbols
+ NumUndefinedSymbols
;
// Each indirect symbol entry is a single 32-bit value (see the indirect
// symbol loop below).
906 uint64_t IndirectSymbolSize
= NumIndirectSymbols
* 4;
907 uint64_t IndirectSymbolOffset
= 0;
909 // If used, the indirect symbols are written after the section data.
910 if (NumIndirectSymbols
)
911 IndirectSymbolOffset
= LOHTableEnd
;
913 // The symbol table is written after the indirect symbol data.
914 uint64_t SymbolTableOffset
= LOHTableEnd
+ IndirectSymbolSize
;
916 // The string table is written after symbol table.
917 uint64_t StringTableOffset
=
918 SymbolTableOffset
+ NumSymTabSymbols
* (is64Bit() ?
919 sizeof(MachO::nlist_64
) :
920 sizeof(MachO::nlist
));
921 writeSymtabLoadCommand(SymbolTableOffset
, NumSymTabSymbols
,
922 StringTableOffset
, StringTable
.getSize());
924 writeDysymtabLoadCommand(FirstLocalSymbol
, NumLocalSymbols
,
925 FirstExternalSymbol
, NumExternalSymbols
,
926 FirstUndefinedSymbol
, NumUndefinedSymbols
,
927 IndirectSymbolOffset
, NumIndirectSymbols
);
930 // Write the linker options load commands.
931 for (const auto &Option
: Asm
.getLinkerOptions())
932 writeLinkerOptionsLoadCommand(Option
);
934 // Write the actual section data.
935 for (const MCSection
&Sec
: Asm
) {
936 Asm
.writeSectionData(W
.OS
, &Sec
, Layout
);
// Zero padding so that the next section starts at its aligned address.
938 uint64_t Pad
= getPaddingSize(&Sec
, Layout
);
939 W
.OS
.write_zeros(Pad
);
942 // Write the extra padding.
943 W
.OS
.write_zeros(SectionDataPadding
);
945 // Write the relocation entries.
946 for (const MCSection
&Sec
: Asm
) {
947 // Write the section relocation entries, in reverse order to match 'as'
948 // (approximately, the exact algorithm is more complicated than this).
949 std::vector
<RelAndSymbol
> &Relocs
= Relocations
[&Sec
];
950 for (const RelAndSymbol
&Rel
: make_range(Relocs
.rbegin(), Relocs
.rend())) {
951 W
.write
<uint32_t>(Rel
.MRE
.r_word0
);
952 W
.write
<uint32_t>(Rel
.MRE
.r_word1
);
956 // Write out the data-in-code region payload, if there is one.
957 for (MCAssembler::const_data_region_iterator
958 it
= Asm
.data_region_begin(), ie
= Asm
.data_region_end();
960 const DataRegionData
*Data
= &(*it
);
961 uint64_t Start
= getSymbolAddress(*Data
->Start
, Layout
);
964 End
= getSymbolAddress(*Data
->End
, Layout
);
966 report_fatal_error("Data region not terminated");
968 LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data
->Kind
969 << " start: " << Start
<< "(" << Data
->Start
->getName()
971 << " end: " << End
<< "(" << Data
->End
->getName() << ")"
972 << " size: " << End
- Start
<< "\n");
// Entry layout: 32-bit start address, 16-bit length, 16-bit kind.
// NOTE(review): the length is truncated to 16 bits; confirm a data region
// can never span more than 65535 bytes.
973 W
.write
<uint32_t>(Start
);
974 W
.write
<uint16_t>(End
- Start
);
975 W
.write
<uint16_t>(Data
->Kind
);
978 // Write out the loh commands, if there is one.
// NOTE(review): the stream position is held in an `unsigned` here, while the
// other Start variables in this file are uint64_t; it is only used by the
// assert below.
981 unsigned Start
= W
.OS
.tell();
983 Asm
.getLOHContainer().emit(*this, Layout
);
984 // Pad to a multiple of the pointer size.
985 W
.OS
.write_zeros(OffsetToAlignment(LOHRawSize
, is64Bit() ? 8 : 4));
986 assert(W
.OS
.tell() - Start
== LOHSize
);
989 // Write the symbol table data, if used.
991 // Write the indirect symbol entries.
992 for (MCAssembler::const_indirect_symbol_iterator
993 it
= Asm
.indirect_symbol_begin(),
994 ie
= Asm
.indirect_symbol_end(); it
!= ie
; ++it
) {
995 // Indirect symbols in the non-lazy symbol pointer section have some
997 const MCSectionMachO
&Section
=
998 static_cast<const MCSectionMachO
&>(*it
->Section
);
999 if (Section
.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS
) {
1000 // If this symbol is defined and internal, mark it as such.
1001 if (it
->Symbol
->isDefined() && !it
->Symbol
->isExternal()) {
// For such symbols a flags word is written; absolute symbols additionally
// get INDIRECT_SYMBOL_ABS.
1002 uint32_t Flags
= MachO::INDIRECT_SYMBOL_LOCAL
;
1003 if (it
->Symbol
->isAbsolute())
1004 Flags
|= MachO::INDIRECT_SYMBOL_ABS
;
1005 W
.write
<uint32_t>(Flags
);
// Write the symbol's index in the symbol table.
1010 W
.write
<uint32_t>(it
->Symbol
->getIndex());
1013 // FIXME: Check that offsets match computed ones.
1015 // Write the symbol table entries.
1016 for (auto *SymbolData
:
1017 {&LocalSymbolData
, &ExternalSymbolData
, &UndefinedSymbolData
})
1018 for (MachSymbolData
&Entry
: *SymbolData
)
1019 writeNlist(Entry
, Layout
);
1021 // Write the string table.
1022 StringTable
.write(W
.OS
);
// Total number of bytes written for this object.
1025 return W
.OS
.tell() - StartOffset
;
1028 std::unique_ptr
<MCObjectWriter
>
1029 llvm::createMachObjectWriter(std::unique_ptr
<MCMachObjectTargetWriter
> MOTW
,
1030 raw_pwrite_stream
&OS
, bool IsLittleEndian
) {
1031 return llvm::make_unique
<MachObjectWriter
>(std::move(MOTW
), OS
,