1 //===- InputFiles.cpp -----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains functions to parse Mach-O object files. In this comment,
10 // we describe the Mach-O file structure and how we parse it.
12 // Mach-O is not very different from ELF or COFF. The notion of symbols,
13 // sections and relocations exists in Mach-O as it does in ELF and COFF.
15 // Perhaps the notion that is new to those who know ELF/COFF is "subsections".
16 // In ELF/COFF, sections are an atomic unit of data copied from input files to
17 // output files. When we merge or garbage-collect sections, we treat each
18 // section as an atomic unit. In Mach-O, that's not the case. Sections can
19 // consist of multiple subsections, and subsections are a unit of merging and
20 // garbage-collecting. Therefore, Mach-O's subsections are more similar to
21 // ELF/COFF's sections than Mach-O's sections are.
23 // A section can have multiple symbols. A symbol that does not have the
24 // N_ALT_ENTRY attribute indicates a beginning of a subsection. Therefore, by
25 // definition, a symbol is always present at the beginning of each subsection. A
26 // symbol with N_ALT_ENTRY attribute does not start a new subsection and can
27 // point to a middle of a subsection.
29 // The notion of subsections also affects how relocations are represented in
30 // Mach-O. All references within a section need to be explicitly represented as
31 // relocations if they refer to different subsections, because we obviously need
32 // to fix up addresses if subsections are laid out in an output file differently
33 // than they were in object files. To represent that, Mach-O relocations can
34 // refer to an unnamed location via its address. Scattered relocations (those
35 // with the R_SCATTERED bit set) always refer to unnamed locations.
36 // Non-scattered relocations refer to an unnamed location if r_extern is not set
37 // and r_symbolnum is zero.
39 // Without the above differences, I think you can use your knowledge about ELF
40 // and COFF for Mach-O.
42 //===----------------------------------------------------------------------===//
44 #include "InputFiles.h"
49 #include "ExportTrie.h"
50 #include "InputSection.h"
51 #include "MachOStructs.h"
53 #include "OutputSection.h"
54 #include "OutputSegment.h"
55 #include "SymbolTable.h"
57 #include "SyntheticSections.h"
60 #include "lld/Common/CommonLinkerContext.h"
61 #include "lld/Common/DWARF.h"
62 #include "lld/Common/Reproduce.h"
63 #include "llvm/ADT/iterator.h"
64 #include "llvm/BinaryFormat/MachO.h"
65 #include "llvm/LTO/LTO.h"
66 #include "llvm/Support/BinaryStreamReader.h"
67 #include "llvm/Support/Endian.h"
68 #include "llvm/Support/LEB128.h"
69 #include "llvm/Support/MemoryBuffer.h"
70 #include "llvm/Support/Path.h"
71 #include "llvm/Support/TarWriter.h"
72 #include "llvm/Support/TimeProfiler.h"
73 #include "llvm/TextAPI/Architecture.h"
74 #include "llvm/TextAPI/InterfaceFile.h"
77 #include <type_traits>
80 using namespace llvm::MachO
;
81 using namespace llvm::support::endian
;
82 using namespace llvm::sys
;
84 using namespace lld::macho
;
86 // Returns "<internal>", "foo.a(bar.o)", or "baz.o".
87 std::string
lld::toString(const InputFile
*f
) {
91 // Multiple dylibs can be defined in one .tbd file.
92 if (const auto *dylibFile
= dyn_cast
<DylibFile
>(f
))
93 if (f
->getName().ends_with(".tbd"))
94 return (f
->getName() + "(" + dylibFile
->installName
+ ")").str();
96 if (f
->archiveName
.empty())
97 return std::string(f
->getName());
98 return (f
->archiveName
+ "(" + path::filename(f
->getName()) + ")").str();
101 std::string
lld::toString(const Section
&sec
) {
102 return (toString(sec
.file
) + ":(" + sec
.name
+ ")").str();
105 SetVector
<InputFile
*> macho::inputFiles
;
106 std::unique_ptr
<TarWriter
> macho::tar
;
107 int InputFile::idCount
= 0;
109 static VersionTuple
decodeVersion(uint32_t version
) {
110 unsigned major
= version
>> 16;
111 unsigned minor
= (version
>> 8) & 0xffu
;
112 unsigned subMinor
= version
& 0xffu
;
113 return VersionTuple(major
, minor
, subMinor
);
116 static std::vector
<PlatformInfo
> getPlatformInfos(const InputFile
*input
) {
117 if (!isa
<ObjFile
>(input
) && !isa
<DylibFile
>(input
))
120 const char *hdr
= input
->mb
.getBufferStart();
122 // "Zippered" object files can have multiple LC_BUILD_VERSION load commands.
123 std::vector
<PlatformInfo
> platformInfos
;
124 for (auto *cmd
: findCommands
<build_version_command
>(hdr
, LC_BUILD_VERSION
)) {
126 info
.target
.Platform
= static_cast<PlatformType
>(cmd
->platform
);
127 info
.target
.MinDeployment
= decodeVersion(cmd
->minos
);
128 platformInfos
.emplace_back(std::move(info
));
130 for (auto *cmd
: findCommands
<version_min_command
>(
131 hdr
, LC_VERSION_MIN_MACOSX
, LC_VERSION_MIN_IPHONEOS
,
132 LC_VERSION_MIN_TVOS
, LC_VERSION_MIN_WATCHOS
)) {
135 case LC_VERSION_MIN_MACOSX
:
136 info
.target
.Platform
= PLATFORM_MACOS
;
138 case LC_VERSION_MIN_IPHONEOS
:
139 info
.target
.Platform
= PLATFORM_IOS
;
141 case LC_VERSION_MIN_TVOS
:
142 info
.target
.Platform
= PLATFORM_TVOS
;
144 case LC_VERSION_MIN_WATCHOS
:
145 info
.target
.Platform
= PLATFORM_WATCHOS
;
148 info
.target
.MinDeployment
= decodeVersion(cmd
->version
);
149 platformInfos
.emplace_back(std::move(info
));
152 return platformInfos
;
155 static bool checkCompatibility(const InputFile
*input
) {
156 std::vector
<PlatformInfo
> platformInfos
= getPlatformInfos(input
);
157 if (platformInfos
.empty())
160 auto it
= find_if(platformInfos
, [&](const PlatformInfo
&info
) {
161 return removeSimulator(info
.target
.Platform
) ==
162 removeSimulator(config
->platform());
164 if (it
== platformInfos
.end()) {
165 std::string platformNames
;
166 raw_string_ostream
os(platformNames
);
169 [&](const PlatformInfo
&info
) {
170 os
<< getPlatformName(info
.target
.Platform
);
173 error(toString(input
) + " has platform " + platformNames
+
174 Twine(", which is different from target platform ") +
175 getPlatformName(config
->platform()));
179 if (it
->target
.MinDeployment
> config
->platformInfo
.target
.MinDeployment
)
180 warn(toString(input
) + " has version " +
181 it
->target
.MinDeployment
.getAsString() +
182 ", which is newer than target minimum of " +
183 config
->platformInfo
.target
.MinDeployment
.getAsString());
188 template <class Header
>
189 static bool compatWithTargetArch(const InputFile
*file
, const Header
*hdr
) {
191 std::tie(cpuType
, std::ignore
) = getCPUTypeFromArchitecture(config
->arch());
193 if (hdr
->cputype
!= cpuType
) {
195 getArchitectureFromCpuType(hdr
->cputype
, hdr
->cpusubtype
);
196 auto msg
= config
->errorForArchMismatch
197 ? static_cast<void (*)(const Twine
&)>(error
)
200 msg(toString(file
) + " has architecture " + getArchitectureName(arch
) +
201 " which is incompatible with target architecture " +
202 getArchitectureName(config
->arch()));
206 return checkCompatibility(file
);
209 // This cache mostly exists to store system libraries (and .tbds) as they're
210 // loaded, rather than the input archives, which are already cached at a higher
211 // level, and other files like the filelist that are only read once.
212 // Theoretically this caching could be more efficient by hoisting it, but that
213 // would require altering many callers to track the state.
214 DenseMap
<CachedHashStringRef
, MemoryBufferRef
> macho::cachedReads
;
215 // Open a given file path and return it as a memory-mapped file.
216 std::optional
<MemoryBufferRef
> macho::readFile(StringRef path
) {
217 CachedHashStringRef
key(path
);
218 auto entry
= cachedReads
.find(key
);
219 if (entry
!= cachedReads
.end())
220 return entry
->second
;
222 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> mbOrErr
= MemoryBuffer::getFile(path
);
223 if (std::error_code ec
= mbOrErr
.getError()) {
224 error("cannot open " + path
+ ": " + ec
.message());
228 std::unique_ptr
<MemoryBuffer
> &mb
= *mbOrErr
;
229 MemoryBufferRef mbref
= mb
->getMemBufferRef();
230 make
<std::unique_ptr
<MemoryBuffer
>>(std::move(mb
)); // take mb ownership
232 // If this is a regular non-fat file, return it.
233 const char *buf
= mbref
.getBufferStart();
234 const auto *hdr
= reinterpret_cast<const fat_header
*>(buf
);
235 if (mbref
.getBufferSize() < sizeof(uint32_t) ||
236 read32be(&hdr
->magic
) != FAT_MAGIC
) {
238 tar
->append(relativeToRoot(path
), mbref
.getBuffer());
239 return cachedReads
[key
] = mbref
;
242 llvm::BumpPtrAllocator
&bAlloc
= lld::bAlloc();
244 // Object files and archive files may be fat files, which contain multiple
245 // real files for different CPU ISAs. Here, we search for a file that matches
246 // with the current link target and returns it as a MemoryBufferRef.
247 const auto *arch
= reinterpret_cast<const fat_arch
*>(buf
+ sizeof(*hdr
));
248 auto getArchName
= [](uint32_t cpuType
, uint32_t cpuSubtype
) {
249 return getArchitectureName(getArchitectureFromCpuType(cpuType
, cpuSubtype
));
252 std::vector
<StringRef
> archs
;
253 for (uint32_t i
= 0, n
= read32be(&hdr
->nfat_arch
); i
< n
; ++i
) {
254 if (reinterpret_cast<const char *>(arch
+ i
+ 1) >
255 buf
+ mbref
.getBufferSize()) {
256 error(path
+ ": fat_arch struct extends beyond end of file");
260 uint32_t cpuType
= read32be(&arch
[i
].cputype
);
261 uint32_t cpuSubtype
=
262 read32be(&arch
[i
].cpusubtype
) & ~MachO::CPU_SUBTYPE_MASK
;
264 // FIXME: LD64 has a more complex fallback logic here.
265 // Consider implementing that as well?
266 if (cpuType
!= static_cast<uint32_t>(target
->cpuType
) ||
267 cpuSubtype
!= target
->cpuSubtype
) {
268 archs
.emplace_back(getArchName(cpuType
, cpuSubtype
));
272 uint32_t offset
= read32be(&arch
[i
].offset
);
273 uint32_t size
= read32be(&arch
[i
].size
);
274 if (offset
+ size
> mbref
.getBufferSize())
275 error(path
+ ": slice extends beyond end of file");
277 tar
->append(relativeToRoot(path
), mbref
.getBuffer());
278 return cachedReads
[key
] = MemoryBufferRef(StringRef(buf
+ offset
, size
),
282 auto targetArchName
= getArchName(target
->cpuType
, target
->cpuSubtype
);
283 warn(path
+ ": ignoring file because it is universal (" + join(archs
, ",") +
284 ") but does not contain the " + targetArchName
+ " architecture");
288 InputFile::InputFile(Kind kind
, const InterfaceFile
&interface
)
289 : id(idCount
++), fileKind(kind
), name(saver().save(interface
.getPath())) {}
// Some sections consist of fixed-size records, so instead of splitting them at
// symbol boundaries, we split them based on size. Records are distinct from
// literals in that they may contain references to other sections, instead of
// being leaf nodes in the InputSection graph.
//
// Note that "record" is a term I came up with. In contrast, "literal" is a term
// used by the Mach-O format.
298 static std::optional
<size_t> getRecordSize(StringRef segname
, StringRef name
) {
299 if (name
== section_names::compactUnwind
) {
300 if (segname
== segment_names::ld
)
301 return target
->wordSize
== 8 ? 32 : 20;
303 if (!config
->dedupStrings
)
306 if (name
== section_names::cfString
&& segname
== segment_names::data
)
307 return target
->wordSize
== 8 ? 32 : 16;
309 if (config
->icfLevel
== ICFLevel::none
)
312 if (name
== section_names::objcClassRefs
&& segname
== segment_names::data
)
313 return target
->wordSize
;
315 if (name
== section_names::objcSelrefs
&& segname
== segment_names::data
)
316 return target
->wordSize
;
320 static Error
parseCallGraph(ArrayRef
<uint8_t> data
,
321 std::vector
<CallGraphEntry
> &callGraph
) {
322 TimeTraceScope
timeScope("Parsing call graph section");
323 BinaryStreamReader
reader(data
, llvm::endianness::little
);
324 while (!reader
.empty()) {
325 uint32_t fromIndex
, toIndex
;
327 if (Error err
= reader
.readInteger(fromIndex
))
329 if (Error err
= reader
.readInteger(toIndex
))
331 if (Error err
= reader
.readInteger(count
))
333 callGraph
.emplace_back(fromIndex
, toIndex
, count
);
335 return Error::success();
338 // Parse the sequence of sections within a single LC_SEGMENT(_64).
339 // Split each section into subsections.
340 template <class SectionHeader
>
341 void ObjFile::parseSections(ArrayRef
<SectionHeader
> sectionHeaders
) {
342 sections
.reserve(sectionHeaders
.size());
343 auto *buf
= reinterpret_cast<const uint8_t *>(mb
.getBufferStart());
345 for (const SectionHeader
&sec
: sectionHeaders
) {
347 StringRef(sec
.sectname
, strnlen(sec
.sectname
, sizeof(sec
.sectname
)));
349 StringRef(sec
.segname
, strnlen(sec
.segname
, sizeof(sec
.segname
)));
350 sections
.push_back(make
<Section
>(this, segname
, name
, sec
.flags
, sec
.addr
));
351 if (sec
.align
>= 32) {
352 error("alignment " + std::to_string(sec
.align
) + " of section " + name
+
356 Section
§ion
= *sections
.back();
357 uint32_t align
= 1 << sec
.align
;
358 ArrayRef
<uint8_t> data
= {isZeroFill(sec
.flags
) ? nullptr
360 static_cast<size_t>(sec
.size
)};
362 auto splitRecords
= [&](size_t recordSize
) -> void {
365 Subsections
&subsections
= section
.subsections
;
366 subsections
.reserve(data
.size() / recordSize
);
367 for (uint64_t off
= 0; off
< data
.size(); off
+= recordSize
) {
368 auto *isec
= make
<ConcatInputSection
>(
369 section
, data
.slice(off
, std::min(data
.size(), recordSize
)), align
);
370 subsections
.push_back({off
, isec
});
372 section
.doneSplitting
= true;
375 if (sectionType(sec
.flags
) == S_CSTRING_LITERALS
) {
377 fatal(toString(this) + ": " + sec
.segname
+ "," + sec
.sectname
+
378 " contains relocations, which is unsupported");
380 name
== section_names::objcMethname
|| config
->dedupStrings
;
382 make
<CStringInputSection
>(section
, data
, align
, dedupLiterals
);
383 // FIXME: parallelize this?
384 cast
<CStringInputSection
>(isec
)->splitIntoPieces();
385 section
.subsections
.push_back({0, isec
});
386 } else if (isWordLiteralSection(sec
.flags
)) {
388 fatal(toString(this) + ": " + sec
.segname
+ "," + sec
.sectname
+
389 " contains relocations, which is unsupported");
390 InputSection
*isec
= make
<WordLiteralInputSection
>(section
, data
, align
);
391 section
.subsections
.push_back({0, isec
});
392 } else if (auto recordSize
= getRecordSize(segname
, name
)) {
393 splitRecords(*recordSize
);
394 } else if (name
== section_names::ehFrame
&&
395 segname
== segment_names::text
) {
396 splitEhFrames(data
, *sections
.back());
397 } else if (segname
== segment_names::llvm
) {
398 if (config
->callGraphProfileSort
&& name
== section_names::cgProfile
)
399 checkError(parseCallGraph(data
, callGraph
));
400 // ld64 does not appear to emit contents from sections within the __LLVM
401 // segment. Symbols within those sections point to bitcode metadata
402 // instead of actual symbols. Global symbols within those sections could
403 // have the same name without causing duplicate symbol errors. To avoid
404 // spurious duplicate symbol errors, we do not parse these sections.
405 // TODO: Evaluate whether the bitcode metadata is needed.
406 } else if (name
== section_names::objCImageInfo
&&
407 segname
== segment_names::data
) {
408 objCImageInfo
= data
;
410 if (name
== section_names::addrSig
)
411 addrSigSection
= sections
.back();
413 auto *isec
= make
<ConcatInputSection
>(section
, data
, align
);
414 if (isDebugSection(isec
->getFlags()) &&
415 isec
->getSegName() == segment_names::dwarf
) {
416 // Instead of emitting DWARF sections, we emit STABS symbols to the
417 // object files that contain them. We filter them out early to avoid
418 // parsing their relocations unnecessarily.
419 debugSections
.push_back(isec
);
421 section
.subsections
.push_back({0, isec
});
427 void ObjFile::splitEhFrames(ArrayRef
<uint8_t> data
, Section
&ehFrameSection
) {
428 EhReader
reader(this, data
, /*dataOff=*/0);
430 while (off
< reader
.size()) {
431 uint64_t frameOff
= off
;
432 uint64_t length
= reader
.readLength(&off
);
435 uint64_t fullLength
= length
+ (off
- frameOff
);
// We hard-code an alignment of 1 here because we don't actually want our
// EH frames to be aligned to the section alignment. EH frame decoders don't
// expect this alignment. Moreover, each EH frame must start where the
// previous one ends, and where it ends is indicated by the length field.
// Unless we update the length field (troublesome), we should keep the
// alignment at 1.
// Note that we still want to preserve the alignment of the overall section,
// just not of the individual EH frames.
445 ehFrameSection
.subsections
.push_back(
446 {frameOff
, make
<ConcatInputSection
>(ehFrameSection
,
447 data
.slice(frameOff
, fullLength
),
450 ehFrameSection
.doneSplitting
= true;
454 static Section
*findContainingSection(const std::vector
<Section
*> §ions
,
456 static_assert(std::is_same
<uint64_t, T
>::value
||
457 std::is_same
<uint32_t, T
>::value
,
458 "unexpected type for offset");
459 auto it
= std::prev(llvm::upper_bound(
461 [](uint64_t value
, const Section
*sec
) { return value
< sec
->addr
; }));
462 *offset
-= (*it
)->addr
;
// Find the subsection corresponding to the greatest section offset that is <=
// that of the given offset.
//
// offset: an offset relative to the start of the original InputSection (before
// any subsection splitting has occurred). It will be updated to represent the
// same location as an offset relative to the start of the containing
// subsection.
*findContainingSubsection(const Section
§ion
,
476 static_assert(std::is_same
<uint64_t, T
>::value
||
477 std::is_same
<uint32_t, T
>::value
,
478 "unexpected type for offset");
479 auto it
= std::prev(llvm::upper_bound(
480 section
.subsections
, *offset
,
481 [](uint64_t value
, Subsection subsec
) { return value
< subsec
.offset
; }));
482 *offset
-= it
->offset
;
486 // Find a symbol at offset `off` within `isec`.
487 static Defined
*findSymbolAtOffset(const ConcatInputSection
*isec
,
489 auto it
= llvm::lower_bound(isec
->symbols
, off
, [](Defined
*d
, uint64_t off
) {
490 return d
->value
< off
;
492 // The offset should point at the exact address of a symbol (with no addend.)
493 if (it
== isec
->symbols
.end() || (*it
)->value
!= off
) {
494 assert(isec
->wasCoalesced
);
500 template <class SectionHeader
>
501 static bool validateRelocationInfo(InputFile
*file
, const SectionHeader
&sec
,
502 relocation_info rel
) {
503 const RelocAttrs
&relocAttrs
= target
->getRelocAttrs(rel
.r_type
);
505 auto message
= [relocAttrs
, file
, sec
, rel
, &valid
](const Twine
&diagnostic
) {
507 return (relocAttrs
.name
+ " relocation " + diagnostic
+ " at offset " +
508 std::to_string(rel
.r_address
) + " of " + sec
.segname
+ "," +
509 sec
.sectname
+ " in " + toString(file
))
513 if (!relocAttrs
.hasAttr(RelocAttrBits::LOCAL
) && !rel
.r_extern
)
514 error(message("must be extern"));
515 if (relocAttrs
.hasAttr(RelocAttrBits::PCREL
) != rel
.r_pcrel
)
516 error(message(Twine("must ") + (rel
.r_pcrel
? "not " : "") +
518 if (isThreadLocalVariables(sec
.flags
) &&
519 !relocAttrs
.hasAttr(RelocAttrBits::UNSIGNED
))
520 error(message("not allowed in thread-local section, must be UNSIGNED"));
521 if (rel
.r_length
< 2 || rel
.r_length
> 3 ||
522 !relocAttrs
.hasAttr(static_cast<RelocAttrBits
>(1 << rel
.r_length
))) {
523 static SmallVector
<StringRef
, 4> widths
{"0", "4", "8", "4 or 8"};
524 error(message("has width " + std::to_string(1 << rel
.r_length
) +
525 " bytes, but must be " +
526 widths
[(static_cast<int>(relocAttrs
.bits
) >> 2) & 3] +
532 template <class SectionHeader
>
533 void ObjFile::parseRelocations(ArrayRef
<SectionHeader
> sectionHeaders
,
534 const SectionHeader
&sec
, Section
§ion
) {
535 auto *buf
= reinterpret_cast<const uint8_t *>(mb
.getBufferStart());
536 ArrayRef
<relocation_info
> relInfos(
537 reinterpret_cast<const relocation_info
*>(buf
+ sec
.reloff
), sec
.nreloc
);
539 Subsections
&subsections
= section
.subsections
;
540 auto subsecIt
= subsections
.rbegin();
541 for (size_t i
= 0; i
< relInfos
.size(); i
++) {
542 // Paired relocations serve as Mach-O's method for attaching a
543 // supplemental datum to a primary relocation record. ELF does not
544 // need them because the *_RELOC_RELA records contain the extra
545 // addend field, vs. *_RELOC_REL which omit the addend.
547 // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
548 // and the paired *_RELOC_UNSIGNED record holds the minuend. The
549 // datum for each is a symbolic address. The result is the offset
550 // between two addresses.
552 // The ARM64_RELOC_ADDEND record holds the addend, and the paired
553 // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
554 // base symbolic address.
556 // Note: X86 does not use *_RELOC_ADDEND because it can embed an addend into
557 // the instruction stream. On X86, a relocatable address field always
558 // occupies an entire contiguous sequence of byte(s), so there is no need to
559 // merge opcode bits with address bits. Therefore, it's easy and convenient
560 // to store addends in the instruction-stream bytes that would otherwise
561 // contain zeroes. By contrast, RISC ISAs such as ARM64 mix opcode bits with
562 // address bits so that bitwise arithmetic is necessary to extract and
563 // insert them. Storing addends in the instruction stream is possible, but
564 // inconvenient and more costly at link time.
566 relocation_info relInfo
= relInfos
[i
];
568 target
->hasAttr(relInfo
.r_type
, RelocAttrBits::SUBTRAHEND
);
569 int64_t pairedAddend
= 0;
570 if (target
->hasAttr(relInfo
.r_type
, RelocAttrBits::ADDEND
)) {
571 pairedAddend
= SignExtend64
<24>(relInfo
.r_symbolnum
);
572 relInfo
= relInfos
[++i
];
574 assert(i
< relInfos
.size());
575 if (!validateRelocationInfo(this, sec
, relInfo
))
577 if (relInfo
.r_address
& R_SCATTERED
)
578 fatal("TODO: Scattered relocations not supported");
580 int64_t embeddedAddend
= target
->getEmbeddedAddend(mb
, sec
.offset
, relInfo
);
581 assert(!(embeddedAddend
&& pairedAddend
));
582 int64_t totalAddend
= pairedAddend
+ embeddedAddend
;
584 r
.type
= relInfo
.r_type
;
585 r
.pcrel
= relInfo
.r_pcrel
;
586 r
.length
= relInfo
.r_length
;
587 r
.offset
= relInfo
.r_address
;
588 if (relInfo
.r_extern
) {
589 r
.referent
= symbols
[relInfo
.r_symbolnum
];
590 r
.addend
= isSubtrahend
? 0 : totalAddend
;
592 assert(!isSubtrahend
);
593 const SectionHeader
&referentSecHead
=
594 sectionHeaders
[relInfo
.r_symbolnum
- 1];
595 uint64_t referentOffset
;
596 if (relInfo
.r_pcrel
) {
597 // The implicit addend for pcrel section relocations is the pcrel offset
598 // in terms of the addresses in the input file. Here we adjust it so
599 // that it describes the offset from the start of the referent section.
600 // FIXME This logic was written around x86_64 behavior -- ARM64 doesn't
601 // have pcrel section relocations. We may want to factor this out into
602 // the arch-specific .cpp file.
603 assert(target
->hasAttr(r
.type
, RelocAttrBits::BYTE4
));
604 referentOffset
= sec
.addr
+ relInfo
.r_address
+ 4 + totalAddend
-
605 referentSecHead
.addr
;
607 // The addend for a non-pcrel relocation is its absolute address.
608 referentOffset
= totalAddend
- referentSecHead
.addr
;
610 r
.referent
= findContainingSubsection(*sections
[relInfo
.r_symbolnum
- 1],
612 r
.addend
= referentOffset
;
615 // Find the subsection that this relocation belongs to.
616 // Though not required by the Mach-O format, clang and gcc seem to emit
617 // relocations in order, so let's take advantage of it. However, ld64 emits
618 // unsorted relocations (in `-r` mode), so we have a fallback for that
620 InputSection
*subsec
;
621 while (subsecIt
!= subsections
.rend() && subsecIt
->offset
> r
.offset
)
623 if (subsecIt
== subsections
.rend() ||
624 subsecIt
->offset
+ subsecIt
->isec
->getSize() <= r
.offset
) {
625 subsec
= findContainingSubsection(section
, &r
.offset
);
626 // Now that we know the relocs are unsorted, avoid trying the 'fast path'
627 // for the other relocations.
628 subsecIt
= subsections
.rend();
630 subsec
= subsecIt
->isec
;
631 r
.offset
-= subsecIt
->offset
;
633 subsec
->relocs
.push_back(r
);
636 relocation_info minuendInfo
= relInfos
[++i
];
637 // SUBTRACTOR relocations should always be followed by an UNSIGNED one
638 // attached to the same address.
639 assert(target
->hasAttr(minuendInfo
.r_type
, RelocAttrBits::UNSIGNED
) &&
640 relInfo
.r_address
== minuendInfo
.r_address
);
642 p
.type
= minuendInfo
.r_type
;
643 if (minuendInfo
.r_extern
) {
644 p
.referent
= symbols
[minuendInfo
.r_symbolnum
];
645 p
.addend
= totalAddend
;
647 uint64_t referentOffset
=
648 totalAddend
- sectionHeaders
[minuendInfo
.r_symbolnum
- 1].addr
;
649 p
.referent
= findContainingSubsection(
650 *sections
[minuendInfo
.r_symbolnum
- 1], &referentOffset
);
651 p
.addend
= referentOffset
;
653 subsec
->relocs
.push_back(p
);
658 template <class NList
>
659 static macho::Symbol
*createDefined(const NList
&sym
, StringRef name
,
660 InputSection
*isec
, uint64_t value
,
661 uint64_t size
, bool forceHidden
) {
662 // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
663 // N_EXT: Global symbols. These go in the symbol table during the link,
664 // and also in the export table of the output so that the dynamic
666 // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped. These go in the
667 // symbol table during the link so that duplicates are
668 // either reported (for non-weak symbols) or merged
669 // (for weak symbols), but they do not go in the export
670 // table of the output.
671 // N_PEXT: llvm-mc does not emit these, but `ld -r` (wherein ld64 emits
672 // object files) may produce them. LLD does not yet support -r.
673 // These are translation-unit scoped, identical to the `0` case.
674 // 0: Translation-unit scoped. These are not in the symbol table during
675 // link, and not in the export table of the output either.
676 bool isWeakDefCanBeHidden
=
677 (sym
.n_desc
& (N_WEAK_DEF
| N_WEAK_REF
)) == (N_WEAK_DEF
| N_WEAK_REF
);
679 assert(!(sym
.n_desc
& N_ARM_THUMB_DEF
) && "ARM32 arch is not supported");
681 if (sym
.n_type
& N_EXT
) {
682 // -load_hidden makes us treat global symbols as linkage unit scoped.
683 // Duplicates are reported but the symbol does not go in the export trie.
684 bool isPrivateExtern
= sym
.n_type
& N_PEXT
|| forceHidden
;
686 // lld's behavior for merging symbols is slightly different from ld64:
687 // ld64 picks the winning symbol based on several criteria (see
688 // pickBetweenRegularAtoms() in ld64's SymbolTable.cpp), while lld
689 // just merges metadata and keeps the contents of the first symbol
690 // with that name (see SymbolTable::addDefined). For:
691 // * inline function F in a TU built with -fvisibility-inlines-hidden
692 // * and inline function F in another TU built without that flag
693 // ld64 will pick the one from the file built without
694 // -fvisibility-inlines-hidden.
695 // lld will instead pick the one listed first on the link command line and
696 // give it visibility as if the function was built without
697 // -fvisibility-inlines-hidden.
698 // If both functions have the same contents, this will have the same
699 // behavior. If not, it won't, but the input had an ODR violation in
702 // Similarly, merging a symbol
703 // that's isPrivateExtern and not isWeakDefCanBeHidden with one
704 // that's not isPrivateExtern but isWeakDefCanBeHidden technically
705 // should produce one
706 // that's not isPrivateExtern but isWeakDefCanBeHidden. That matters
707 // with ld64's semantics, because it means the non-private-extern
708 // definition will continue to take priority if more private extern
709 // definitions are encountered. With lld's semantics there's no observable
710 // difference between a symbol that's isWeakDefCanBeHidden(autohide) or one
711 // that's privateExtern -- neither makes it into the dynamic symbol table,
712 // unless the autohide symbol is explicitly exported.
713 // But if a symbol is both privateExtern and autohide then it can't
715 // So we nullify the autohide flag when privateExtern is present
716 // and promote the symbol to privateExtern when it is not already.
717 if (isWeakDefCanBeHidden
&& isPrivateExtern
)
718 isWeakDefCanBeHidden
= false;
719 else if (isWeakDefCanBeHidden
)
720 isPrivateExtern
= true;
721 return symtab
->addDefined(
722 name
, isec
->getFile(), isec
, value
, size
, sym
.n_desc
& N_WEAK_DEF
,
723 isPrivateExtern
, sym
.n_desc
& REFERENCED_DYNAMICALLY
,
724 sym
.n_desc
& N_NO_DEAD_STRIP
, isWeakDefCanBeHidden
);
726 bool includeInSymtab
= !isPrivateLabel(name
) && !isEhFrameSection(isec
);
727 return make
<Defined
>(
728 name
, isec
->getFile(), isec
, value
, size
, sym
.n_desc
& N_WEAK_DEF
,
729 /*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab
,
730 sym
.n_desc
& REFERENCED_DYNAMICALLY
, sym
.n_desc
& N_NO_DEAD_STRIP
);
733 // Absolute symbols are defined symbols that do not have an associated
734 // InputSection. They cannot be weak.
735 template <class NList
>
736 static macho::Symbol
*createAbsolute(const NList
&sym
, InputFile
*file
,
737 StringRef name
, bool forceHidden
) {
738 assert(!(sym
.n_desc
& N_ARM_THUMB_DEF
) && "ARM32 arch is not supported");
740 if (sym
.n_type
& N_EXT
) {
741 bool isPrivateExtern
= sym
.n_type
& N_PEXT
|| forceHidden
;
742 return symtab
->addDefined(name
, file
, nullptr, sym
.n_value
, /*size=*/0,
743 /*isWeakDef=*/false, isPrivateExtern
,
744 /*isReferencedDynamically=*/false,
745 sym
.n_desc
& N_NO_DEAD_STRIP
,
746 /*isWeakDefCanBeHidden=*/false);
748 return make
<Defined
>(name
, file
, nullptr, sym
.n_value
, /*size=*/0,
750 /*isExternal=*/false, /*isPrivateExtern=*/false,
751 /*includeInSymtab=*/true,
752 /*isReferencedDynamically=*/false,
753 sym
.n_desc
& N_NO_DEAD_STRIP
);
756 template <class NList
>
757 macho::Symbol
*ObjFile::parseNonSectionSymbol(const NList
&sym
,
758 const char *strtab
) {
759 StringRef name
= StringRef(strtab
+ sym
.n_strx
);
760 uint8_t type
= sym
.n_type
& N_TYPE
;
761 bool isPrivateExtern
= sym
.n_type
& N_PEXT
|| forceHidden
;
764 return sym
.n_value
== 0
765 ? symtab
->addUndefined(name
, this, sym
.n_desc
& N_WEAK_REF
)
766 : symtab
->addCommon(name
, this, sym
.n_value
,
767 1 << GET_COMM_ALIGN(sym
.n_desc
),
770 return createAbsolute(sym
, this, name
, forceHidden
);
772 // Not much point in making local aliases -- relocs in the current file can
773 // just refer to the actual symbol itself. ld64 ignores these symbols too.
774 if (!(sym
.n_type
& N_EXT
))
776 StringRef aliasedName
= StringRef(strtab
+ sym
.n_value
);
777 // isPrivateExtern is the only symbol flag that has an impact on the final
779 auto *alias
= make
<AliasSymbol
>(this, name
, aliasedName
, isPrivateExtern
);
780 aliases
.push_back(alias
);
784 error("TODO: support symbols of type N_PBUD");
788 "N_SECT symbols should not be passed to parseNonSectionSymbol");
790 llvm_unreachable("invalid symbol type");
794 template <class NList
> static bool isUndef(const NList
&sym
) {
795 return (sym
.n_type
& N_TYPE
) == N_UNDF
&& sym
.n_value
== 0;
// Fills this file's `symbols` table (one slot per nlist entry) from the raw
// symbol table.
//   sectionHeaders        - section headers; `addr` and `align` are read.
//   nList                 - the nlist entries to process.
//   strtab                - string table; names are read at strtab + n_strx.
//   subsectionsViaSymbols - when false, sections are never split at symbol
//                           boundaries.
// Pass 1 buckets N_SECT entries by section index (n_sect - 1), queues entries
// for which isUndef() holds, and passes the rest to parseNonSectionSymbol().
// Pass 2 visits each section that has subsections: sections flagged
// doneSplitting only get their symbols matched to an existing subsection;
// otherwise the symbols are sorted by address and the last subsection is
// repeatedly split at symbol boundaries.
// Pass 3 parses the queued undefined entries.
// NOTE(review): the embedded numbering has gaps (814-815, 820, 824, 832, ...),
// so `continue`/`else`/closing-brace lines are not visible in this listing --
// confirm against the upstream file.
799 void ObjFile::parseSymbols(ArrayRef
<typename
LP::section
> sectionHeaders
,
800 ArrayRef
<typename
LP::nlist
> nList
,
801 const char *strtab
, bool subsectionsViaSymbols
) {
802 using NList
= typename
LP::nlist
;
804 // Groups indices of the symbols by the sections that contain them.
805 std::vector
<std::vector
<uint32_t>> symbolsBySection(sections
.size());
806 symbols
.resize(nList
.size());
807 SmallVector
<unsigned, 32> undefineds
;
808 for (uint32_t i
= 0; i
< nList
.size(); ++i
) {
809 const NList
&sym
= nList
[i
];
811 // Ignore debug symbols for now.
812 // FIXME: may need special handling.
813 if (sym
.n_type
& N_STAB
)
816 if ((sym
.n_type
& N_TYPE
) == N_SECT
) {
// n_sect is used 1-based here: it is decremented before indexing `sections`.
817 Subsections
&subsections
= sections
[sym
.n_sect
- 1]->subsections
;
818 // parseSections() may have chosen not to parse this section.
819 if (subsections
.empty())
821 symbolsBySection
[sym
.n_sect
- 1].push_back(i
);
822 } else if (isUndef(sym
)) {
823 undefineds
.push_back(i
);
825 symbols
[i
] = parseNonSectionSymbol(sym
, strtab
);
829 for (size_t i
= 0; i
< sections
.size(); ++i
) {
830 Subsections
&subsections
= sections
[i
]->subsections
;
831 if (subsections
.empty())
833 std::vector
<uint32_t> &symbolIndices
= symbolsBySection
[i
];
834 uint64_t sectionAddr
= sectionHeaders
[i
].addr
;
// The header's `align` field is used as a power-of-two exponent.
835 uint32_t sectionAlign
= 1u << sectionHeaders
[i
].align
;
837 // Some sections have already been split into subsections during
838 // parseSections(), so we simply need to match Symbols to the corresponding
840 if (sections
[i
]->doneSplitting
) {
841 for (size_t j
= 0; j
< symbolIndices
.size(); ++j
) {
842 const uint32_t symIndex
= symbolIndices
[j
];
843 const NList
&sym
= nList
[symIndex
];
844 StringRef name
= strtab
+ sym
.n_strx
;
845 uint64_t symbolOffset
= sym
.n_value
- sectionAddr
;
// symbolOffset is passed by address; the check below requires it to be 0
// afterwards, otherwise an error is reported for the symbol.
847 findContainingSubsection(*sections
[i
], &symbolOffset
);
848 if (symbolOffset
!= 0) {
849 error(toString(*sections
[i
]) + ": symbol " + name
+
850 " at misaligned offset");
854 createDefined(sym
, name
, isec
, 0, isec
->getSize(), forceHidden
);
858 sections
[i
]->doneSplitting
= true;
860 auto getSymName
= [strtab
](const NList
& sym
) -> StringRef
{
861 return StringRef(strtab
+ sym
.n_strx
);
864 // Calculate symbol sizes and create subsections by splitting the sections
865 // along symbol boundaries.
866 // We populate subsections by repeatedly splitting the last (highest
867 // address) subsection.
868 llvm::stable_sort(symbolIndices
, [&](uint32_t lhs
, uint32_t rhs
) {
869 // Put extern weak symbols after other symbols at the same address so
870 // that weak symbol coalescing works correctly. See
871 // SymbolTable::addDefined() for details.
872 if (nList
[lhs
].n_value
== nList
[rhs
].n_value
&&
873 nList
[lhs
].n_type
& N_EXT
&& nList
[rhs
].n_type
& N_EXT
)
874 return !(nList
[lhs
].n_desc
& N_WEAK_DEF
) && (nList
[rhs
].n_desc
& N_WEAK_DEF
);
875 return nList
[lhs
].n_value
< nList
[rhs
].n_value
;
877 for (size_t j
= 0; j
< symbolIndices
.size(); ++j
) {
878 const uint32_t symIndex
= symbolIndices
[j
];
879 const NList
&sym
= nList
[symIndex
];
880 StringRef name
= getSymName(sym
);
881 Subsection
&subsec
= subsections
.back();
882 InputSection
*isec
= subsec
.isec
;
884 uint64_t subsecAddr
= sectionAddr
+ subsec
.offset
;
885 size_t symbolOffset
= sym
.n_value
- subsecAddr
;
// Size is the distance to the next symbol in sorted order, or for the last
// symbol the remainder of the current subsection's data.
886 uint64_t symbolSize
=
887 j
+ 1 < symbolIndices
.size()
888 ? nList
[symbolIndices
[j
+ 1]].n_value
- sym
.n_value
889 : isec
->data
.size() - symbolOffset
;
890 // There are 4 cases where we do not need to create a new subsection:
891 // 1. If the input file does not use subsections-via-symbols.
892 // 2. Multiple symbols at the same address only induce one subsection.
893 // (The symbolOffset == 0 check covers both this case as well as
894 // the first loop iteration.)
895 // 3. Alternative entry points do not induce new subsections.
896 // 4. If we have a literal section (e.g. __cstring and __literal4).
897 if (!subsectionsViaSymbols
|| symbolOffset
== 0 ||
898 sym
.n_desc
& N_ALT_ENTRY
|| !isa
<ConcatInputSection
>(isec
)) {
899 isec
->hasAltEntry
= symbolOffset
!= 0;
900 symbols
[symIndex
] = createDefined(sym
, name
, isec
, symbolOffset
,
901 symbolSize
, forceHidden
);
904 auto *concatIsec
= cast
<ConcatInputSection
>(isec
);
// The new subsection starts as a copy of the current one; its data range is
// then narrowed to the part at and after symbolOffset.
906 auto *nextIsec
= make
<ConcatInputSection
>(*concatIsec
);
907 nextIsec
->wasCoalesced
= false;
908 if (isZeroFill(isec
->getFlags())) {
909 // Zero-fill sections have NULL data.data() non-zero data.size()
910 nextIsec
->data
= {nullptr, isec
->data
.size() - symbolOffset
};
911 isec
->data
= {nullptr, symbolOffset
};
913 nextIsec
->data
= isec
->data
.slice(symbolOffset
);
914 isec
->data
= isec
->data
.slice(0, symbolOffset
);
917 // By construction, the symbol will be at offset zero in the new
919 symbols
[symIndex
] = createDefined(sym
, name
, nextIsec
, /*value=*/0,
920 symbolSize
, forceHidden
);
921 // TODO: ld64 appears to preserve the original alignment as well as each
922 // subsection's offset from the last aligned address. We should consider
923 // emulating that behavior.
924 nextIsec
->align
= MinAlign(sectionAlign
, sym
.n_value
);
925 subsections
.push_back({sym
.n_value
- sectionAddr
, nextIsec
});
929 // Undefined symbols can trigger recursive fetch from Archives due to
930 // LazySymbols. Process defined symbols first so that the relative order
931 // between a defined symbol and an undefined symbol does not change the
932 // symbol resolution behavior. In addition, a set of interconnected symbols
933 // will all be resolved to the same file, instead of being resolved to
935 for (unsigned i
: undefineds
)
936 symbols
[i
] = parseNonSectionSymbol(nList
[i
], strtab
);
// Wraps an arbitrary memory buffer as an input file with exactly one section
// holding one subsection.
//   mb      - the buffer; its whole contents become the section data.
//   segName - segment name; only the first 16 characters are kept.
// `sectName` (also truncated to 16 characters) is used below, but its
// declaration is not visible in this listing.
// NOTE(review): the embedded numbering skips 940, 949 and 951-952, so part of
// the parameter list and the end of the body are missing here -- confirm
// against the upstream file.
939 OpaqueFile::OpaqueFile(MemoryBufferRef mb
, StringRef segName
,
941 : InputFile(OpaqueKind
, mb
) {
942 const auto *buf
= reinterpret_cast<const uint8_t *>(mb
.getBufferStart());
943 ArrayRef
<uint8_t> data
= {buf
, mb
.getBufferSize()};
944 sections
.push_back(make
<Section
>(/*file=*/this, segName
.take_front(16),
945 sectName
.take_front(16),
946 /*flags=*/0, /*addr=*/0));
// Reference to the Section that was just appended. (This token had been
// mis-decoded as a section-sign character followed by "ion".)
947 Section
&section
= *sections
.back();
948 ConcatInputSection
*isec
= make
<ConcatInputSection
>(section
, data
);
// The whole buffer is a single subsection at offset 0.
950 section
.subsections
.push_back({0, isec
});
// Feeds every LC_LINKER_OPTION load command of this file to
// parseLCLinkerOption().
//   LCLinkerOptions - forwarded to parseLCLinkerOption() as its first
//                     argument; the caller in parse() later appends it to
//                     unprocessedLCLinkerOptions.
// NOTE(review): `LP` is used but no template header is visible (embedded line
// 953 is missing from this listing) -- confirm against the upstream file.
954 void ObjFile::parseLinkerOptions(SmallVectorImpl
<StringRef
> &LCLinkerOptions
) {
955 using Header
= typename
LP::mach_header
;
956 auto *hdr
= reinterpret_cast<const Header
*>(mb
.getBufferStart());
958 for (auto *cmd
: findCommands
<linker_option_command
>(hdr
, LC_LINKER_OPTION
)) {
// The payload is the bytes that follow the fixed-size command header:
// it starts at cmd + 1 and is cmdsize - sizeof(header) bytes long.
959 StringRef data
{reinterpret_cast<const char *>(cmd
+ 1),
960 cmd
->cmdsize
- sizeof(linker_option_command
)};
961 parseLCLinkerOption(LCLinkerOptions
, this, cmd
->count
, data
);
// Accumulates linker-option strings across object files; ObjFile::parse()
// appends each file's LC_LINKER_OPTION strings to it.
965 SmallVector
<StringRef
> macho::unprocessedLCLinkerOptions
;
// Constructs an object-file input.
//   mb               - buffer passed to the InputFile base.
//   modTime          - stored in the member of the same name.
//   archiveName      - copied into this->archiveName as a std::string.
//   lazy             - passed to the InputFile base.
//   forceHidden      - stored; other functions in this file pass it to
//                      createDefined()/createAbsolute() and OR it into
//                      isPrivateExtern.
//   compatArch       - stored in this->compatArch.
//   builtFromBitcode - stored in the member of the same name.
// NOTE(review): both `if (target->wordSize == 8)` branches have no visible
// bodies (embedded lines 973, 975-978 and 980+ are missing from this listing)
// -- confirm against the upstream file.
966 ObjFile::ObjFile(MemoryBufferRef mb
, uint32_t modTime
, StringRef archiveName
,
967 bool lazy
, bool forceHidden
, bool compatArch
,
968 bool builtFromBitcode
)
969 : InputFile(ObjKind
, mb
, lazy
), modTime(modTime
), forceHidden(forceHidden
),
970 builtFromBitcode(builtFromBitcode
) {
971 this->archiveName
= std::string(archiveName
);
972 this->compatArch
= compatArch
;
974 if (target
->wordSize
== 8)
979 if (target
->wordSize
== 8)
// Parses this object file for the layout described by LP. Visible steps, in
// order:
//   1. record the result of compatWithTargetArch() in compatArch;
//   2. collect LC_LINKER_OPTION strings and append them to
//      unprocessedLCLinkerOptions;
//   3. take the section headers that follow the segment command returned by
//      findCommand() and run parseSections() on them;
//   4. if LC_SYMTAB is present, run parseSymbols();
//   5. run parseRelocations() for every section that has subsections;
//   6. locate the compact-unwind and __eh_frame sections by name and register
//      them.
// NOTE(review): the embedded numbering has gaps (996-997, 999-1000, 1019,
// 1039-1042, 1045, ...), so guards, early returns and part of the nList
// construction are not visible in this listing -- confirm against the
// upstream file.
986 template <class LP
> void ObjFile::parse() {
987 using Header
= typename
LP::mach_header
;
988 using SegmentCommand
= typename
LP::segment_command
;
989 using SectionHeader
= typename
LP::section
;
990 using NList
= typename
LP::nlist
;
// Both pointers address the start of the buffer: `buf` for byte-offset
// arithmetic, `hdr` for typed access to the header.
992 auto *buf
= reinterpret_cast<const uint8_t *>(mb
.getBufferStart());
993 auto *hdr
= reinterpret_cast<const Header
*>(mb
.getBufferStart());
995 // If we've already checked the arch, then don't need to check again.
998 if (!(compatArch
= compatWithTargetArch(this, hdr
)))
1001 // We will resolve LC linker options once all native objects are loaded after
1003 SmallVector
<StringRef
, 4> LCLinkerOptions
;
1004 parseLinkerOptions
<LP
>(LCLinkerOptions
);
1005 unprocessedLCLinkerOptions
.append(LCLinkerOptions
);
1007 ArrayRef
<SectionHeader
> sectionHeaders
;
1008 if (const load_command
*cmd
= findCommand(hdr
, LP::segmentLCType
)) {
1009 auto *c
= reinterpret_cast<const SegmentCommand
*>(cmd
);
// The section headers are read from directly after the segment command
// (c + 1); there are c->nsects of them.
1010 sectionHeaders
= ArrayRef
<SectionHeader
>{
1011 reinterpret_cast<const SectionHeader
*>(c
+ 1), c
->nsects
};
1012 parseSections(sectionHeaders
);
1015 // TODO: Error on missing LC_SYMTAB?
1016 if (const load_command
*cmd
= findCommand(hdr
, LC_SYMTAB
)) {
1017 auto *c
= reinterpret_cast<const symtab_command
*>(cmd
);
// symoff and stroff are byte offsets from the start of the buffer.
1018 ArrayRef
<NList
> nList(reinterpret_cast<const NList
*>(buf
+ c
->symoff
),
1020 const char *strtab
= reinterpret_cast<const char *>(buf
) + c
->stroff
;
1021 bool subsectionsViaSymbols
= hdr
->flags
& MH_SUBSECTIONS_VIA_SYMBOLS
;
1022 parseSymbols
<LP
>(sectionHeaders
, nList
, strtab
, subsectionsViaSymbols
);
1025 // The relocations may refer to the symbols, so we parse them after we have
1026 // parsed all the symbols.
1027 for (size_t i
= 0, n
= sections
.size(); i
< n
; ++i
)
1028 if (!sections
[i
]->subsections
.empty())
1029 parseRelocations(sectionHeaders
, sectionHeaders
[i
], *sections
[i
]);
1033 Section
*ehFrameSection
= nullptr;
1034 Section
*compactUnwindSection
= nullptr;
1035 for (Section
*sec
: sections
) {
// Maps the section name to the address of the matching local pointer above.
1036 Section
**s
= StringSwitch
<Section
**>(sec
->name
)
1037 .Case(section_names::compactUnwind
, &compactUnwindSection
)
1038 .Case(section_names::ehFrame
, &ehFrameSection
)
1043 if (compactUnwindSection
)
1044 registerCompactUnwind(*compactUnwindSection
);
1046 registerEhFrames(*ehFrameSection
);
// Lazy-mode parse: registers the file's external, non-undefined symbols with
// the symbol table via addLazyObject(), without parsing sections.
// `symbols` is resized to one slot per nlist entry; only the slots of
// registered symbols are assigned here.
// NOTE(review): the embedded numbering skips 1055-1057, 1059-1060, 1062-1063
// and 1066, so the early-return guards (including any check that `cmd` is
// non-null) are not visible in this listing -- confirm against the upstream
// file.
1049 template <class LP
> void ObjFile::parseLazy() {
1050 using Header
= typename
LP::mach_header
;
1051 using NList
= typename
LP::nlist
;
1053 auto *buf
= reinterpret_cast<const uint8_t *>(mb
.getBufferStart());
1054 auto *hdr
= reinterpret_cast<const Header
*>(mb
.getBufferStart());
1058 if (!(compatArch
= compatWithTargetArch(this, hdr
)))
1061 const load_command
*cmd
= findCommand(hdr
, LC_SYMTAB
);
1064 auto *c
= reinterpret_cast<const symtab_command
*>(cmd
);
1065 ArrayRef
<NList
> nList(reinterpret_cast<const NList
*>(buf
+ c
->symoff
),
1067 const char *strtab
= reinterpret_cast<const char *>(buf
) + c
->stroff
;
1068 symbols
.resize(nList
.size());
1069 for (const auto &[i
, sym
] : llvm::enumerate(nList
)) {
// Only entries with N_EXT set that are not undefined are registered.
1070 if ((sym
.n_type
& N_EXT
) && !isUndef(sym
)) {
1071 // TODO: Bound checking
1072 StringRef name
= strtab
+ sym
.n_strx
;
1073 symbols
[i
] = symtab
->addLazyObject(name
, *this);
// Builds a DWARFContext for this file and stores its first compile unit in
// `compileUnit` (nullptr when the context has no compile units).
// DWARF errors and warnings reported through the context's handlers are
// re-emitted with warn(), prefixed by this file's name.
// NOTE(review): the embedded numbering skips 1082-1084, 1089, 1091 and
// 1094-1095, so any check on `dObj` and the start of the error-handler lambda
// are not visible in this listing -- confirm against the upstream file.
1080 void ObjFile::parseDebugInfo() {
1081 std::unique_ptr
<DwarfObject
> dObj
= DwarfObject::create(this);
1085 // We do not re-use the context from getDwarf() here as that function
1086 // constructs an expensive DWARFCache object.
1087 auto *ctx
= make
<DWARFContext
>(
1088 std::move(dObj
), "",
1090 warn(toString(this) + ": " + toString(std::move(err
)));
1092 [&](Error warning
) {
1093 warn(toString(this) + ": " + toString(std::move(warning
)));
1096 // TODO: Since object files can contain a lot of DWARF info, we should verify
1097 // that we are parsing just the info we need
1098 const DWARFContext::compile_unit_range
&units
= ctx
->compile_units();
1099 // FIXME: There can be more than one compile unit per object file. See
1101 auto it
= units
.begin();
// Only the first unit is kept; an empty range yields nullptr.
1102 compileUnit
= it
!= units
.end() ? it
->get() : nullptr;
// Returns a view of the data_in_code_entry records referenced by the file's
// LC_DATA_IN_CODE command: they start at byte offset `dataoff` from the
// beginning of the buffer, and there are datasize / sizeof(data_in_code_entry)
// of them. The view points into `mb`; nothing is copied.
// NOTE(review): embedded lines 1108-1109 are missing from this listing, so the
// handling of an absent command is not visible -- confirm against the upstream
// file.
1105 ArrayRef
<data_in_code_entry
> ObjFile::getDataInCode() const {
1106 const auto *buf
= reinterpret_cast<const uint8_t *>(mb
.getBufferStart());
1107 const load_command
*cmd
= findCommand(buf
, LC_DATA_IN_CODE
);
1110 const auto *c
= reinterpret_cast<const linkedit_data_command
*>(cmd
);
1111 return {reinterpret_cast<const data_in_code_entry
*>(buf
+ c
->dataoff
),
1112 c
->datasize
/ sizeof(data_in_code_entry
)};
// Returns the raw bytes referenced by the file's LC_LINKER_OPTIMIZATION_HINT
// command: `datasize` bytes starting at byte offset `dataoff` from the
// beginning of the buffer. The view points into `mb`; nothing is copied.
// NOTE(review): embedded lines 1117 and 1120-1121 are missing from this
// listing, so the declaration of `cmd` and the fallback return are not visible
// -- confirm against the upstream file.
1115 ArrayRef
<uint8_t> ObjFile::getOptimizationHints() const {
1116 const auto *buf
= reinterpret_cast<const uint8_t *>(mb
.getBufferStart());
1118 findCommand
<linkedit_data_command
>(buf
, LC_LINKER_OPTIMIZATION_HINT
))
1119 return {buf
+ cmd
->dataoff
, cmd
->datasize
};
1123 // Create pointers from symbols to their associated compact unwind entries.
// For each subsection of `compactUnwindSection`: trim the entry's data, skip
// entries whose encoding mode equals target->modeDwarfEncoding, find the
// relocation at offset 0 (the function address), resolve it to a Defined
// symbol, store the entry in that symbol's originalUnwindEntry, and erase the
// relocation from the entry.
// An error is reported when the referenced section is not in segment __TEXT.
// NOTE(review): the embedded numbering has gaps (1154-1155, 1158, 1161-1163,
// 1169-1172, 1174-1175, 1190-1193, 1213+), so the declaration of `r`, the
// `continue`/`else` lines and the null check on `d` are not visible in this
// listing -- confirm against the upstream file.
1124 void ObjFile::registerCompactUnwind(Section
&compactUnwindSection
) {
1125 for (const Subsection
&subsection
: compactUnwindSection
.subsections
) {
1126 ConcatInputSection
*isec
= cast
<ConcatInputSection
>(subsection
.isec
);
1127 // Hack!! Each compact unwind entry (CUE) has its UNSIGNED relocations embed
1128 // their addends in its data. Thus if ICF operated naively and compared the
1129 // entire contents of each CUE, entries with identical unwind info but e.g.
1130 // belonging to different functions would never be considered equivalent. To
1131 // work around this problem, we remove some parts of the data containing the
1132 // embedded addends. In particular, we remove the function address and LSDA
1133 // pointers. Since these locations are at the start and end of the entry,
1134 // we can do this using a simple, efficient slice rather than performing a
1135 // copy. We are not losing any information here because the embedded
1136 // addends have already been parsed in the corresponding Reloc structs.
1138 // Removing these pointers would not be safe if they were pointers to
1139 // absolute symbols. In that case, there would be no corresponding
1140 // relocation. However, (AFAIK) MC cannot emit references to absolute
1141 // symbols for either the function address or the LSDA. However, it *can* do
1142 // so for the personality pointer, so we are not slicing that field away.
1144 // Note that we do not adjust the offsets of the corresponding relocations;
1145 // instead, we rely on `relocateCompactUnwind()` to correctly handle these
1146 // truncated input sections.
// Keeps 8 + wordSize bytes, starting wordSize bytes into the entry.
1147 isec
->data
= isec
->data
.slice(target
->wordSize
, 8 + target
->wordSize
);
// After the slice, the encoding is read 4 bytes into the remaining data.
1148 uint32_t encoding
= read32le(isec
->data
.data() + sizeof(uint32_t));
1149 // llvm-mc omits CU entries for functions that need DWARF encoding, but
1150 // `ld -r` doesn't. We can ignore them because we will re-synthesize these
1151 // CU entries from the DWARF info during the output phase.
1152 if ((encoding
& static_cast<uint32_t>(UNWIND_MODE_MASK
)) ==
1153 target
->modeDwarfEncoding
)
1156 ConcatInputSection
*referentIsec
;
// No increment in the loop header: `it` is reassigned from erase() below.
1157 for (auto it
= isec
->relocs
.begin(); it
!= isec
->relocs
.end();) {
1159 // CUE::functionAddress is at offset 0. Skip personality & LSDA relocs.
1160 if (r
.offset
!= 0) {
1164 uint64_t add
= r
.addend
;
1165 if (auto *sym
= cast_or_null
<Defined
>(r
.referent
.dyn_cast
<Symbol
*>())) {
1166 // Check whether the symbol defined in this file is the prevailing one.
1167 // Skip if it is e.g. a weak def that didn't prevail.
1168 if (sym
->getFile() != this) {
1173 referentIsec
= cast
<ConcatInputSection
>(sym
->isec());
1176 cast
<ConcatInputSection
>(r
.referent
.dyn_cast
<InputSection
*>());
1178 // Unwind info lives in __DATA, and finalization of __TEXT will occur
1179 // before finalization of __DATA. Moreover, the finalization of unwind
1180 // info depends on the exact addresses that it references. So it is safe
1181 // for compact unwind to reference addresses in __TEXT, but not addresses
1182 // in any other segment.
1183 if (referentIsec
->getSegName() != segment_names::text
)
1184 error(isec
->getLocation(r
.offset
) + " references section " +
1185 referentIsec
->getName() + " which is not in segment __TEXT");
1186 // The functionAddress relocations are typically section relocations.
1187 // However, unwind info operates on a per-symbol basis, so we search for
1188 // the function symbol here.
1189 Defined
*d
= findSymbolAtOffset(referentIsec
, add
);
1194 d
->originalUnwindEntry
= isec
;
1195 // Now that the symbol points to the unwind entry, we can remove the reloc
1196 // that points from the unwind entry back to the symbol.
1198 // First, the symbol keeps the unwind entry alive (and not vice versa), so
1199 // this keeps dead-stripping simple.
1201 // Moreover, it reduces the work that ICF needs to do to figure out if
1202 // functions with unwind info are foldable.
1204 // However, this does make it possible for ICF to fold CUEs that point to
1205 // distinct functions (if the CUEs are otherwise identical).
1206 // UnwindInfoSection takes care of this by re-duplicating the CUEs so that
1207 // each one can hold a distinct functionAddress value.
1209 // Given that clang emits relocations in reverse order of address, this
1210 // relocation should be at the end of the vector for most of our input
1211 // object files, so this erase() is typically an O(1) operation.
1212 it
= isec
->relocs
.erase(it
);
// Per-CIE facts filled in by parseCIE() (which writes cie.personalitySymbol,
// cie.fdesHaveAug, cie.lsdaPtrSize and cie.funcPtrSize) and read back by
// registerEhFrames().
// NOTE(review): the enclosing declaration is not visible (embedded line 1217
// is missing from this listing); these are presumably the members of
// `struct CIE` -- confirm against the upstream file.
1218 macho::Symbol
*personalitySymbol
= nullptr;
1219 bool fdesHaveAug
= false;
1220 uint8_t lsdaPtrSize
= 0; // 0 => no LSDA
1221 uint8_t funcPtrSize
= 0;
// Maps the low four bits of a DWARF pointer encoding to a size:
// DW_EH_PE_absptr yields target->wordSize. The callers in parseCIE() treat a
// result of 0 as an unexpected encoding.
// NOTE(review): the return statements for DW_EH_PE_sdata4 / DW_EH_PE_sdata8
// and the default case are not visible (embedded lines 1229 and 1232+ are
// missing from this listing) -- confirm against the upstream file.
1224 static uint8_t pointerEncodingToSize(uint8_t enc
) {
1225 switch (enc
& 0xf) {
1226 case dwarf::DW_EH_PE_absptr
:
1227 return target
->wordSize
;
1228 case dwarf::DW_EH_PE_sdata4
:
1230 case dwarf::DW_EH_PE_sdata8
:
1231 // ld64 doesn't actually support sdata8, but this seems simple enough...
// Parses one CIE from an __eh_frame input section and returns the facts that
// FDE parsing needs.
//   isec   - the input section holding the CIE; used to look up the
//            personality relocation via getRelocAt().
//   reader - byte reader over the section data; also used to report failures.
// Reads the version (fatal unless 1 or 3), the augmentation string, skips four
// LEB128 fields, then walks the augmentation characters to record
// fdesHaveAug, the personality location, the LSDA pointer size and the
// function pointer size. Unexpected encodings are reported via
// reader.failOn().
// NOTE(review): the embedded numbering has gaps (1239, 1245-1246, 1257-1258,
// 1260-1261, 1267-1270, ...), so the last parameter, the declarations of `off`
// and `cie`, and the `switch`/`case` labels on `c` are not visible in this
// listing -- confirm against the upstream file.
1238 static CIE
parseCIE(const InputSection
*isec
, const EhReader
&reader
,
1240 // Handling the full generality of possible DWARF encodings would be a major
1241 // pain. We instead take advantage of our knowledge of how llvm-mc encodes
1242 // DWARF and handle just that.
1243 constexpr uint8_t expectedPersonalityEnc
=
1244 dwarf::DW_EH_PE_pcrel
| dwarf::DW_EH_PE_indirect
| dwarf::DW_EH_PE_sdata4
;
1247 uint8_t version
= reader
.readByte(&off
);
1248 if (version
!= 1 && version
!= 3)
1249 fatal("Expected CIE version of 1 or 3, got " + Twine(version
));
1250 StringRef aug
= reader
.readString(&off
);
1251 reader
.skipLeb128(&off
); // skip code alignment
1252 reader
.skipLeb128(&off
); // skip data alignment
1253 reader
.skipLeb128(&off
); // skip return address register
1254 reader
.skipLeb128(&off
); // skip aug data length
// 0 doubles as "no personality seen"; see the check after the loop.
1255 uint64_t personalityAddrOff
= 0;
1256 for (char c
: aug
) {
1259 cie
.fdesHaveAug
= true;
1262 uint8_t personalityEnc
= reader
.readByte(&off
);
1263 if (personalityEnc
!= expectedPersonalityEnc
)
1264 reader
.failOn(off
, "unexpected personality encoding 0x" +
1265 Twine::utohexstr(personalityEnc
));
1266 personalityAddrOff
= off
;
1271 uint8_t lsdaEnc
= reader
.readByte(&off
);
1272 cie
.lsdaPtrSize
= pointerEncodingToSize(lsdaEnc
);
1273 if (cie
.lsdaPtrSize
== 0)
1274 reader
.failOn(off
, "unexpected LSDA encoding 0x" +
1275 Twine::utohexstr(lsdaEnc
));
1279 uint8_t pointerEnc
= reader
.readByte(&off
);
1280 cie
.funcPtrSize
= pointerEncodingToSize(pointerEnc
);
// Function pointers must have a known size and be pc-relative.
1281 if (cie
.funcPtrSize
== 0 || !(pointerEnc
& dwarf::DW_EH_PE_pcrel
))
1282 reader
.failOn(off
, "unexpected pointer encoding 0x" +
1283 Twine::utohexstr(pointerEnc
));
1290 if (personalityAddrOff
!= 0) {
1291 const auto *personalityReloc
= isec
->getRelocAt(personalityAddrOff
);
1292 if (!personalityReloc
)
1293 reader
.failOn(off
, "Failed to locate relocation for personality symbol");
1294 cie
.personalitySymbol
= personalityReloc
->referent
.get
<macho::Symbol
*>();
1299 // EH frame target addresses may be encoded as pcrel offsets. However, instead
1300 // of using an actual pcrel reloc, ld64 emits subtractor relocations instead.
1301 // This function recovers the target address from the subtractors, essentially
1302 // performing the inverse operation of EhRelocator.
1304 // Concretely, we expect our relocations to write the value of `PC -
1305 // target_addr` to `PC`. `PC` itself is denoted by a minuend relocation that
1306 // points to a symbol plus an addend.
1308 // It is important that the minuend relocation point to a symbol within the
1309 // same section as the fixup value, since sections may get moved around.
1311 // For example, for arm64, llvm-mc emits relocations for the target function
1318 // ... multiple FDEs ...
1320 // <target function address - (ltmp + pcrel offset)>
1323 // If any of the FDEs in `multiple FDEs` get dead-stripped, then `FDE start`
1324 // will move to an earlier address, and `ltmp + pcrel offset` will no longer
1325 // reflect an accurate pcrel value. To avoid this problem, we "canonicalize"
1326 // our relocation by adding an `EH_Frame` symbol at `FDE start`, and updating
1327 // the reloc to be `target function address - (EH_Frame + new pcrel offset)`.
1329 // If `Invert` is set, then we instead expect `target_addr - PC` to be written
//
// Parameters:
//   isec    - the EH frame input section that owns the relocations.
//   relocIt - iterator to the subtrahend relocation; the minuend is taken from
//             the element that follows it (std::next).
// Side effects visible below: the PC-side relocation's referent is replaced by
// isec->symbols[0] and the minuend's addend is rewritten.
// NOTE(review): the embedded numbering has gaps (1332, 1340, 1342, 1344-1345,
// 1348-1349, 1354, 1365+), so the return type, the declaration of the local
// `target`, the branch structure and the return statement are not visible in
// this listing -- confirm against the upstream file.
1331 template <bool Invert
= false>
1333 targetSymFromCanonicalSubtractor(const InputSection
*isec
,
1334 std::vector
<macho::Reloc
>::iterator relocIt
) {
1335 macho::Reloc
&subtrahend
= *relocIt
;
1336 macho::Reloc
&minuend
= *std::next(relocIt
);
1337 assert(target
->hasAttr(subtrahend
.type
, RelocAttrBits::SUBTRAHEND
));
1338 assert(target
->hasAttr(minuend
.type
, RelocAttrBits::UNSIGNED
));
1339 // Note: pcSym may *not* be exactly at the PC; there's usually a non-zero
1341 auto *pcSym
= cast
<Defined
>(subtrahend
.referent
.get
<macho::Symbol
*>());
1343 cast_or_null
<Defined
>(minuend
.referent
.dyn_cast
<macho::Symbol
*>());
1346 cast
<ConcatInputSection
>(minuend
.referent
.get
<InputSection
*>());
1347 target
= findSymbolAtOffset(targetIsec
, minuend
.addend
);
1350 std::swap(pcSym
, target
);
1351 if (pcSym
->isec() == isec
) {
1352 if (pcSym
->value
- (Invert
? -1 : 1) * minuend
.addend
!= subtrahend
.offset
)
1353 fatal("invalid FDE relocation in __eh_frame");
1355 // Ensure the pcReloc points to a symbol within the current EH frame.
1356 // HACK: we should really verify that the original relocation's semantics
1357 // are preserved. In particular, we should have
1358 // `oldSym->value + oldOffset == newSym + newOffset`. However, we don't
1359 // have an easy way to access the offsets from this point in the code; some
1360 // refactoring is needed for that.
1361 macho::Reloc
&pcReloc
= Invert
? minuend
: subtrahend
;
1362 pcReloc
.referent
= isec
->symbols
[0];
1363 assert(isec
->symbols
[0]->value
== 0);
1364 minuend
.addend
= pcReloc
.offset
* (Invert
? 1LL : -1LL);
// Resolves an address to a Defined symbol in three steps: find the containing
// section, then the containing subsection, then the symbol at the remaining
// offset.
//   sections - the sections to search.
// `addr` is passed by address to both lookup helpers and its value is then
// used as the offset handed to findSymbolAtOffset().
// NOTE(review): embedded line 1370 (the rest of the parameter list, including
// the declaration of `addr`) is missing from this listing -- confirm against
// the upstream file.
1369 Defined
*findSymbolAtAddress(const std::vector
<Section
*> &sections
,
1371 Section
*sec
= findContainingSection(sections
, &addr
);
1372 auto *isec
= cast
<ConcatInputSection
>(findContainingSubsection(*sec
, &addr
));
1373 return findSymbolAtOffset(isec
, addr
);
1376 // For symbols that don't have compact unwind info, associate them with the more
1377 // general-purpose (and verbose) DWARF unwind info found in __eh_frame.
1379 // This requires us to parse the contents of __eh_frame. See EhFrame.h for a
1380 // description of its format.
1382 // While parsing, we also look for what MC calls "abs-ified" relocations -- they
1383 // are relocations which are implicitly encoded as offsets in the section data.
1384 // We convert them into explicit Reloc structs so that the EH frames can be
1385 // handled just like a regular ConcatInputSection later in our output phase.
1387 // We also need to handle the case where our input object file has explicit
1388 // relocations. This is the case when e.g. it's the output of `ld -r`. We only
1389 // look for the "abs-ified" relocation if an explicit relocation is absent.
//
// Per subsection of `ehFrameSection`, the visible steps are: make sure the
// frame has a symbol at offset 0, locate the CIE it refers to (from an
// explicit relocation or from the embedded offset), parse the frame as a CIE
// when it refers to itself, otherwise read the function address/length and
// optional LSDA address, resolve the function symbol, and record the FDE in
// `fdes` and in the symbol's originalUnwindEntry. Finally S_ATTR_LIVE_SUPPORT
// is cleared on the section.
// NOTE(review): the embedded numbering has many gaps (1395, 1407, 1414, 1421,
// 1423, 1425, 1431-1432, 1437, 1440-1441, 1444-1446, 1463-1465, 1473-1475,
// 1485-1486, 1489, 1494-1497, 1500, 1505, 1508-1509, 1513-1514), so several
// assignments, `continue`/`else` lines and closing braces are not visible in
// this listing -- confirm against the upstream file.
1390 void ObjFile::registerEhFrames(Section
&ehFrameSection
) {
// CIEs parsed so far, keyed by the input section that holds them.
1391 DenseMap
<const InputSection
*, CIE
> cieMap
;
1392 for (const Subsection
&subsec
: ehFrameSection
.subsections
) {
1393 auto *isec
= cast
<ConcatInputSection
>(subsec
.isec
);
1394 uint64_t isecOff
= subsec
.offset
;
1396 // Subtractor relocs require the subtrahend to be a symbol reloc. Ensure
1397 // that all EH frames have an associated symbol so that we can generate
1398 // subtractor relocs that reference them.
1399 if (isec
->symbols
.size() == 0)
1400 make
<Defined
>("EH_Frame", isec
->getFile(), isec
, /*value=*/0,
1401 isec
->getSize(), /*isWeakDef=*/false, /*isExternal=*/false,
1402 /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
1403 /*isReferencedDynamically=*/false,
1404 /*noDeadStrip=*/false);
1405 else if (isec
->symbols
[0]->value
!= 0)
1406 fatal("found symbol at unexpected offset in __eh_frame");
1408 EhReader
reader(this, isec
->data
, subsec
.offset
);
1409 size_t dataOff
= 0; // Offset from the start of the EH frame.
1410 reader
.skipValidLength(&dataOff
); // readLength() already validated this.
1411 // cieOffOff is the offset from the start of the EH frame to the cieOff
1412 // value, which is itself an offset from the current PC to a CIE.
1413 const size_t cieOffOff
= dataOff
;
1415 EhRelocator
ehRelocator(isec
);
1416 auto cieOffRelocIt
= llvm::find_if(
1417 isec
->relocs
, [=](const Reloc
&r
) { return r
.offset
== cieOffOff
; });
1418 InputSection
*cieIsec
= nullptr;
1419 if (cieOffRelocIt
!= isec
->relocs
.end()) {
1420 // We already have an explicit relocation for the CIE offset.
1422 targetSymFromCanonicalSubtractor
</*Invert=*/true>(isec
, cieOffRelocIt
)
// The 4-byte CIE offset field is not read on this path, only stepped over.
1424 dataOff
+= sizeof(uint32_t);
1426 // If we haven't found a relocation, then the CIE offset is most likely
1427 // embedded in the section data (AKA an "abs-ified" reloc.). Parse that
1428 // and generate a Reloc struct.
1429 uint32_t cieMinuend
= reader
.readU32(&dataOff
);
1430 if (cieMinuend
== 0) {
1433 uint32_t cieOff
= isecOff
+ dataOff
- cieMinuend
;
1434 cieIsec
= findContainingSubsection(ehFrameSection
, &cieOff
);
1435 if (cieIsec
== nullptr)
1436 fatal("failed to find CIE");
1438 if (cieIsec
!= isec
)
1439 ehRelocator
.makeNegativePcRel(cieOffOff
, cieIsec
->symbols
[0],
// A frame whose CIE reference resolves to itself is parsed as a CIE.
1442 if (cieIsec
== isec
) {
1443 cieMap
[cieIsec
] = parseCIE(isec
, reader
, dataOff
);
1447 assert(cieMap
.count(cieIsec
));
1448 const CIE
&cie
= cieMap
[cieIsec
];
1449 // Offset of the function address within the EH frame.
1450 const size_t funcAddrOff
= dataOff
;
// The stored pointer is added to the address of the field itself
// (section address + frame offset + field offset).
1451 uint64_t funcAddr
= reader
.readPointer(&dataOff
, cie
.funcPtrSize
) +
1452 ehFrameSection
.addr
+ isecOff
+ funcAddrOff
;
1453 uint32_t funcLength
= reader
.readPointer(&dataOff
, cie
.funcPtrSize
);
1454 size_t lsdaAddrOff
= 0; // Offset of the LSDA address within the EH frame.
1455 std::optional
<uint64_t> lsdaAddrOpt
;
1456 if (cie
.fdesHaveAug
) {
1457 reader
.skipLeb128(&dataOff
);
1458 lsdaAddrOff
= dataOff
;
1459 if (cie
.lsdaPtrSize
!= 0) {
1460 uint64_t lsdaOff
= reader
.readPointer(&dataOff
, cie
.lsdaPtrSize
);
1461 if (lsdaOff
!= 0) // FIXME possible to test this?
1462 lsdaAddrOpt
= ehFrameSection
.addr
+ isecOff
+ lsdaAddrOff
+ lsdaOff
;
// end() means "no explicit relocation found" for the respective field.
1466 auto funcAddrRelocIt
= isec
->relocs
.end();
1467 auto lsdaAddrRelocIt
= isec
->relocs
.end();
1468 for (auto it
= isec
->relocs
.begin(); it
!= isec
->relocs
.end(); ++it
) {
1469 if (it
->offset
== funcAddrOff
)
1470 funcAddrRelocIt
= it
++; // Found subtrahend; skip over minuend reloc
1471 else if (lsdaAddrOpt
&& it
->offset
== lsdaAddrOff
)
1472 lsdaAddrRelocIt
= it
++; // Found subtrahend; skip over minuend reloc
1476 if (funcAddrRelocIt
!= isec
->relocs
.end()) {
1477 funcSym
= targetSymFromCanonicalSubtractor(isec
, funcAddrRelocIt
);
1478 // Canonicalize the symbol. If there are multiple symbols at the same
1479 // address, we want both `registerEhFrame` and `registerCompactUnwind`
1480 // to register the unwind entry under same symbol.
1481 // This is not particularly efficient, but we should run into this case
1482 // infrequently (only when handling the output of `ld -r`).
1483 if (funcSym
->isec())
1484 funcSym
= findSymbolAtOffset(cast
<ConcatInputSection
>(funcSym
->isec()),
1487 funcSym
= findSymbolAtAddress(sections
, funcAddr
);
1488 ehRelocator
.makePcRel(funcAddrOff
, funcSym
, target
->p2WordSize
);
1490 // The symbol has been coalesced, or already has a compact unwind entry.
1491 if (!funcSym
|| funcSym
->getFile() != this || funcSym
->unwindEntry()) {
1492 // We must prune unused FDEs for correctness, so we cannot rely on
1493 // -dead_strip being enabled.
1498 InputSection
*lsdaIsec
= nullptr;
1499 if (lsdaAddrRelocIt
!= isec
->relocs
.end()) {
1501 targetSymFromCanonicalSubtractor(isec
, lsdaAddrRelocIt
)->isec();
1502 } else if (lsdaAddrOpt
) {
1503 uint64_t lsdaAddr
= *lsdaAddrOpt
;
1504 Section
*sec
= findContainingSection(sections
, &lsdaAddr
);
1506 cast
<ConcatInputSection
>(findContainingSubsection(*sec
, &lsdaAddr
));
1507 ehRelocator
.makePcRel(lsdaAddrOff
, lsdaIsec
, target
->p2WordSize
);
1510 fdes
[isec
] = {funcLength
, cie
.personalitySymbol
, lsdaIsec
};
1511 funcSym
->originalUnwindEntry
= isec
;
1512 ehRelocator
.commit();
1515 // __eh_frame is marked as S_ATTR_LIVE_SUPPORT in input files, because FDEs
1516 // are normally required to be kept alive if they reference a live symbol.
1517 // However, we've explicitly created a dependency from a symbol to its FDE, so
1518 // dead-stripping will just work as usual, and S_ATTR_LIVE_SUPPORT will only
1519 // serve to incorrectly prevent us from dead-stripping duplicate FDEs for a
1520 // live symbol (e.g. if there were multiple weak copies). Remove this flag to
1521 // let dead-stripping proceed correctly.
1522 ehFrameSection
.flags
&= ~S_ATTR_LIVE_SUPPORT
;
// Returns the source path of this file's compile unit: the unit's short name,
// prefixed with the compilation directory unless the name is already absolute
// in either POSIX or Windows style.
// `compileUnit` is dereferenced without a visible null check; parseDebugInfo()
// can leave it as nullptr.
// NOTE(review): embedded lines 1531, 1534 and 1540 are missing from this
// listing, so the return for the absolute case and the statement that appends
// the separator are not visible -- confirm against the upstream file.
1525 std::string
ObjFile::sourceFile() const {
1526 const char *unitName
= compileUnit
->getUnitDIE().getShortName();
1527 // DWARF allows DW_AT_name to be absolute, in which case nothing should be
1528 // prepended. As for the styles, debug info can contain paths from any OS, not
1529 // necessarily an OS we're currently running on. Moreover different
1530 // compilation units can be compiled on different operating systems and linked
1532 if (sys::path::is_absolute(unitName
, llvm::sys::path::Style::posix
) ||
1533 sys::path::is_absolute(unitName
, llvm::sys::path::Style::windows
))
1535 SmallString
<261> dir(compileUnit
->getCompilationDir());
1536 StringRef sep
= sys::path::get_separator();
1537 // We don't use `path::append` here because we want an empty `dir` to result
1538 // in an absolute path. `append` would give us a relative path for that case.
1539 if (!dir
.ends_with(sep
))
1541 return (dir
+ unitName
).str();
// Returns this file's DWARFCache. The construction lambda is run through
// llvm::call_once on `initDwarf`; it builds a DWARFContext whose error and
// warning handlers re-emit messages with warn(), prefixed by getName().
// NOTE(review): embedded lines 1547-1548 and 1554-1556 are missing from this
// listing, so any check on `dwObj` (and whether the result can be null) is not
// visible -- confirm against the upstream file.
1544 lld::DWARFCache
*ObjFile::getDwarf() {
1545 llvm::call_once(initDwarf
, [this]() {
1546 auto dwObj
= DwarfObject::create(this);
1549 dwarfCache
= std::make_unique
<DWARFCache
>(std::make_unique
<DWARFContext
>(
1550 std::move(dwObj
), "",
1551 [&](Error err
) { warn(getName() + ": " + toString(std::move(err
))); },
1552 [&](Error warning
) {
1553 warn(getName() + ": " + toString(std::move(warning
)));
1557 return dwarfCache
.get();
1559 // The path can point to either a dylib or a .tbd file.
// Reads the file at `path` with readFile() and forwards the buffer, together
// with `umbrella`, to the buffer-taking loadDylib() overload. A read failure
// is reported with error("could not read dylib file at ...").
// NOTE(review): embedded lines 1562 and 1564-1565 are missing from this
// listing, so the failure check and its return value are not visible --
// confirm against the upstream file.
1560 static DylibFile
*loadDylib(StringRef path
, DylibFile
*umbrella
) {
1561 std::optional
<MemoryBufferRef
> mbref
= readFile(path
);
1563 error("could not read dylib file at " + path
);
1566 return loadDylib(*mbref
, umbrella
);
1569 // TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
1570 // the first document storing child pointers to the rest of them. When we are
1571 // processing a given TBD file, we store that top-level document in
1572 // currentTopLevelTapi. When processing re-exports, we search its children for
1573 // potentially matching documents in the same TBD file. Note that the children
1574 // themselves don't point to further documents, i.e. this is a two-level tree.
1576 // Re-exports can either refer to on-disk files, or to documents within .tbd
//
// Parameters:
//   path                - install name of the dylib to locate.
//   umbrella            - passed to loadDylib(); also supplies the base
//                         location for @loader_path and the rpath list.
//   currentTopLevelTapi - optional top-level TAPI document whose children are
//                         compared against `path` by install name.
// Visible search order: framework / library search paths, absolute path under
// each system library root, @executable_path / @loader_path / @rpath
// substitution, children of currentTopLevelTapi, and finally `path` itself via
// resolveDylibPath().
// NOTE(review): the embedded numbering has gaps (1577, 1580, 1582, 1587, 1594,
// 1598-1599, 1606, 1608, 1610, 1619, 1624, 1627, 1631, 1635-1637, 1647-1651,
// 1654+), so several conditions, returns and closing braces are not visible in
// this listing -- confirm against the upstream file.
1578 static DylibFile
*findDylib(StringRef path
, DylibFile
*umbrella
,
1579 const InterfaceFile
*currentTopLevelTapi
) {
1581 // 1. Install name basename in -F / -L directories.
1583 StringRef stem
= path::stem(path
);
1584 SmallString
<128> frameworkName
;
// frameworkName becomes "<stem>.framework/<stem>" (POSIX separators).
1585 path::append(frameworkName
, path::Style::posix
, stem
+ ".framework", stem
);
1586 bool isFramework
= path
.ends_with(frameworkName
);
1588 for (StringRef dir
: config
->frameworkSearchPaths
) {
1589 SmallString
<128> candidate
= dir
;
1590 path::append(candidate
, frameworkName
);
1591 if (std::optional
<StringRef
> dylibPath
=
1592 resolveDylibPath(candidate
.str()))
1593 return loadDylib(*dylibPath
, umbrella
);
1595 } else if (std::optional
<StringRef
> dylibPath
= findPathCombination(
1596 stem
, config
->librarySearchPaths
, {".tbd", ".dylib", ".so"}))
1597 return loadDylib(*dylibPath
, umbrella
);
1600 // 2. As absolute path.
1601 if (path::is_absolute(path
, path::Style::posix
))
1602 for (StringRef root
: config
->systemLibraryRoots
)
1603 if (std::optional
<StringRef
> dylibPath
=
1604 resolveDylibPath((root
+ path
).str()))
1605 return loadDylib(*dylibPath
, umbrella
);
1607 // 3. As relative path.
1609 // TODO: Handle -dylib_file
1611 // Replace @executable_path, @loader_path, @rpath prefixes in install name.
1612 SmallString
<128> newPath
;
1613 if (config
->outputType
== MH_EXECUTE
&&
1614 path
.consume_front("@executable_path/")) {
1615 // ld64 allows overriding this with the undocumented flag -executable_path.
1616 // lld doesn't currently implement that flag.
1617 // FIXME: Consider using finalOutput instead of outputFile.
1618 path::append(newPath
, path::parent_path(config
->outputFile
), path
);
1620 } else if (path
.consume_front("@loader_path/")) {
// @loader_path is replaced by the directory of the umbrella's real path.
1621 fs::real_path(umbrella
->getName(), newPath
);
1622 path::remove_filename(newPath
);
1623 path::append(newPath
, path
);
1625 } else if (path
.starts_with("@rpath/")) {
1626 for (StringRef rpath
: umbrella
->rpaths
) {
1628 if (rpath
.consume_front("@loader_path/")) {
1629 fs::real_path(umbrella
->getName(), newPath
);
1630 path::remove_filename(newPath
);
1632 path::append(newPath
, rpath
, path
.drop_front(strlen("@rpath/")));
1633 if (std::optional
<StringRef
> dylibPath
= resolveDylibPath(newPath
.str()))
1634 return loadDylib(*dylibPath
, umbrella
);
1638 // FIXME: Should this be further up?
1639 if (currentTopLevelTapi
) {
1640 for (InterfaceFile
&child
:
1641 make_pointee_range(currentTopLevelTapi
->documents())) {
1642 assert(child
.documents().empty());
1643 if (path
== child
.getInstallName()) {
1644 auto *file
= make
<DylibFile
>(child
, umbrella
, /*isBundleLoader=*/false,
1645 /*explicitlyLinked=*/false);
1646 file
->parseReexports(child
);
1652 if (std::optional
<StringRef
> dylibPath
= resolveDylibPath(path
))
1653 return loadDylib(*dylibPath
, umbrella
);
1658 // If a re-exported dylib is public (lives in /usr/lib or
1659 // /System/Library/Frameworks), then it is considered implicitly linked: we
1660 // should bind to its symbols directly instead of via the re-exporting umbrella
1662 static bool isImplicitlyLinked(StringRef path
) {
1663 if (!config
->implicitDylibs
)
1666 if (path::parent_path(path
) == "/usr/lib")
1669 // Match /System/Library/Frameworks/$FOO.framework/**/$FOO
1670 if (path
.consume_front("/System/Library/Frameworks/")) {
1671 StringRef frameworkName
= path
.take_until([](char c
) { return c
== '.'; });
1672 return path::filename(path
) == frameworkName
;
1678 void DylibFile::loadReexport(StringRef path
, DylibFile
*umbrella
,
1679 const InterfaceFile
*currentTopLevelTapi
) {
1680 DylibFile
*reexport
= findDylib(path
, umbrella
, currentTopLevelTapi
);
1682 error(toString(this) + ": unable to locate re-export with install name " +
1686 DylibFile::DylibFile(MemoryBufferRef mb
, DylibFile
*umbrella
,
1687 bool isBundleLoader
, bool explicitlyLinked
)
1688 : InputFile(DylibKind
, mb
), refState(RefState::Unreferenced
),
1689 explicitlyLinked(explicitlyLinked
), isBundleLoader(isBundleLoader
) {
1690 assert(!isBundleLoader
|| !umbrella
);
1691 if (umbrella
== nullptr)
1693 this->umbrella
= umbrella
;
1695 auto *hdr
= reinterpret_cast<const mach_header
*>(mb
.getBufferStart());
1697 // Initialize installName.
1698 if (const load_command
*cmd
= findCommand(hdr
, LC_ID_DYLIB
)) {
1699 auto *c
= reinterpret_cast<const dylib_command
*>(cmd
);
1700 currentVersion
= read32le(&c
->dylib
.current_version
);
1701 compatibilityVersion
= read32le(&c
->dylib
.compatibility_version
);
1703 reinterpret_cast<const char *>(cmd
) + read32le(&c
->dylib
.name
);
1704 } else if (!isBundleLoader
) {
1705 // macho_executable and macho_bundle don't have LC_ID_DYLIB,
1707 error(toString(this) + ": dylib missing LC_ID_DYLIB load command");
1711 if (config
->printEachFile
)
1712 message(toString(this));
1713 inputFiles
.insert(this);
1715 deadStrippable
= hdr
->flags
& MH_DEAD_STRIPPABLE_DYLIB
;
1717 if (!checkCompatibility(this))
1720 checkAppExtensionSafety(hdr
->flags
& MH_APP_EXTENSION_SAFE
);
1722 for (auto *cmd
: findCommands
<rpath_command
>(hdr
, LC_RPATH
)) {
1723 StringRef rpath
{reinterpret_cast<const char *>(cmd
) + cmd
->path
};
1724 rpaths
.push_back(rpath
);
1727 // Initialize symbols.
1728 bool canBeImplicitlyLinked
= findCommand(hdr
, LC_SUB_CLIENT
) == nullptr;
1729 exportingFile
= (canBeImplicitlyLinked
&& isImplicitlyLinked(installName
))
1733 const auto *dyldInfo
= findCommand
<dyld_info_command
>(hdr
, LC_DYLD_INFO_ONLY
);
1734 const auto *exportsTrie
=
1735 findCommand
<linkedit_data_command
>(hdr
, LC_DYLD_EXPORTS_TRIE
);
1736 if (dyldInfo
&& exportsTrie
) {
1737 // It's unclear what should happen in this case. Maybe we should only error
1738 // out if the two load commands refer to different data?
1739 error(toString(this) +
1740 ": dylib has both LC_DYLD_INFO_ONLY and LC_DYLD_EXPORTS_TRIE");
1745 parseExportedSymbols(dyldInfo
->export_off
, dyldInfo
->export_size
);
1746 } else if (exportsTrie
) {
1747 parseExportedSymbols(exportsTrie
->dataoff
, exportsTrie
->datasize
);
1749 error("No LC_DYLD_INFO_ONLY or LC_DYLD_EXPORTS_TRIE found in " +
1754 void DylibFile::parseExportedSymbols(uint32_t offset
, uint32_t size
) {
1760 auto *buf
= reinterpret_cast<const uint8_t *>(mb
.getBufferStart());
1761 std::vector
<TrieEntry
> entries
;
1762 // Find all the $ld$* symbols to process first.
1763 parseTrie(buf
+ offset
, size
, [&](const Twine
&name
, uint64_t flags
) {
1764 StringRef savedName
= saver().save(name
);
1765 if (handleLDSymbol(savedName
))
1767 entries
.push_back({savedName
, flags
});
1770 // Process the "normal" symbols.
1771 for (TrieEntry
&entry
: entries
) {
1772 if (exportingFile
->hiddenSymbols
.contains(CachedHashStringRef(entry
.name
)))
1775 bool isWeakDef
= entry
.flags
& EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION
;
1776 bool isTlv
= entry
.flags
& EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL
;
1779 symtab
->addDylib(entry
.name
, exportingFile
, isWeakDef
, isTlv
));
1783 void DylibFile::parseLoadCommands(MemoryBufferRef mb
) {
1784 auto *hdr
= reinterpret_cast<const mach_header
*>(mb
.getBufferStart());
1785 const uint8_t *p
= reinterpret_cast<const uint8_t *>(mb
.getBufferStart()) +
1787 for (uint32_t i
= 0, n
= hdr
->ncmds
; i
< n
; ++i
) {
1788 auto *cmd
= reinterpret_cast<const load_command
*>(p
);
1791 if (!(hdr
->flags
& MH_NO_REEXPORTED_DYLIBS
) &&
1792 cmd
->cmd
== LC_REEXPORT_DYLIB
) {
1793 const auto *c
= reinterpret_cast<const dylib_command
*>(cmd
);
1794 StringRef reexportPath
=
1795 reinterpret_cast<const char *>(c
) + read32le(&c
->dylib
.name
);
1796 loadReexport(reexportPath
, exportingFile
, nullptr);
1799 // FIXME: What about LC_LOAD_UPWARD_DYLIB, LC_LAZY_LOAD_DYLIB,
1800 // LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB (..are reexports from dylibs with
1801 // MH_NO_REEXPORTED_DYLIBS loaded for -flat_namespace)?
1802 if (config
->namespaceKind
== NamespaceKind::flat
&&
1803 cmd
->cmd
== LC_LOAD_DYLIB
) {
1804 const auto *c
= reinterpret_cast<const dylib_command
*>(cmd
);
1805 StringRef dylibPath
=
1806 reinterpret_cast<const char *>(c
) + read32le(&c
->dylib
.name
);
1807 DylibFile
*dylib
= findDylib(dylibPath
, umbrella
, nullptr);
1809 error(Twine("unable to locate library '") + dylibPath
+
1810 "' loaded from '" + toString(this) + "' for -flat_namespace");
1815 // Some versions of Xcode ship with .tbd files that don't have the right
1816 // platform settings.
1817 constexpr std::array
<StringRef
, 3> skipPlatformChecks
{
1818 "/usr/lib/system/libsystem_kernel.dylib",
1819 "/usr/lib/system/libsystem_platform.dylib",
1820 "/usr/lib/system/libsystem_pthread.dylib"};
1822 static bool skipPlatformCheckForCatalyst(const InterfaceFile
&interface
,
1823 bool explicitlyLinked
) {
1824 // Catalyst outputs can link against implicitly linked macOS-only libraries.
1825 if (config
->platform() != PLATFORM_MACCATALYST
|| explicitlyLinked
)
1827 return is_contained(interface
.targets(),
1828 MachO::Target(config
->arch(), PLATFORM_MACOS
));
1831 static bool isArchABICompatible(ArchitectureSet archSet
,
1832 Architecture targetArch
) {
1834 uint32_t targetCpuType
;
1835 std::tie(targetCpuType
, std::ignore
) = getCPUTypeFromArchitecture(targetArch
);
1837 return llvm::any_of(archSet
, [&](const auto &p
) {
1838 std::tie(cpuType
, std::ignore
) = getCPUTypeFromArchitecture(p
);
1839 return cpuType
== targetCpuType
;
1843 static bool isTargetPlatformArchCompatible(
1844 InterfaceFile::const_target_range interfaceTargets
, Target target
) {
1845 if (is_contained(interfaceTargets
, target
))
1848 if (config
->forceExactCpuSubtypeMatch
)
1851 ArchitectureSet archSet
;
1852 for (const auto &p
: interfaceTargets
)
1853 if (p
.Platform
== target
.Platform
)
1854 archSet
.set(p
.Arch
);
1855 if (archSet
.empty())
1858 return isArchABICompatible(archSet
, target
.Arch
);
1861 DylibFile::DylibFile(const InterfaceFile
&interface
, DylibFile
*umbrella
,
1862 bool isBundleLoader
, bool explicitlyLinked
)
1863 : InputFile(DylibKind
, interface
), refState(RefState::Unreferenced
),
1864 explicitlyLinked(explicitlyLinked
), isBundleLoader(isBundleLoader
) {
1865 // FIXME: Add test for the missing TBD code path.
1867 if (umbrella
== nullptr)
1869 this->umbrella
= umbrella
;
1871 installName
= saver().save(interface
.getInstallName());
1872 compatibilityVersion
= interface
.getCompatibilityVersion().rawValue();
1873 currentVersion
= interface
.getCurrentVersion().rawValue();
1875 if (config
->printEachFile
)
1876 message(toString(this));
1877 inputFiles
.insert(this);
1879 if (!is_contained(skipPlatformChecks
, installName
) &&
1880 !isTargetPlatformArchCompatible(interface
.targets(),
1881 config
->platformInfo
.target
) &&
1882 !skipPlatformCheckForCatalyst(interface
, explicitlyLinked
)) {
1883 error(toString(this) + " is incompatible with " +
1884 std::string(config
->platformInfo
.target
));
1888 checkAppExtensionSafety(interface
.isApplicationExtensionSafe());
1890 bool canBeImplicitlyLinked
= interface
.allowableClients().size() == 0;
1891 exportingFile
= (canBeImplicitlyLinked
&& isImplicitlyLinked(installName
))
1894 auto addSymbol
= [&](const llvm::MachO::Symbol
&symbol
,
1895 const Twine
&name
) -> void {
1896 StringRef savedName
= saver().save(name
);
1897 if (exportingFile
->hiddenSymbols
.contains(CachedHashStringRef(savedName
)))
1900 symbols
.push_back(symtab
->addDylib(savedName
, exportingFile
,
1901 symbol
.isWeakDefined(),
1902 symbol
.isThreadLocalValue()));
1905 std::vector
<const llvm::MachO::Symbol
*> normalSymbols
;
1906 normalSymbols
.reserve(interface
.symbolsCount());
1907 for (const auto *symbol
: interface
.symbols()) {
1908 if (!isArchABICompatible(symbol
->getArchitectures(), config
->arch()))
1910 if (handleLDSymbol(symbol
->getName()))
1913 switch (symbol
->getKind()) {
1914 case EncodeKind::GlobalSymbol
:
1915 case EncodeKind::ObjectiveCClass
:
1916 case EncodeKind::ObjectiveCClassEHType
:
1917 case EncodeKind::ObjectiveCInstanceVariable
:
1918 normalSymbols
.push_back(symbol
);
1921 // interface.symbols() order is non-deterministic.
1922 llvm::sort(normalSymbols
,
1923 [](auto *l
, auto *r
) { return l
->getName() < r
->getName(); });
1925 // TODO(compnerd) filter out symbols based on the target platform
1926 for (const auto *symbol
: normalSymbols
) {
1927 switch (symbol
->getKind()) {
1928 case EncodeKind::GlobalSymbol
:
1929 addSymbol(*symbol
, symbol
->getName());
1931 case EncodeKind::ObjectiveCClass
:
1932 // XXX ld64 only creates these symbols when -ObjC is passed in. We may
1933 // want to emulate that.
1934 addSymbol(*symbol
, objc::symbol_names::klass
+ symbol
->getName());
1935 addSymbol(*symbol
, objc::symbol_names::metaclass
+ symbol
->getName());
1937 case EncodeKind::ObjectiveCClassEHType
:
1938 addSymbol(*symbol
, objc::symbol_names::ehtype
+ symbol
->getName());
1940 case EncodeKind::ObjectiveCInstanceVariable
:
1941 addSymbol(*symbol
, objc::symbol_names::ivar
+ symbol
->getName());
1947 DylibFile::DylibFile(DylibFile
*umbrella
)
1948 : InputFile(DylibKind
, MemoryBufferRef
{}), refState(RefState::Unreferenced
),
1949 explicitlyLinked(false), isBundleLoader(false) {
1950 if (umbrella
== nullptr)
1952 this->umbrella
= umbrella
;
1955 void DylibFile::parseReexports(const InterfaceFile
&interface
) {
1956 const InterfaceFile
*topLevel
=
1957 interface
.getParent() == nullptr ? &interface
: interface
.getParent();
1958 for (const InterfaceFileRef
&intfRef
: interface
.reexportedLibraries()) {
1959 InterfaceFile::const_target_range targets
= intfRef
.targets();
1960 if (is_contained(skipPlatformChecks
, intfRef
.getInstallName()) ||
1961 isTargetPlatformArchCompatible(targets
, config
->platformInfo
.target
))
1962 loadReexport(intfRef
.getInstallName(), exportingFile
, topLevel
);
1966 bool DylibFile::isExplicitlyLinked() const {
1967 if (!explicitlyLinked
)
1970 // If this dylib was explicitly linked, but at least one of the symbols
1971 // of the synthetic dylibs it created via $ld$previous symbols is
1972 // referenced, then that synthetic dylib fulfils the explicit linkedness
1973 // and we can deadstrip this dylib if it's unreferenced.
1974 for (const auto *dylib
: extraDylibs
)
1975 if (dylib
->isReferenced())
1981 DylibFile
*DylibFile::getSyntheticDylib(StringRef installName
,
1982 uint32_t currentVersion
,
1983 uint32_t compatVersion
) {
1984 for (DylibFile
*dylib
: extraDylibs
)
1985 if (dylib
->installName
== installName
) {
1986 // FIXME: Check what to do if different $ld$previous symbols
1987 // request the same dylib, but with different versions.
1991 auto *dylib
= make
<DylibFile
>(umbrella
== this ? nullptr : umbrella
);
1992 dylib
->installName
= saver().save(installName
);
1993 dylib
->currentVersion
= currentVersion
;
1994 dylib
->compatibilityVersion
= compatVersion
;
1995 extraDylibs
.push_back(dylib
);
1999 // $ld$ symbols modify the properties/behavior of the library (e.g. its install
2000 // name, compatibility version or hide/add symbols) for specific target
2002 bool DylibFile::handleLDSymbol(StringRef originalName
) {
2003 if (!originalName
.starts_with("$ld$"))
2008 std::tie(action
, name
) = originalName
.drop_front(strlen("$ld$")).split('$');
2009 if (action
== "previous")
2010 handleLDPreviousSymbol(name
, originalName
);
2011 else if (action
== "install_name")
2012 handleLDInstallNameSymbol(name
, originalName
);
2013 else if (action
== "hide")
2014 handleLDHideSymbol(name
, originalName
);
2018 void DylibFile::handleLDPreviousSymbol(StringRef name
, StringRef originalName
) {
2019 // originalName: $ld$ previous $ <installname> $ <compatversion> $
2020 // <platformstr> $ <startversion> $ <endversion> $ <symbol-name> $
2021 StringRef installName
;
2022 StringRef compatVersion
;
2023 StringRef platformStr
;
2024 StringRef startVersion
;
2025 StringRef endVersion
;
2026 StringRef symbolName
;
2029 std::tie(installName
, name
) = name
.split('$');
2030 std::tie(compatVersion
, name
) = name
.split('$');
2031 std::tie(platformStr
, name
) = name
.split('$');
2032 std::tie(startVersion
, name
) = name
.split('$');
2033 std::tie(endVersion
, name
) = name
.split('$');
2034 std::tie(symbolName
, rest
) = name
.rsplit('$');
2036 // FIXME: Does this do the right thing for zippered files?
2038 if (platformStr
.getAsInteger(10, platform
) ||
2039 platform
!= static_cast<unsigned>(config
->platform()))
2043 if (start
.tryParse(startVersion
)) {
2044 warn(toString(this) + ": failed to parse start version, symbol '" +
2045 originalName
+ "' ignored");
2049 if (end
.tryParse(endVersion
)) {
2050 warn(toString(this) + ": failed to parse end version, symbol '" +
2051 originalName
+ "' ignored");
2054 if (config
->platformInfo
.target
.MinDeployment
< start
||
2055 config
->platformInfo
.target
.MinDeployment
>= end
)
2058 // Initialized to compatibilityVersion for the symbolName branch below.
2059 uint32_t newCompatibilityVersion
= compatibilityVersion
;
2060 uint32_t newCurrentVersionForSymbol
= currentVersion
;
2061 if (!compatVersion
.empty()) {
2062 VersionTuple cVersion
;
2063 if (cVersion
.tryParse(compatVersion
)) {
2064 warn(toString(this) +
2065 ": failed to parse compatibility version, symbol '" + originalName
+
2069 newCompatibilityVersion
= encodeVersion(cVersion
);
2070 newCurrentVersionForSymbol
= newCompatibilityVersion
;
2073 if (!symbolName
.empty()) {
2074 // A $ld$previous$ symbol with symbol name adds a symbol with that name to
2075 // a dylib with given name and version.
2076 auto *dylib
= getSyntheticDylib(installName
, newCurrentVersionForSymbol
,
2077 newCompatibilityVersion
);
2079 // The tbd file usually contains the $ld$previous symbol for an old version,
2080 // and then the symbol itself later, for newer deployment targets, like so:
2082 // '$ld$previous$/Another$$1$3.0$14.0$_zzz$',
2085 // Since the symbols are sorted, adding them to the symtab in the given
2086 // order means the $ld$previous version of _zzz will prevail, as desired.
2087 dylib
->symbols
.push_back(symtab
->addDylib(
2088 saver().save(symbolName
), dylib
, /*isWeakDef=*/false, /*isTlv=*/false));
2092 // A $ld$previous$ symbol without symbol name modifies the dylib it's in.
2093 this->installName
= saver().save(installName
);
2094 this->compatibilityVersion
= newCompatibilityVersion
;
2097 void DylibFile::handleLDInstallNameSymbol(StringRef name
,
2098 StringRef originalName
) {
2099 // originalName: $ld$ install_name $ os<version> $ install_name
2100 StringRef condition
, installName
;
2101 std::tie(condition
, installName
) = name
.split('$');
2102 VersionTuple version
;
2103 if (!condition
.consume_front("os") || version
.tryParse(condition
))
2104 warn(toString(this) + ": failed to parse os version, symbol '" +
2105 originalName
+ "' ignored");
2106 else if (version
== config
->platformInfo
.target
.MinDeployment
)
2107 this->installName
= saver().save(installName
);
2110 void DylibFile::handleLDHideSymbol(StringRef name
, StringRef originalName
) {
2111 StringRef symbolName
;
2112 bool shouldHide
= true;
2113 if (name
.starts_with("os")) {
2114 // If it's hidden based on versions.
2115 name
= name
.drop_front(2);
2116 StringRef minVersion
;
2117 std::tie(minVersion
, symbolName
) = name
.split('$');
2118 VersionTuple versionTup
;
2119 if (versionTup
.tryParse(minVersion
)) {
2120 warn(toString(this) + ": failed to parse hidden version, symbol `" + originalName
+
2124 shouldHide
= versionTup
== config
->platformInfo
.target
.MinDeployment
;
2130 exportingFile
->hiddenSymbols
.insert(CachedHashStringRef(symbolName
));
2133 void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe
) const {
2134 if (config
->applicationExtension
&& !dylibIsAppExtensionSafe
)
2135 warn("using '-application_extension' with unsafe dylib: " + toString(this));
// Wraps the parsed archive `f`, taking ownership of it. The base InputFile is
// initialized from the archive's memory buffer before `f` is moved into
// `file`. `forceHidden` is stored and later passed along when members are
// loaded.
ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
    : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)),
      forceHidden(forceHidden) {}
2142 void ArchiveFile::addLazySymbols() {
2143 // Avoid calling getMemoryBufferRef() on zero-symbol archive
2144 // since that crashes.
2145 if (file
->isEmpty() || file
->getNumberOfSymbols() == 0)
2148 Error err
= Error::success();
2149 auto child
= file
->child_begin(err
);
2150 // Ignore the I/O error here - will be reported later.
2152 Expected
<MemoryBufferRef
> mbOrErr
= child
->getMemoryBufferRef();
2154 llvm::consumeError(mbOrErr
.takeError());
2156 if (identify_magic(mbOrErr
->getBuffer()) == file_magic::macho_object
) {
2157 if (target
->wordSize
== 8)
2158 compatArch
= compatWithTargetArch(
2159 this, reinterpret_cast<const LP64::mach_header
*>(
2160 mbOrErr
->getBufferStart()));
2162 compatArch
= compatWithTargetArch(
2163 this, reinterpret_cast<const ILP32::mach_header
*>(
2164 mbOrErr
->getBufferStart()));
2171 for (const object::Archive::Symbol
&sym
: file
->symbols())
2172 symtab
->addLazyArchive(sym
.getName(), this, sym
);
2175 static Expected
<InputFile
*>
2176 loadArchiveMember(MemoryBufferRef mb
, uint32_t modTime
, StringRef archiveName
,
2177 uint64_t offsetInArchive
, bool forceHidden
, bool compatArch
) {
2178 if (config
->zeroModTime
)
2181 switch (identify_magic(mb
.getBuffer())) {
2182 case file_magic::macho_object
:
2183 return make
<ObjFile
>(mb
, modTime
, archiveName
, /*lazy=*/false, forceHidden
,
2185 case file_magic::bitcode
:
2186 return make
<BitcodeFile
>(mb
, archiveName
, offsetInArchive
, /*lazy=*/false,
2187 forceHidden
, compatArch
);
2189 return createStringError(inconvertibleErrorCode(),
2190 mb
.getBufferIdentifier() +
2191 " has unhandled file type");
2195 Error
ArchiveFile::fetch(const object::Archive::Child
&c
, StringRef reason
) {
2196 if (!seen
.insert(c
.getChildOffset()).second
)
2197 return Error::success();
2199 Expected
<MemoryBufferRef
> mb
= c
.getMemoryBufferRef();
2201 return mb
.takeError();
2203 Expected
<TimePoint
<std::chrono::seconds
>> modTime
= c
.getLastModified();
2205 return modTime
.takeError();
2207 Expected
<InputFile
*> file
=
2208 loadArchiveMember(*mb
, toTimeT(*modTime
), getName(), c
.getChildOffset(),
2209 forceHidden
, compatArch
);
2212 return file
.takeError();
2214 inputFiles
.insert(*file
);
2215 printArchiveMemberLoad(reason
, *file
);
2216 return Error::success();
2219 void ArchiveFile::fetch(const object::Archive::Symbol
&sym
) {
2220 object::Archive::Child c
=
2221 CHECK(sym
.getMember(), toString(this) +
2222 ": could not get the member defining symbol " +
2223 toMachOString(sym
));
2225 // `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
2226 // and become invalid after that call. Copy it to the stack so we can refer
2228 const object::Archive::Symbol symCopy
= sym
;
2230 // ld64 doesn't demangle sym here even with -demangle.
2231 // Match that: intentionally don't call toMachOString().
2232 if (Error e
= fetch(c
, symCopy
.getName()))
2233 error(toString(this) + ": could not get the member defining symbol " +
2234 toMachOString(symCopy
) + ": " + toString(std::move(e
)));
2237 static macho::Symbol
*createBitcodeSymbol(const lto::InputFile::Symbol
&objSym
,
2238 BitcodeFile
&file
) {
2239 StringRef name
= saver().save(objSym
.getName());
2241 if (objSym
.isUndefined())
2242 return symtab
->addUndefined(name
, &file
, /*isWeakRef=*/objSym
.isWeak());
2244 // TODO: Write a test demonstrating why computing isPrivateExtern before
2245 // LTO compilation is important.
2246 bool isPrivateExtern
= false;
2247 switch (objSym
.getVisibility()) {
2248 case GlobalValue::HiddenVisibility
:
2249 isPrivateExtern
= true;
2251 case GlobalValue::ProtectedVisibility
:
2252 error(name
+ " has protected visibility, which is not supported by Mach-O");
2254 case GlobalValue::DefaultVisibility
:
2257 isPrivateExtern
= isPrivateExtern
|| objSym
.canBeOmittedFromSymbolTable() ||
2260 if (objSym
.isCommon())
2261 return symtab
->addCommon(name
, &file
, objSym
.getCommonSize(),
2262 objSym
.getCommonAlignment(), isPrivateExtern
);
2264 return symtab
->addDefined(name
, &file
, /*isec=*/nullptr, /*value=*/0,
2265 /*size=*/0, objSym
.isWeak(), isPrivateExtern
,
2266 /*isReferencedDynamically=*/false,
2267 /*noDeadStrip=*/false,
2268 /*isWeakDefCanBeHidden=*/false);
2271 BitcodeFile::BitcodeFile(MemoryBufferRef mb
, StringRef archiveName
,
2272 uint64_t offsetInArchive
, bool lazy
, bool forceHidden
,
2274 : InputFile(BitcodeKind
, mb
, lazy
), forceHidden(forceHidden
) {
2275 this->archiveName
= std::string(archiveName
);
2276 this->compatArch
= compatArch
;
2277 std::string path
= mb
.getBufferIdentifier().str();
2278 if (config
->thinLTOIndexOnly
)
2279 path
= replaceThinLTOSuffix(mb
.getBufferIdentifier());
2281 // If the parent archive already determines that the arch is not compat with
2282 // target, then just return.
2286 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
2287 // name. If two members with the same name are provided, this causes a
2288 // collision and ThinLTO can't proceed.
2289 // So, we append the archive name to disambiguate two members with the same
2290 // name from multiple different archives, and offset within the archive to
2291 // disambiguate two members of the same name from a single archive.
2292 MemoryBufferRef
mbref(mb
.getBuffer(),
2293 saver().save(archiveName
.empty()
2295 : archiveName
+ "(" +
2296 sys::path::filename(path
) + ")" +
2297 utostr(offsetInArchive
)));
2298 obj
= check(lto::InputFile::create(mbref
));
2305 void BitcodeFile::parse() {
2306 // Convert LTO Symbols to LLD Symbols in order to perform resolution. The
2307 // "winning" symbol will then be marked as Prevailing at LTO compilation
2309 symbols
.resize(obj
->symbols().size());
2311 // Process defined symbols first. See the comment at the end of
2312 // ObjFile<>::parseSymbols.
2313 for (auto it
: llvm::enumerate(obj
->symbols()))
2314 if (!it
.value().isUndefined())
2315 symbols
[it
.index()] = createBitcodeSymbol(it
.value(), *this);
2316 for (auto it
: llvm::enumerate(obj
->symbols()))
2317 if (it
.value().isUndefined())
2318 symbols
[it
.index()] = createBitcodeSymbol(it
.value(), *this);
2321 void BitcodeFile::parseLazy() {
2322 symbols
.resize(obj
->symbols().size());
2323 for (const auto &[i
, objSym
] : llvm::enumerate(obj
->symbols())) {
2324 if (!objSym
.isUndefined()) {
2325 symbols
[i
] = symtab
->addLazyObject(saver().save(objSym
.getName()), *this);
2332 std::string
macho::replaceThinLTOSuffix(StringRef path
) {
2333 auto [suffix
, repl
] = config
->thinLTOObjectSuffixReplace
;
2334 if (path
.consume_back(suffix
))
2335 return (path
+ repl
).str();
2336 return std::string(path
);
2339 void macho::extract(InputFile
&file
, StringRef reason
) {
2344 printArchiveMemberLoad(reason
, &file
);
2345 if (auto *bitcode
= dyn_cast
<BitcodeFile
>(&file
)) {
2348 auto &f
= cast
<ObjFile
>(file
);
2349 if (target
->wordSize
== 8)
// Explicit instantiation of ObjFile::parse for the LP64 layout.
template void ObjFile::parse<LP64>();