1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_MACHO_INPUT_FILES_H
10 #define LLD_MACHO_INPUT_FILES_H
12 #include "MachOStructs.h"
15 #include "lld/Common/DWARF.h"
16 #include "lld/Common/LLVM.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/CachedHashString.h"
19 #include "llvm/ADT/DenseSet.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/BinaryFormat/MachO.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/Object/Archive.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/Threading.h"
26 #include "llvm/TextAPI/TextAPIReader.h"
44 class ConcatInputSection
;
49 enum class RefState
: uint8_t;
51 // If --reproduce option is given, all input files are written
52 // to this tar archive.
53 extern std::unique_ptr
<llvm::TarWriter
> tar
;
55 // If .subsections_via_symbols is set, each InputSection will be split along
56 // symbol boundaries. The field offset represents the offset of the subsection
57 // from the start of the original pre-split InputSection.
60 InputSection
*isec
= nullptr;
63 using Subsections
= std::vector
<Subsection
>;
73 Subsections subsections
;
75 Section(InputFile
*file
, StringRef segname
, StringRef name
, uint32_t flags
,
77 : file(file
), segname(segname
), name(name
), flags(flags
), addr(addr
) {}
78 // Ensure pointers to Sections are never invalidated.
// All four copy/move operations below are deleted, so a Section can be neither
// duplicated nor relocated after construction.
79 Section(const Section
&) = delete;
80 Section
&operator=(const Section
&) = delete;
81 Section(Section
&&) = delete;
82 Section
&operator=(Section
&&) = delete;
85 // Whether we have already split this section into individual subsections.
86 // For sections that cannot be split (e.g. literal sections), this is always
88 bool doneSplitting
= false;
92 // Represents a call graph profile edge.
93 struct CallGraphEntry
{
94 // The index of the caller in the symbol table.
96 // The index of the callee in the symbol table.
98 // Number of calls from caller to callee in the profile.
// Builds one call graph edge.
//   fromIndex - symbol table index of the caller.
//   toIndex   - symbol table index of the callee.
//   count     - number of calls recorded for this edge in the profile.
// Each argument is copied into the member of the same name.
101 CallGraphEntry(uint32_t fromIndex
, uint32_t toIndex
, uint64_t count
)
102 : fromIndex(fromIndex
), toIndex(toIndex
), count(count
) {}
// Virtual, defaulted destructor: InputFile is used as a public base class
// (e.g. by ObjFile and DylibFile), so destruction through a base pointer must
// dispatch to the derived type.
115 virtual ~InputFile() = default;
// Returns the kind tag stored in fileKind. The classof() helpers of the
// derived file classes compare this value against their own kind constant.
116 Kind
kind() const { return fileKind
; }
// Returns this file's name. The MemoryBufferRef-based constructor sets it
// from mb.getBufferIdentifier().
117 StringRef
getName() const { return name
; }
// Resets the shared idCount counter to 0. The MemoryBufferRef-based
// constructor takes each new file's id from this counter, so ids handed out
// after this call start again from 0.
118 static void resetIdCount() { idCount
= 0; }
122 std::vector
<Symbol
*> symbols
;
123 std::vector
<Section
*> sections
;
124 ArrayRef
<uint8_t> objCImageInfo
;
126 // If not empty, this stores the name of the archive containing this file.
127 // We use this string for creating error messages.
128 std::string archiveName
;
130 // Provides an easy way to sort InputFiles deterministically.
133 // True if this is a lazy ObjFile or BitcodeFile.
// Builds an InputFile of the given kind on top of the memory buffer `mb`.
//   kind - stored in fileKind (the value later returned by kind()).
//   mb   - stored in the mb member; mb.getBufferIdentifier() becomes `name`.
//   lazy - stored in the lazy member; defaults to false.
// The file's id is the current value of idCount, which is post-incremented,
// so every file built through this constructor gets the next id in sequence.
// NOTE(review): `name` is a StringRef, so it presumably aliases storage owned
// by the buffer identifier rather than copying it — confirm lifetime.
137 InputFile(Kind kind
, MemoryBufferRef mb
, bool lazy
= false)
138 : mb(mb
), id(idCount
++), lazy(lazy
), fileKind(kind
),
139 name(mb
.getBufferIdentifier()) {}
141 InputFile(Kind
, const llvm::MachO::InterfaceFile
&);
143 // If true, this input's arch is compatible with target.
144 bool compatArch
= true;
148 const StringRef name
;
160 class ObjFile final
: public InputFile
{
162 ObjFile(MemoryBufferRef mb
, uint32_t modTime
, StringRef archiveName
,
163 bool lazy
= false, bool forceHidden
= false, bool compatArch
= true,
164 bool builtFromBitcode
= false);
165 ArrayRef
<llvm::MachO::data_in_code_entry
> getDataInCode() const;
166 ArrayRef
<uint8_t> getOptimizationHints() const;
167 template <class LP
> void parse();
169 void parseLinkerOptions(llvm::SmallVectorImpl
<StringRef
> &LinkerOptions
);
// Type test: returns true when f->kind() equals ObjKind.
// `f` is dereferenced unconditionally, so it must not be null.
// Presumably the hook consumed by llvm::isa<>/cast<>/dyn_cast<> — confirm.
171 static bool classof(const InputFile
*f
) { return f
->kind() == ObjKind
; }
173 std::string
sourceFile() const;
174 // Parses line table information for diagnostics. compileUnit should be used
175 // for other purposes.
176 lld::DWARFCache
*getDwarf();
178 llvm::DWARFUnit
*compileUnit
= nullptr;
179 std::unique_ptr
<lld::DWARFCache
> dwarfCache
;
180 Section
*addrSigSection
= nullptr;
181 const uint32_t modTime
;
183 bool builtFromBitcode
;
184 std::vector
<ConcatInputSection
*> debugSections
;
185 std::vector
<CallGraphEntry
> callGraph
;
186 llvm::DenseMap
<ConcatInputSection
*, FDE
> fdes
;
187 std::vector
<AliasSymbol
*> aliases
;
190 llvm::once_flag initDwarf
;
191 template <class LP
> void parseLazy();
192 template <class SectionHeader
> void parseSections(ArrayRef
<SectionHeader
>);
194 void parseSymbols(ArrayRef
<typename
LP::section
> sectionHeaders
,
195 ArrayRef
<typename
LP::nlist
> nList
, const char *strtab
,
196 bool subsectionsViaSymbols
);
197 template <class NList
>
198 Symbol
*parseNonSectionSymbol(const NList
&sym
, const char *strtab
);
199 template <class SectionHeader
>
200 void parseRelocations(ArrayRef
<SectionHeader
> sectionHeaders
,
201 const SectionHeader
&, Section
&);
202 void parseDebugInfo();
203 void splitEhFrames(ArrayRef
<uint8_t> dataArr
, Section
&ehFrameSection
);
204 void registerCompactUnwind(Section
&compactUnwindSection
);
205 void registerEhFrames(Section
&ehFrameSection
);
208 // command-line -sectcreate file
209 class OpaqueFile final
: public InputFile
{
211 OpaqueFile(MemoryBufferRef mb
, StringRef segName
, StringRef sectName
);
// Type test: returns true when f->kind() equals OpaqueKind.
// `f` is dereferenced unconditionally, so it must not be null.
// Presumably the hook consumed by llvm::isa<>/cast<>/dyn_cast<> — confirm.
212 static bool classof(const InputFile
*f
) { return f
->kind() == OpaqueKind
; }
215 // .dylib or .tbd file
216 class DylibFile final
: public InputFile
{
218 // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
219 // symbols in those sub-libraries will be available under the umbrella
220 // library's namespace. Those sub-libraries can also have their own
221 // re-exports. When loading a re-exported dylib, `umbrella` should be set to
222 // the root dylib to ensure symbols in the child library are correctly bound
223 // to the root. On the other hand, if a dylib is being directly loaded
224 // (through an -lfoo flag), then `umbrella` should be a nullptr.
225 explicit DylibFile(MemoryBufferRef mb
, DylibFile
*umbrella
,
226 bool isBundleLoader
, bool explicitlyLinked
);
227 explicit DylibFile(const llvm::MachO::InterfaceFile
&interface
,
228 DylibFile
*umbrella
, bool isBundleLoader
,
229 bool explicitlyLinked
);
230 explicit DylibFile(DylibFile
*umbrella
);
232 void parseLoadCommands(MemoryBufferRef mb
);
233 void parseReexports(const llvm::MachO::InterfaceFile
&interface
);
// Returns true once numReferencedSymbols is non-zero; that counter is
// default-initialized to 0, so this returns false until it is incremented.
234 bool isReferenced() const { return numReferencedSymbols
> 0; }
235 bool isExplicitlyLinked() const;
// Sets the explicitlyLinked flag to true. This setter can only set the flag;
// it offers no way to clear it again.
236 void setExplicitlyLinked() { explicitlyLinked
= true; }
// Type test: returns true when f->kind() equals DylibKind.
// `f` is dereferenced unconditionally, so it must not be null.
// Presumably the hook consumed by llvm::isa<>/cast<>/dyn_cast<> — confirm.
238 static bool classof(const InputFile
*f
) { return f
->kind() == DylibKind
; }
240 StringRef installName
;
241 DylibFile
*exportingFile
= nullptr;
243 SmallVector
<StringRef
, 2> rpaths
;
244 uint32_t compatibilityVersion
= 0;
245 uint32_t currentVersion
= 0;
246 int64_t ordinal
= 0; // Ordinal numbering starts from 1, so 0 is a sentinel
247 unsigned numReferencedSymbols
= 0;
249 bool reexport
= false;
250 bool forceNeeded
= false;
251 bool forceWeakImport
= false;
252 bool deadStrippable
= false;
255 bool explicitlyLinked
= false; // Access via isExplicitlyLinked().
258 // An executable can be used as a bundle loader that will load the output
259 // file being linked, and that contains symbols referenced, but not
260 // implemented in the bundle. When used like this, it is very similar
261 // to a dylib, so we've used the same class to represent it.
264 // Synthetic Dylib objects created by $ld$previous symbols in this dylib.
265 // Usually empty. These synthetic dylibs won't have synthetic dylibs
267 SmallVector
<DylibFile
*, 2> extraDylibs
;
270 DylibFile
*getSyntheticDylib(StringRef installName
, uint32_t currentVersion
,
271 uint32_t compatVersion
);
273 bool handleLDSymbol(StringRef originalName
);
274 void handleLDPreviousSymbol(StringRef name
, StringRef originalName
);
275 void handleLDInstallNameSymbol(StringRef name
, StringRef originalName
);
276 void handleLDHideSymbol(StringRef name
, StringRef originalName
);
277 void checkAppExtensionSafety(bool dylibIsAppExtensionSafe
) const;
278 void parseExportedSymbols(uint32_t offset
, uint32_t size
);
279 void loadReexport(StringRef path
, DylibFile
*umbrella
,
280 const llvm::MachO::InterfaceFile
*currentTopLevelTapi
);
282 llvm::DenseSet
<llvm::CachedHashStringRef
> hiddenSymbols
;
286 class ArchiveFile final
: public InputFile
{
288 explicit ArchiveFile(std::unique_ptr
<llvm::object::Archive
> &&file
,
290 void addLazySymbols();
291 void fetch(const llvm::object::Archive::Symbol
&);
292 // LLD normally doesn't use Error for error-handling, but the underlying
293 // Archive library does, so this is the cleanest way to wrap it.
294 Error
fetch(const llvm::object::Archive::Child
&, StringRef reason
);
295 const llvm::object::Archive
&getArchive() const { return *file
; };
// Type test: returns true when f->kind() equals ArchiveKind.
// `f` is dereferenced unconditionally, so it must not be null.
// Presumably the hook consumed by llvm::isa<>/cast<>/dyn_cast<> — confirm.
296 static bool classof(const InputFile
*f
) { return f
->kind() == ArchiveKind
; }
299 std::unique_ptr
<llvm::object::Archive
> file
;
300 // Keep track of children fetched from the archive by tracking
301 // which address offsets have been fetched already.
302 llvm::DenseSet
<uint64_t> seen
;
303 // Load all symbols with hidden visibility (-load_hidden).
307 class BitcodeFile final
: public InputFile
{
309 explicit BitcodeFile(MemoryBufferRef mb
, StringRef archiveName
,
310 uint64_t offsetInArchive
, bool lazy
= false,
311 bool forceHidden
= false, bool compatArch
= true);
// Type test: returns true when f->kind() equals BitcodeKind.
// `f` is dereferenced unconditionally, so it must not be null.
// Presumably the hook consumed by llvm::isa<>/cast<>/dyn_cast<> — confirm.
312 static bool classof(const InputFile
*f
) { return f
->kind() == BitcodeKind
; }
315 std::unique_ptr
<llvm::lto::InputFile
> obj
;
322 extern llvm::SetVector
<InputFile
*> inputFiles
;
323 extern llvm::DenseMap
<llvm::CachedHashStringRef
, MemoryBufferRef
> cachedReads
;
324 extern llvm::SmallVector
<StringRef
> unprocessedLCLinkerOptions
;
326 std::optional
<MemoryBufferRef
> readFile(StringRef path
);
328 void extract(InputFile
&file
, StringRef reason
);
332 template <class CommandType
, class... Types
>
333 std::vector
<const CommandType
*>
334 findCommands(const void *anyHdr
, size_t maxCommands
, Types
... types
) {
335 std::vector
<const CommandType
*> cmds
;
336 std::initializer_list
<uint32_t> typesList
{types
...};
337 const auto *hdr
= reinterpret_cast<const llvm::MachO::mach_header
*>(anyHdr
);
339 reinterpret_cast<const uint8_t *>(hdr
) + target
->headerSize
;
340 for (uint32_t i
= 0, n
= hdr
->ncmds
; i
< n
; ++i
) {
341 auto *cmd
= reinterpret_cast<const CommandType
*>(p
);
342 if (llvm::is_contained(typesList
, cmd
->cmd
)) {
344 if (cmds
.size() == maxCommands
)
352 } // namespace detail
354 // anyHdr should be a pointer to either mach_header or mach_header_64
355 template <class CommandType
= llvm::MachO::load_command
, class... Types
>
356 const CommandType
*findCommand(const void *anyHdr
, Types
... types
) {
357 std::vector
<const CommandType
*> cmds
=
358 detail::findCommands
<CommandType
>(anyHdr
, 1, types
...);
359 return cmds
.size() ? cmds
[0] : nullptr;
362 template <class CommandType
= llvm::MachO::load_command
, class... Types
>
363 std::vector
<const CommandType
*> findCommands(const void *anyHdr
,
365 return detail::findCommands
<CommandType
>(anyHdr
, 0, types
...);
368 std::string
replaceThinLTOSuffix(StringRef path
);
371 std::string
toString(const macho::InputFile
*file
);
372 std::string
toString(const macho::Section
&);