1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_MACHO_INPUT_FILES_H
10 #define LLD_MACHO_INPUT_FILES_H
12 #include "MachOStructs.h"
15 #include "lld/Common/DWARF.h"
16 #include "lld/Common/LLVM.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/CachedHashString.h"
19 #include "llvm/ADT/DenseSet.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/BinaryFormat/MachO.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/Object/Archive.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/Threading.h"
26 #include "llvm/TextAPI/TextAPIReader.h"
44 class ConcatInputSection
;
48 enum class RefState
: uint8_t;
50 // If --reproduce option is given, all input files are written
51 // to this tar archive.
52 extern std::unique_ptr
<llvm::TarWriter
> tar
;
54 // If .subsections_via_symbols is set, each InputSection will be split along
55 // symbol boundaries. The field offset represents the offset of the subsection
56 // from the start of the original pre-split InputSection.
59 InputSection
*isec
= nullptr;
62 using Subsections
= std::vector
<Subsection
>;
72 Subsections subsections
;
74 Section(InputFile
*file
, StringRef segname
, StringRef name
, uint32_t flags
,
76 : file(file
), segname(segname
), name(name
), flags(flags
), addr(addr
) {}
77 // Ensure pointers to Sections are never invalidated.
78 Section(const Section
&) = delete;
79 Section
&operator=(const Section
&) = delete;
80 Section(Section
&&) = delete;
81 Section
&operator=(Section
&&) = delete;
84 // Whether we have already split this section into individual subsections.
85 // For sections that cannot be split (e.g. literal sections), this is always
87 bool doneSplitting
= false;
91 // Represents a call graph profile edge.
92 struct CallGraphEntry
{
93 // The index of the caller in the symbol table.
95 // The index of the callee in the symbol table.
97 // Number of calls from caller to callee in the profile.
// Builds an edge by copying the three arguments into the members of the same
// name (fromIndex, toIndex, count).
100 CallGraphEntry(uint32_t fromIndex
, uint32_t toIndex
, uint64_t count
)
101 : fromIndex(fromIndex
), toIndex(toIndex
), count(count
) {}
114 virtual ~InputFile() = default;
// Returns the Kind tag stored in fileKind; the classof() helpers of the
// derived file classes compare against this value.
115 Kind
kind() const { return fileKind
; }
// Returns the stored name of this input file.
116 StringRef
getName() const { return name
; }
// Resets idCount to 0. The buffer-based constructor assigns `id` from
// idCount++, so files constructed afterwards are numbered from 0 again.
117 static void resetIdCount() { idCount
= 0; }
121 std::vector
<Symbol
*> symbols
;
122 std::vector
<Section
*> sections
;
123 ArrayRef
<uint8_t> objCImageInfo
;
125 // If not empty, this stores the name of the archive containing this file.
126 // We use this string for creating error messages.
127 std::string archiveName
;
129 // Provides an easy way to sort InputFiles deterministically.
132 // True if this is a lazy ObjFile or BitcodeFile.
// Constructs an InputFile backed by the memory buffer `mb`.
//   kind - stored in fileKind and reported by kind().
//   mb   - the buffer; its identifier becomes the file's name.
//   lazy - stored in the `lazy` member (defaults to false).
// Each construction takes the current idCount as `id` and then increments
// the counter.
136 InputFile(Kind kind
, MemoryBufferRef mb
, bool lazy
= false)
137 : mb(mb
), id(idCount
++), lazy(lazy
), fileKind(kind
),
138 name(mb
.getBufferIdentifier()) {}
140 InputFile(Kind
, const llvm::MachO::InterfaceFile
&);
144 const StringRef name
;
156 class ObjFile final
: public InputFile
{
158 ObjFile(MemoryBufferRef mb
, uint32_t modTime
, StringRef archiveName
,
159 bool lazy
= false, bool forceHidden
= false);
160 ArrayRef
<llvm::MachO::data_in_code_entry
> getDataInCode() const;
161 template <class LP
> void parse();
// Returns true when f->kind() is ObjKind. This is the classof() hook that
// LLVM's isa<>/cast<>/dyn_cast<> templates look for. `f` is dereferenced
// unconditionally, so it must not be null.
163 static bool classof(const InputFile
*f
) { return f
->kind() == ObjKind
; }
165 std::string
sourceFile() const;
166 // Parses line table information for diagnostics. compileUnit should be used
167 // for other purposes.
168 lld::DWARFCache
*getDwarf();
170 llvm::DWARFUnit
*compileUnit
= nullptr;
171 std::unique_ptr
<lld::DWARFCache
> dwarfCache
;
172 Section
*addrSigSection
= nullptr;
173 const uint32_t modTime
;
175 std::vector
<ConcatInputSection
*> debugSections
;
176 std::vector
<CallGraphEntry
> callGraph
;
177 llvm::DenseMap
<ConcatInputSection
*, FDE
> fdes
;
178 std::vector
<OptimizationHint
> optimizationHints
;
181 llvm::once_flag initDwarf
;
182 template <class LP
> void parseLazy();
183 template <class SectionHeader
> void parseSections(ArrayRef
<SectionHeader
>);
185 void parseSymbols(ArrayRef
<typename
LP::section
> sectionHeaders
,
186 ArrayRef
<typename
LP::nlist
> nList
, const char *strtab
,
187 bool subsectionsViaSymbols
);
188 template <class NList
>
189 Symbol
*parseNonSectionSymbol(const NList
&sym
, StringRef name
);
190 template <class SectionHeader
>
191 void parseRelocations(ArrayRef
<SectionHeader
> sectionHeaders
,
192 const SectionHeader
&, Section
&);
193 void parseDebugInfo();
194 void parseOptimizationHints(ArrayRef
<uint8_t> data
);
195 void splitEhFrames(ArrayRef
<uint8_t> dataArr
, Section
&ehFrameSection
);
196 void registerCompactUnwind(Section
&compactUnwindSection
);
197 void registerEhFrames(Section
&ehFrameSection
);
200 // command-line -sectcreate file
201 class OpaqueFile final
: public InputFile
{
203 OpaqueFile(MemoryBufferRef mb
, StringRef segName
, StringRef sectName
);
// Returns true when f->kind() is OpaqueKind. This is the classof() hook that
// LLVM's isa<>/cast<>/dyn_cast<> templates look for. `f` is dereferenced
// unconditionally, so it must not be null.
204 static bool classof(const InputFile
*f
) { return f
->kind() == OpaqueKind
; }
207 // .dylib or .tbd file
208 class DylibFile final
: public InputFile
{
210 // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
211 // symbols in those sub-libraries will be available under the umbrella
212 // library's namespace. Those sub-libraries can also have their own
213 // re-exports. When loading a re-exported dylib, `umbrella` should be set to
214 // the root dylib to ensure symbols in the child library are correctly bound
215 // to the root. On the other hand, if a dylib is being directly loaded
216 // (through an -lfoo flag), then `umbrella` should be a nullptr.
217 explicit DylibFile(MemoryBufferRef mb
, DylibFile
*umbrella
,
218 bool isBundleLoader
, bool explicitlyLinked
);
219 explicit DylibFile(const llvm::MachO::InterfaceFile
&interface
,
220 DylibFile
*umbrella
, bool isBundleLoader
,
221 bool explicitlyLinked
);
222 explicit DylibFile(DylibFile
*umbrella
);
224 void parseLoadCommands(MemoryBufferRef mb
);
225 void parseReexports(const llvm::MachO::InterfaceFile
&interface
);
// True when numReferencedSymbols is greater than zero.
226 bool isReferenced() const { return numReferencedSymbols
> 0; }
227 bool isExplicitlyLinked() const;
// Sets the explicitlyLinked flag to true; there is no way to clear it here.
228 void setExplicitlyLinked() { explicitlyLinked
= true; }
// Returns true when f->kind() is DylibKind. This is the classof() hook that
// LLVM's isa<>/cast<>/dyn_cast<> templates look for. `f` is dereferenced
// unconditionally, so it must not be null.
230 static bool classof(const InputFile
*f
) { return f
->kind() == DylibKind
; }
232 StringRef installName
;
233 DylibFile
*exportingFile
= nullptr;
235 SmallVector
<StringRef
, 2> rpaths
;
236 uint32_t compatibilityVersion
= 0;
237 uint32_t currentVersion
= 0;
238 int64_t ordinal
= 0; // Ordinal numbering starts from 1, so 0 is a sentinel
239 unsigned numReferencedSymbols
= 0;
241 bool reexport
= false;
242 bool forceNeeded
= false;
243 bool forceWeakImport
= false;
244 bool deadStrippable
= false;
247 bool explicitlyLinked
= false; // Access via isExplicitlyLinked().
250 // An executable can be used as a bundle loader that will load the output
251 // file being linked, and that contains symbols referenced, but not
252 // implemented in the bundle. When used like this, it is very similar
253 // to a dylib, so we've used the same class to represent it.
256 // Synthetic Dylib objects created by $ld$previous symbols in this dylib.
257 // Usually empty. These synthetic dylibs won't have synthetic dylibs
259 SmallVector
<DylibFile
*, 2> extraDylibs
;
262 DylibFile
*getSyntheticDylib(StringRef installName
, uint32_t currentVersion
,
263 uint32_t compatVersion
);
265 bool handleLDSymbol(StringRef originalName
);
266 void handleLDPreviousSymbol(StringRef name
, StringRef originalName
);
267 void handleLDInstallNameSymbol(StringRef name
, StringRef originalName
);
268 void handleLDHideSymbol(StringRef name
, StringRef originalName
);
269 void checkAppExtensionSafety(bool dylibIsAppExtensionSafe
) const;
270 void parseExportedSymbols(uint32_t offset
, uint32_t size
);
272 llvm::DenseSet
<llvm::CachedHashStringRef
> hiddenSymbols
;
276 class ArchiveFile final
: public InputFile
{
278 explicit ArchiveFile(std::unique_ptr
<llvm::object::Archive
> &&file
,
280 void addLazySymbols();
281 void fetch(const llvm::object::Archive::Symbol
&);
282 // LLD normally doesn't use Error for error-handling, but the underlying
283 // Archive library does, so this is the cleanest way to wrap it.
284 Error
fetch(const llvm::object::Archive::Child
&, StringRef reason
);
285 const llvm::object::Archive
&getArchive() const { return *file
; };
// Returns true when f->kind() is ArchiveKind. This is the classof() hook that
// LLVM's isa<>/cast<>/dyn_cast<> templates look for. `f` is dereferenced
// unconditionally, so it must not be null.
286 static bool classof(const InputFile
*f
) { return f
->kind() == ArchiveKind
; }
289 std::unique_ptr
<llvm::object::Archive
> file
;
290 // Keep track of children fetched from the archive by tracking
291 // which address offsets have been fetched already.
292 llvm::DenseSet
<uint64_t> seen
;
293 // Load all symbols with hidden visibility (-load_hidden).
297 class BitcodeFile final
: public InputFile
{
299 explicit BitcodeFile(MemoryBufferRef mb
, StringRef archiveName
,
300 uint64_t offsetInArchive
, bool lazy
= false,
301 bool forceHidden
= false);
// Returns true when f->kind() is BitcodeKind. This is the classof() hook that
// LLVM's isa<>/cast<>/dyn_cast<> templates look for. `f` is dereferenced
// unconditionally, so it must not be null.
302 static bool classof(const InputFile
*f
) { return f
->kind() == BitcodeKind
; }
305 std::unique_ptr
<llvm::lto::InputFile
> obj
;
312 extern llvm::SetVector
<InputFile
*> inputFiles
;
313 extern llvm::DenseMap
<llvm::CachedHashStringRef
, MemoryBufferRef
> cachedReads
;
315 llvm::Optional
<MemoryBufferRef
> readFile(StringRef path
);
317 void extract(InputFile
&file
, StringRef reason
);
321 template <class CommandType
, class... Types
>
322 std::vector
<const CommandType
*>
323 findCommands(const void *anyHdr
, size_t maxCommands
, Types
... types
) {
324 std::vector
<const CommandType
*> cmds
;
325 std::initializer_list
<uint32_t> typesList
{types
...};
326 const auto *hdr
= reinterpret_cast<const llvm::MachO::mach_header
*>(anyHdr
);
328 reinterpret_cast<const uint8_t *>(hdr
) + target
->headerSize
;
329 for (uint32_t i
= 0, n
= hdr
->ncmds
; i
< n
; ++i
) {
330 auto *cmd
= reinterpret_cast<const CommandType
*>(p
);
331 if (llvm::is_contained(typesList
, cmd
->cmd
)) {
333 if (cmds
.size() == maxCommands
)
341 } // namespace detail
343 // anyHdr should be a pointer to either mach_header or mach_header_64
344 template <class CommandType
= llvm::MachO::load_command
, class... Types
>
345 const CommandType
*findCommand(const void *anyHdr
, Types
... types
) {
346 std::vector
<const CommandType
*> cmds
=
347 detail::findCommands
<CommandType
>(anyHdr
, 1, types
...);
348 return cmds
.size() ? cmds
[0] : nullptr;
351 template <class CommandType
= llvm::MachO::load_command
, class... Types
>
352 std::vector
<const CommandType
*> findCommands(const void *anyHdr
,
354 return detail::findCommands
<CommandType
>(anyHdr
, 0, types
...);
359 std::string
toString(const macho::InputFile
*file
);
360 std::string
toString(const macho::Section
&);