1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_MACHO_INPUT_FILES_H
10 #define LLD_MACHO_INPUT_FILES_H
12 #include "MachOStructs.h"
15 #include "lld/Common/LLVM.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/BinaryFormat/MachO.h"
20 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
21 #include "llvm/Object/Archive.h"
22 #include "llvm/Support/MemoryBuffer.h"
23 #include "llvm/TextAPI/TextAPIReader.h"
// Forward declaration; this header only refers to the type through pointers
// (see ObjFile::debugSections).
41 class ConcatInputSection
;
// Opaque enum declaration: fixes the underlying type as uint8_t without
// listing the enumerators here.
44 enum class RefState
: uint8_t;
46 // If --reproduce option is given, all input files are written
47 // to this tar archive.
// Declaration only (extern); the object itself is defined elsewhere.
// NOTE(review): presumably the pointer is empty when --reproduce is not
// given -- confirm against the definition before dereferencing.
48 extern std::unique_ptr
<llvm::TarWriter
> tar
;
50 // If .subsections_via_symbols is set, each InputSection will be split along
51 // symbol boundaries. The field offset represents the offset of the subsection
52 // from the start of the original pre-split InputSection.
53 struct SubsectionEntry
{
// A SubsectionMap is a plain std::vector of SubsectionEntry records.
57 using SubsectionMap
= std::vector
<SubsectionEntry
>;
// Virtual so that objects of the derived file classes can be destroyed
// through an InputFile pointer.
69 virtual ~InputFile() = default;
// Returns the kind tag stored in fileKind; the classof() helpers of the
// derived classes compare against this value.
70 Kind
kind() const { return fileKind
; }
// Returns the stored name (the MemoryBufferRef constructor below sets it
// from the buffer identifier).
71 StringRef
getName() const { return name
; }
// Symbol pointers recorded for this file.
75 std::vector
<Symbol
*> symbols
;
// A list of SubsectionMaps.
// NOTE(review): presumably one map per input section -- confirm.
76 std::vector
<SubsectionMap
> subsections
;
77 // Provides an easy way to sort InputFiles deterministically.
80 // If not empty, this stores the name of the archive containing this file.
81 // We use this string for creating error messages.
82 std::string archiveName
;
// Stores the buffer reference, takes the current value of idCount as this
// file's id (post-incrementing the counter), records the kind tag, and uses
// the buffer identifier as the file name.
85 InputFile(Kind kind
, MemoryBufferRef mb
)
86 : mb(mb
), id(idCount
++), fileKind(kind
), name(mb
.getBufferIdentifier()) {}
// Overload that takes an llvm::MachO::InterfaceFile instead of a memory
// buffer. Declared here; defined elsewhere.
88 InputFile(Kind
, const llvm::MachO::InterfaceFile
&);
// InputFile subclass identified by the ObjKind tag (see classof below).
98 class ObjFile final
: public InputFile
{
// Declared here; defined elsewhere. Takes the file contents, a modification
// time and the name of the containing archive.
100 ObjFile(MemoryBufferRef mb
, uint32_t modTime
, StringRef archiveName
);
// Kind-tag type test: true when f->kind() is ObjKind.
101 static bool classof(const InputFile
*f
) { return f
->kind() == ObjKind
; }
// Null by default.
// NOTE(review): presumably filled in by parseDebugInfo() -- confirm.
103 llvm::DWARFUnit
*compileUnit
= nullptr;
// const, so the value is fixed at construction.
104 const uint32_t modTime
;
// Pointers to ConcatInputSection objects.
// NOTE(review): presumably the file's debug sections, judging by the name.
105 std::vector
<ConcatInputSection
*> debugSections
;
// Held as an ArrayRef (a view, not a container), so the entries it refers to
// must stay alive for as long as this member is used.
106 ArrayRef
<llvm::MachO::data_in_code_entry
> dataInCodeEntries
;
// The parse* members below are declarations only; their definitions live
// elsewhere. Several are templates over the section / nlist layout types.
109 template <class LP
> void parse();
110 template <class Section
> void parseSections(ArrayRef
<Section
>);
112 void parseSymbols(ArrayRef
<typename
LP::section
> sectionHeaders
,
113 ArrayRef
<typename
LP::nlist
> nList
, const char *strtab
,
114 bool subsectionsViaSymbols
);
// Returns a Symbol pointer for the given nlist entry and name.
115 template <class NList
>
116 Symbol
*parseNonSectionSymbol(const NList
&sym
, StringRef name
);
117 template <class Section
>
118 void parseRelocations(ArrayRef
<Section
> sectionHeaders
, const Section
&,
120 void parseDebugInfo();
121 void parseDataInCode();
124 // command-line -sectcreate file
125 class OpaqueFile final
: public InputFile
{
// Declared here; defined elsewhere. Takes the file contents plus the segment
// and section names.
127 OpaqueFile(MemoryBufferRef mb
, StringRef segName
, StringRef sectName
);
// Kind-tag type test: true when f->kind() is OpaqueKind.
128 static bool classof(const InputFile
*f
) { return f
->kind() == OpaqueKind
; }
131 // .dylib or .tbd file
132 class DylibFile final
: public InputFile
{
134 // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
135 // symbols in those sub-libraries will be available under the umbrella
136 // library's namespace. Those sub-libraries can also have their own
137 // re-exports. When loading a re-exported dylib, `umbrella` should be set to
138 // the root dylib to ensure symbols in the child library are correctly bound
139 // to the root. On the other hand, if a dylib is being directly loaded
140 // (through an -lfoo flag), then `umbrella` should be a nullptr.
// Buffer-based constructor. `umbrella` has no default here, so callers must
// pass it explicitly; `isBundleLoader` defaults to false.
141 explicit DylibFile(MemoryBufferRef mb
, DylibFile
*umbrella
,
142 bool isBundleLoader
= false);
// InterfaceFile-based constructor. `umbrella` defaults to nullptr and
// `isBundleLoader` to false.
143 explicit DylibFile(const llvm::MachO::InterfaceFile
&interface
,
144 DylibFile
*umbrella
= nullptr,
145 bool isBundleLoader
= false);
// Declared here; defined elsewhere.
147 void parseLoadCommands(MemoryBufferRef mb
);
148 void parseReexports(const llvm::MachO::InterfaceFile
&interface
);
// Kind-tag type test: true when f->kind() is DylibKind.
150 static bool classof(const InputFile
*f
) { return f
->kind() == DylibKind
; }
152 StringRef installName
;
// Null by default.
153 DylibFile
*exportingFile
= nullptr;
// SmallVector with inline storage for two entries.
155 SmallVector
<StringRef
, 2> rpaths
;
// Both version fields start at 0.
156 uint32_t compatibilityVersion
= 0;
157 uint32_t currentVersion
= 0;
158 int64_t ordinal
= 0; // Ordinal numbering starts from 1, so 0 is a sentinel
// All of the flags below default to false.
160 bool reexport
= false;
161 bool forceNeeded
= false;
162 bool forceWeakImport
= false;
163 bool deadStrippable
= false;
164 bool explicitlyLinked
= false;
// Counter read by isReferenced(); starts at 0.
166 unsigned numReferencedSymbols
= 0;
// True once numReferencedSymbols is greater than zero.
168 bool isReferenced() const { return numReferencedSymbols
> 0; }
170 // An executable can be used as a bundle loader that will load the output
171 // file being linked, and that contains symbols referenced, but not
172 // implemented in the bundle. When used like this, it is very similar
173 // to a Dylib, so we re-used the same class to represent it.
// Declared here; defined elsewhere.
// NOTE(review): handleLDSymbol returns bool, presumably "was this name
// handled" -- confirm against the definition.
177 bool handleLDSymbol(StringRef originalName
);
178 void handleLDPreviousSymbol(StringRef name
, StringRef originalName
);
179 void handleLDInstallNameSymbol(StringRef name
, StringRef originalName
);
180 void checkAppExtensionSafety(bool dylibIsAppExtensionSafe
) const;
// InputFile subclass that wraps an llvm::object::Archive.
184 class ArchiveFile final
: public InputFile
{
// Takes the archive by rvalue reference to a unique_ptr; the class keeps a
// unique_ptr<Archive> member named `file`.
186 explicit ArchiveFile(std::unique_ptr
<llvm::object::Archive
> &&file
);
// Declared here; defined elsewhere.
187 void addLazySymbols();
// Symbol-based overload; returns nothing, unlike the Child-based overload
// below, which reports failure through llvm::Error.
188 void fetch(const llvm::object::Archive::Symbol
&);
189 // LLD normally doesn't use Error for error-handling, but the underlying
190 // Archive library does, so this is the cleanest way to wrap it.
191 Error
fetch(const llvm::object::Archive::Child
&, StringRef reason
);
// Returns a reference to the wrapped archive. `file` is dereferenced without
// a null check, so it must hold an archive when this is called.
192 const llvm::object::Archive
&getArchive() const { return *file
; }
// Kind-tag type test: true when f->kind() is ArchiveKind.
193 static bool classof(const InputFile
*f
) { return f
->kind() == ArchiveKind
; }
// The archive this object wraps; getArchive() dereferences it.
196 std::unique_ptr
<llvm::object::Archive
> file
;
197 // Keep track of children fetched from the archive by tracking
198 // which address offsets have been fetched already.
199 llvm::DenseSet
<uint64_t> seen
;
// InputFile subclass identified by the BitcodeKind tag (see classof below).
202 class BitcodeFile final
: public InputFile
{
// Declared here; defined elsewhere. Takes the file contents, the name of the
// containing archive and an offset within that archive.
204 explicit BitcodeFile(MemoryBufferRef mb
, StringRef archiveName
,
205 uint64_t offsetInArchive
);
// Kind-tag type test: true when f->kind() is BitcodeKind.
206 static bool classof(const InputFile
*f
) { return f
->kind() == BitcodeKind
; }
// The llvm::lto::InputFile held by this object, through a unique_ptr.
208 std::unique_ptr
<llvm::lto::InputFile
> obj
;
// Global set of InputFile pointers. Declaration only (extern); defined
// elsewhere.
211 extern llvm::SetVector
<InputFile
*> inputFiles
;
// Declared here; defined elsewhere. The result is an llvm::Optional, so
// callers must handle the case where no buffer is returned.
213 llvm::Optional
<MemoryBufferRef
> readFile(StringRef path
);
// Walks the commands that follow a Mach-O header and collects pointers to
// those whose `cmd` field equals one of `types`.
//   anyHdr      - reinterpreted as llvm::MachO::mach_header to read ncmds.
//   maxCommands - compared against the number of matches collected so far.
//   types       - the command type values to look for.
217 template <class CommandType
, class... Types
>
218 std::vector
<const CommandType
*>
219 findCommands(const void *anyHdr
, size_t maxCommands
, Types
... types
) {
220 std::vector
<const CommandType
*> cmds
;
// Gather the requested type values as uint32_t for the membership test below.
221 std::initializer_list
<uint32_t> typesList
{types
...};
222 const auto *hdr
= reinterpret_cast<const llvm::MachO::mach_header
*>(anyHdr
);
// Byte address target->headerSize past the start of the header.
224 reinterpret_cast<const uint8_t *>(hdr
) + target
->headerSize
;
// One iteration per command counted in the header (hdr->ncmds).
225 for (uint32_t i
= 0, n
= hdr
->ncmds
; i
< n
; ++i
) {
226 auto *cmd
= reinterpret_cast<const CommandType
*>(p
);
227 if (llvm::is_contained(typesList
, cmd
->cmd
)) {
229 if (cmds
.size() == maxCommands
)
237 } // namespace detail
239 // anyHdr should be a pointer to either mach_header or mach_header_64
// Returns the first command whose type is one of `types`, or nullptr when the
// search yields nothing. Calls detail::findCommands with maxCommands = 1.
// CommandType defaults to llvm::MachO::load_command.
240 template <class CommandType
= llvm::MachO::load_command
, class... Types
>
241 const CommandType
*findCommand(const void *anyHdr
, Types
... types
) {
242 std::vector
<const CommandType
*> cmds
=
243 detail::findCommands
<CommandType
>(anyHdr
, 1, types
...);
// An empty result maps to nullptr; otherwise hand back the first match.
244 return cmds
.size() ? cmds
[0] : nullptr;
// Returns every result of detail::findCommands for the given header and
// types, passing 0 as maxCommands. CommandType defaults to
// llvm::MachO::load_command.
// NOTE(review): 0 presumably means "no limit" -- confirm against
// detail::findCommands.
247 template <class CommandType
= llvm::MachO::load_command
, class... Types
>
248 std::vector
<const CommandType
*> findCommands(const void *anyHdr
,
250 return detail::findCommands
<CommandType
>(anyHdr
, 0, types
...);
// Produces a std::string for the given input file. Declared here; defined
// elsewhere.
255 std::string
toString(const macho::InputFile
*file
);