1 //===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MachOReader.h"
11 #include "llvm/BinaryFormat/MachO.h"
12 #include "llvm/Object/MachO.h"
13 #include "llvm/Support/Errc.h"
17 using namespace llvm::objcopy
;
18 using namespace llvm::objcopy::macho
;
// Copies the Mach-O header fields (magic, cputype, cpusubtype, filetype,
// ncmds, sizeofcmds, flags) from MachOObj.getHeader() into O.Header.
20 void MachOReader::readHeader(Object
&O
) const {
21 O
.Header
.Magic
= MachOObj
.getHeader().magic
;
22 O
.Header
.CPUType
= MachOObj
.getHeader().cputype
;
23 O
.Header
.CPUSubType
= MachOObj
.getHeader().cpusubtype
;
24 O
.Header
.FileType
= MachOObj
.getHeader().filetype
;
25 O
.Header
.NCmds
= MachOObj
.getHeader().ncmds
;
26 O
.Header
.SizeOfCmds
= MachOObj
.getHeader().sizeofcmds
;
27 O
.Header
.Flags
= MachOObj
.getHeader().flags
;
// Builds a Section from a raw section record of type SectionType.
//
// The segment and section names come from the fixed-size segname/sectname
// char arrays. strnlen caps the length at the array size, so a name that fills
// the whole array without a terminating NUL is still read within bounds.
// The assignments below copy offset, reloff, nreloc, reserved1 and reserved2
// from Sec into the new Section.
30 template <typename SectionType
>
31 static Section
constructSectionCommon(const SectionType
&Sec
, uint32_t Index
) {
32 StringRef
SegName(Sec
.segname
, strnlen(Sec
.segname
, sizeof(Sec
.segname
)));
33 StringRef
SectName(Sec
.sectname
, strnlen(Sec
.sectname
, sizeof(Sec
.sectname
)));
34 Section
S(SegName
, SectName
);
38 S
.OriginalOffset
= Sec
.offset
;
40 S
.RelOff
= Sec
.reloff
;
41 S
.NReloc
= Sec
.nreloc
;
43 S
.Reserved1
= Sec
.reserved1
;
44 S
.Reserved2
= Sec
.reserved2
;
// Overload for MachO::section records: forwards Sec and Index to
// constructSectionCommon and returns its result unchanged.
49 Section
constructSection(const MachO::section
&Sec
, uint32_t Index
) {
50 return constructSectionCommon(Sec
, Index
);
// Overload for MachO::section_64 records: builds the Section through
// constructSectionCommon, then copies reserved3 from Sec, which this overload
// handles itself rather than leaving to the common helper.
53 Section
constructSection(const MachO::section_64
&Sec
, uint32_t Index
) {
54 Section S
= constructSectionCommon(Sec
, Index
);
55 S
.Reserved3
= Sec
.reserved3
;
// Reads the section records that belong to the segment load command LoadCmd
// and returns them as owned Section objects.
//
// Each record is copied out of the load-command buffer with memcpy,
// byte-swapped when the file's endianness differs from the host's, and turned
// into a Section via constructSection. The section is then looked up in
// MachOObj to obtain its contents and its relocations.
//
// NextSectionIndex is taken by reference and post-incremented once per
// section, so numbering continues across successive calls. Errors from
// getSection and getSectionContents are returned to the caller.
59 template <typename SectionType
, typename SegmentType
>
60 Expected
<std::vector
<std::unique_ptr
<Section
>>> static extractSections(
61 const object::MachOObjectFile::LoadCommandInfo
&LoadCmd
,
62 const object::MachOObjectFile
&MachOObj
, uint32_t &NextSectionIndex
) {
63 std::vector
<std::unique_ptr
<Section
>> Sections
;
64 for (auto Curr
= reinterpret_cast<const SectionType
*>(LoadCmd
.Ptr
+
66 End
= reinterpret_cast<const SectionType
*>(LoadCmd
.Ptr
+
// Copy the record into a local before reading any of its fields.
70 memcpy((void *)&Sec
, Curr
, sizeof(SectionType
));
// The copy is still in the file's byte order; swap if that is not the host's.
72 if (MachOObj
.isLittleEndian() != sys::IsLittleEndianHost
)
73 MachO::swapStruct(Sec
);
76 std::make_unique
<Section
>(constructSection(Sec
, NextSectionIndex
)));
78 Section
&S
= *Sections
.back();
80 Expected
<object::SectionRef
> SecRef
=
81 MachOObj
.getSection(NextSectionIndex
++);
83 return SecRef
.takeError();
85 Expected
<ArrayRef
<uint8_t>> Data
=
86 MachOObj
.getSectionContents(SecRef
->getRawDataRefImpl());
88 return Data
.takeError();
91 StringRef(reinterpret_cast<const char *>(Data
->data()), Data
->size());
93 const uint32_t CPUType
= MachOObj
.getHeader().cputype
;
94 S
.Relocations
.reserve(S
.NReloc
);
95 for (auto RI
= MachOObj
.section_rel_begin(SecRef
->getRawDataRefImpl()),
96 RE
= MachOObj
.section_rel_end(SecRef
->getRawDataRefImpl());
99 R
.Symbol
= nullptr; // We'll fill this field later.
100 R
.Info
= MachOObj
.getRelocation(RI
->getRawDataRefImpl());
101 R
.Scattered
= MachOObj
.isRelocationScattered(R
.Info
);
102 unsigned Type
= MachOObj
.getAnyRelocationType(R
.Info
);
// Only a non-scattered ARM64_RELOC_ADDEND on CPU_TYPE_ARM64 is flagged as an
// addend relocation.
103 // TODO Support CPU_TYPE_ARM.
104 R
.IsAddend
= !R
.Scattered
&& (CPUType
== MachO::CPU_TYPE_ARM64
&&
105 Type
== MachO::ARM64_RELOC_ADDEND
);
106 R
.Extern
= !R
.Scattered
&& MachOObj
.getPlainRelocationExternal(R
.Info
);
107 S
.Relocations
.push_back(R
);
110 assert(S
.NReloc
== S
.Relocations
.size() &&
111 "Incorrect number of relocations");
113 return std::move(Sections
);
// Walks every load command in MachOObj and appends a LoadCommand for each one
// to O.LoadCommands.
//
// A command is appended only at the end of the loop body, so inside the loop
// O.LoadCommands.size() is the index the current command will occupy. The
// first switch stores that index in the matching O.*CommandIndex field for the
// command kinds listed in its cases. For LC_SEGMENT and LC_SEGMENT_64 it also
// extracts the segment's sections and returns any error from that step.
//
// The second switch is generated from MachO.def via HANDLE_LOAD_COMMAND. Each
// generated case copies the command's fixed-size struct, byte-swaps it when
// the file and host endianness differ, and keeps any bytes past the struct as
// Payload. The code just before the #include does the same with the generic
// MachO::load_command struct.
//
// Returns Error::success() once every command has been processed.
116 Error
MachOReader::readLoadCommands(Object
&O
) const {
117 // Mach-O section indices are 1-based; the counter is shared by all segments.
118 uint32_t NextSectionIndex
= 1;
119 static constexpr char TextSegmentName
[] = "__TEXT";
120 for (auto LoadCmd
: MachOObj
.load_commands()) {
122 switch (LoadCmd
.C
.cmd
) {
123 case MachO::LC_CODE_SIGNATURE
:
124 O
.CodeSignatureCommandIndex
= O
.LoadCommands
.size();
126 case MachO::LC_SEGMENT
:
127 // LoadCmd.Ptr might not be aligned temporarily as
128 // MachO::segment_command requires, but the segname char pointer does not
129 // have alignment restrictions.
130 if (StringRef(reinterpret_cast<const char *>(
131 LoadCmd
.Ptr
+ offsetof(MachO::segment_command
, segname
))) ==
133 O
.TextSegmentCommandIndex
= O
.LoadCommands
.size();
135 if (Expected
<std::vector
<std::unique_ptr
<Section
>>> Sections
=
136 extractSections
<MachO::section
, MachO::segment_command
>(
137 LoadCmd
, MachOObj
, NextSectionIndex
))
138 LC
.Sections
= std::move(*Sections
);
140 return Sections
.takeError();
142 case MachO::LC_SEGMENT_64
:
143 // LoadCmd.Ptr might not be aligned temporarily as
144 // MachO::segment_command_64 requires, but the segname char pointer does
145 // not have alignment restrictions.
146 if (StringRef(reinterpret_cast<const char *>(
147 LoadCmd
.Ptr
+ offsetof(MachO::segment_command_64
, segname
))) ==
149 O
.TextSegmentCommandIndex
= O
.LoadCommands
.size();
151 if (Expected
<std::vector
<std::unique_ptr
<Section
>>> Sections
=
152 extractSections
<MachO::section_64
, MachO::segment_command_64
>(
153 LoadCmd
, MachOObj
, NextSectionIndex
))
154 LC
.Sections
= std::move(*Sections
);
156 return Sections
.takeError();
158 case MachO::LC_SYMTAB
:
159 O
.SymTabCommandIndex
= O
.LoadCommands
.size();
161 case MachO::LC_DYSYMTAB
:
162 O
.DySymTabCommandIndex
= O
.LoadCommands
.size();
164 case MachO::LC_DYLD_INFO
:
165 case MachO::LC_DYLD_INFO_ONLY
:
166 O
.DyLdInfoCommandIndex
= O
.LoadCommands
.size();
168 case MachO::LC_DATA_IN_CODE
:
169 O
.DataInCodeCommandIndex
= O
.LoadCommands
.size();
171 case MachO::LC_LINKER_OPTIMIZATION_HINT
:
172 O
.LinkerOptimizationHintCommandIndex
= O
.LoadCommands
.size();
174 case MachO::LC_FUNCTION_STARTS
:
175 O
.FunctionStartsCommandIndex
= O
.LoadCommands
.size();
177 case MachO::LC_DYLD_EXPORTS_TRIE
:
178 O
.ExportsTrieCommandIndex
= O
.LoadCommands
.size();
180 case MachO::LC_DYLD_CHAINED_FIXUPS
:
181 O
.ChainedFixupsCommandIndex
= O
.LoadCommands
.size();
184 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
185 case MachO::LCName: \
186 memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \
187 sizeof(MachO::LCStruct)); \
188 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \
189 MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \
190 if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \
191 LC.Payload = ArrayRef<uint8_t>( \
192 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \
193 sizeof(MachO::LCStruct), \
194 LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \
197 switch (LoadCmd
.C
.cmd
) {
199 memcpy((void *)&(LC
.MachOLoadCommand
.load_command_data
), LoadCmd
.Ptr
,
200 sizeof(MachO::load_command
));
201 if (MachOObj
.isLittleEndian() != sys::IsLittleEndianHost
)
202 MachO::swapStruct(LC
.MachOLoadCommand
.load_command_data
);
203 if (LoadCmd
.C
.cmdsize
> sizeof(MachO::load_command
))
204 LC
.Payload
= ArrayRef
<uint8_t>(
205 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd
.Ptr
)) +
206 sizeof(MachO::load_command
),
207 LoadCmd
.C
.cmdsize
- sizeof(MachO::load_command
));
209 #include "llvm/BinaryFormat/MachO.def"
211 O
.LoadCommands
.push_back(std::move(LC
));
213 return Error::success();
// Builds a SymbolEntry from one nlist record.
//
// The name is the NUL-terminated string that starts at offset n_strx inside
// StrTable; n_type, n_sect, n_desc and n_value are copied over unchanged.
//
// NOTE(review): the n_strx bounds check is an assert only, so it disappears in
// builds with assertions disabled. Confirm the string table offset is
// validated elsewhere before untrusted input reaches this function.
216 template <typename nlist_t
>
217 SymbolEntry
constructSymbolEntry(StringRef StrTable
, const nlist_t
&nlist
) {
218 assert(nlist
.n_strx
< StrTable
.size() &&
219 "n_strx exceeds the size of the string table");
221 SE
.Name
= StringRef(StrTable
.data() + nlist
.n_strx
).str();
222 SE
.n_type
= nlist
.n_type
;
223 SE
.n_sect
= nlist
.n_sect
;
224 SE
.n_desc
= nlist
.n_desc
;
225 SE
.n_value
= nlist
.n_value
;
// Appends one SymbolEntry to O.SymTable.Symbols for every symbol reported by
// MachOObj.symbols(). Each entry is built by constructSymbolEntry from either
// the getSymbol64TableEntry or the getSymbolTableEntry record, selected by a
// ternary, with the string table fetched once up front.
229 void MachOReader::readSymbolTable(Object
&O
) const {
230 StringRef StrTable
= MachOObj
.getStringTableData();
231 for (auto Symbol
: MachOObj
.symbols()) {
234 ? constructSymbolEntry(StrTable
, MachOObj
.getSymbol64TableEntry(
235 Symbol
.getRawDataRefImpl()))
236 : constructSymbolEntry(StrTable
, MachOObj
.getSymbolTableEntry(
237 Symbol
.getRawDataRefImpl())));
239 O
.SymTable
.Symbols
.push_back(std::make_unique
<SymbolEntry
>(SE
));
// Fills in the target of every relocation that is neither scattered nor an
// addend.
//
// The first loop flattens the sections of all load commands into one vector,
// in load-command order. The second loop reads the symbol number out of each
// qualifying relocation and stores a target: either the symbol at that index
// in O.SymTable, or the section at position SymbolNum - 1 of the flattened
// vector (SymbolNum is used as a 1-based section index there).
// NOTE(review): which of the two applies is presumably decided by the
// relocation's Extern flag — confirm.
//
// The section-index range check is an assert only; see the FIXME below.
243 void MachOReader::setSymbolInRelocationInfo(Object
&O
) const {
244 std::vector
<const Section
*> Sections
;
245 for (auto &LC
: O
.LoadCommands
)
246 for (std::unique_ptr
<Section
> &Sec
: LC
.Sections
)
247 Sections
.push_back(Sec
.get());
249 for (LoadCommand
&LC
: O
.LoadCommands
)
250 for (std::unique_ptr
<Section
> &Sec
: LC
.Sections
)
251 for (auto &Reloc
: Sec
->Relocations
)
252 if (!Reloc
.Scattered
&& !Reloc
.IsAddend
) {
253 const uint32_t SymbolNum
=
254 Reloc
.getPlainRelocationSymbolNum(MachOObj
.isLittleEndian());
256 Reloc
.Symbol
= O
.SymTable
.getSymbolByIndex(SymbolNum
);
258 // FIXME: Refactor error handling in MachOReader and report an error
259 // if we encounter an invalid relocation.
260 assert(SymbolNum
>= 1 && SymbolNum
<= Sections
.size() &&
261 "Invalid section index.");
262 Reloc
.Sec
= Sections
[SymbolNum
- 1];
// Stores the result of MachOObj.getDyldInfoRebaseOpcodes() in
// O.Rebases.Opcodes.
267 void MachOReader::readRebaseInfo(Object
&O
) const {
268 O
.Rebases
.Opcodes
= MachOObj
.getDyldInfoRebaseOpcodes();
// Stores the result of MachOObj.getDyldInfoBindOpcodes() in O.Binds.Opcodes.
271 void MachOReader::readBindInfo(Object
&O
) const {
272 O
.Binds
.Opcodes
= MachOObj
.getDyldInfoBindOpcodes();
// Stores the result of MachOObj.getDyldInfoWeakBindOpcodes() in
// O.WeakBinds.Opcodes.
275 void MachOReader::readWeakBindInfo(Object
&O
) const {
276 O
.WeakBinds
.Opcodes
= MachOObj
.getDyldInfoWeakBindOpcodes();
// Stores the result of MachOObj.getDyldInfoLazyBindOpcodes() in
// O.LazyBinds.Opcodes.
279 void MachOReader::readLazyBindInfo(Object
&O
) const {
280 O
.LazyBinds
.Opcodes
= MachOObj
.getDyldInfoLazyBindOpcodes();
// Stores the result of MachOObj.getDyldInfoExportsTrie() in O.Exports.Trie.
283 void MachOReader::readExportInfo(Object
&O
) const {
284 O
.Exports
.Trie
= MachOObj
.getDyldInfoExportsTrie();
// Shared helper behind the linkedit-data readers in this file.
//
// Takes the load command at *LCIndex in O.LoadCommands, reads it as a
// linkedit_data_command, and uses its dataoff/datasize to take a substring of
// the whole file's bytes (MachOObj.getData()), converted to a byte ArrayRef.
// NOTE(review): LCIndex is dereferenced here, so it must hold a value by this
// point; the result is presumably stored into LD — confirm both.
287 void MachOReader::readLinkData(Object
&O
, Optional
<size_t> LCIndex
,
288 LinkData
&LD
) const {
291 const MachO::linkedit_data_command
&LC
=
292 O
.LoadCommands
[*LCIndex
].MachOLoadCommand
.linkedit_data_command_data
;
294 arrayRefFromStringRef(MachOObj
.getData().substr(LC
.dataoff
, LC
.datasize
));
// Forwards to readLinkData with O.DataInCodeCommandIndex as the load command
// index and O.DataInCode as the LinkData argument.
297 void MachOReader::readDataInCodeData(Object
&O
) const {
298 return readLinkData(O
, O
.DataInCodeCommandIndex
, O
.DataInCode
);
// Forwards to readLinkData with O.LinkerOptimizationHintCommandIndex as the
// load command index and O.LinkerOptimizationHint as the LinkData argument.
301 void MachOReader::readLinkerOptimizationHint(Object
&O
) const {
302 return readLinkData(O
, O
.LinkerOptimizationHintCommandIndex
,
303 O
.LinkerOptimizationHint
);
// Forwards to readLinkData with O.FunctionStartsCommandIndex as the load
// command index and O.FunctionStarts as the LinkData argument.
306 void MachOReader::readFunctionStartsData(Object
&O
) const {
307 return readLinkData(O
, O
.FunctionStartsCommandIndex
, O
.FunctionStarts
);
// Forwards to readLinkData with O.ExportsTrieCommandIndex as the load command
// index and O.ExportsTrie as the LinkData argument.
310 void MachOReader::readExportsTrie(Object
&O
) const {
311 return readLinkData(O
, O
.ExportsTrieCommandIndex
, O
.ExportsTrie
);
// Forwards to readLinkData with O.ChainedFixupsCommandIndex as the load
// command index and O.ChainedFixups as the LinkData argument.
314 void MachOReader::readChainedFixups(Object
&O
) const {
315 return readLinkData(O
, O
.ChainedFixupsCommandIndex
, O
.ChainedFixups
);
// Populates O.IndirectSymTable.Symbols from the dysymtab load command,
// visiting indirect symbol entries 0 .. nindirectsyms - 1 in order.
//
// An entry whose raw value has INDIRECT_SYMBOL_LOCAL or INDIRECT_SYMBOL_ABS
// set is recorded with no symbol attached (None). The other emplace_back pairs
// the raw value with the symbol found at that index in O.SymTable.
318 void MachOReader::readIndirectSymbolTable(Object
&O
) const {
319 MachO::dysymtab_command DySymTab
= MachOObj
.getDysymtabLoadCommand();
320 constexpr uint32_t AbsOrLocalMask
=
321 MachO::INDIRECT_SYMBOL_LOCAL
| MachO::INDIRECT_SYMBOL_ABS
;
322 for (uint32_t i
= 0; i
< DySymTab
.nindirectsyms
; ++i
) {
323 uint32_t Index
= MachOObj
.getIndirectSymbolTableEntry(DySymTab
, i
);
324 if ((Index
& AbsOrLocalMask
) != 0)
325 O
.IndirectSymTable
.Symbols
.emplace_back(Index
, None
);
327 O
.IndirectSymTable
.Symbols
.emplace_back(
328 Index
, O
.SymTable
.getSymbolByIndex(Index
));
// Derives O.SwiftVersion from the __objc_imageinfo section.
//
// Scans every section of every load command for one named __objc_imageinfo in
// a __DATA, __DATA_CONST or __DATA_DIRTY segment whose content holds at least
// sizeof(ObjCImageInfo) bytes. On a match the leading bytes are copied into an
// ObjCImageInfo, Version and Flags are byte-swapped when the file and host
// endianness differ, and bits 8..15 of Flags are stored as the Swift version.
332 void MachOReader::readSwiftVersion(Object
&O
) const {
333 struct ObjCImageInfo
{
338 for (const LoadCommand
&LC
: O
.LoadCommands
)
339 for (const std::unique_ptr
<Section
> &Sec
: LC
.Sections
)
340 if (Sec
->Sectname
== "__objc_imageinfo" &&
341 (Sec
->Segname
== "__DATA" || Sec
->Segname
== "__DATA_CONST" ||
342 Sec
->Segname
== "__DATA_DIRTY") &&
343 Sec
->Content
.size() >= sizeof(ObjCImageInfo
)) {
344 memcpy(&ImageInfo
, Sec
->Content
.data(), sizeof(ObjCImageInfo
));
345 if (MachOObj
.isLittleEndian() != sys::IsLittleEndianHost
) {
346 sys::swapByteOrder(ImageInfo
.Version
);
347 sys::swapByteOrder(ImageInfo
.Flags
);
349 O
.SwiftVersion
= (ImageInfo
.Flags
>> 8) & 0xff;
// Creates a fresh Object and fills it by running the reader passes below in
// order, then hands ownership of the Object to the caller.
//
// readLoadCommands runs ahead of the other passes shown and is the one whose
// Error result is checked here. The symbol table is read before
// setSymbolInRelocationInfo and readIndirectSymbolTable, both of which look
// symbols up in O.SymTable.
354 Expected
<std::unique_ptr
<Object
>> MachOReader::create() const {
355 auto Obj
= std::make_unique
<Object
>();
357 if (Error E
= readLoadCommands(*Obj
))
359 readSymbolTable(*Obj
);
360 setSymbolInRelocationInfo(*Obj
);
361 readRebaseInfo(*Obj
);
363 readWeakBindInfo(*Obj
);
364 readLazyBindInfo(*Obj
);
365 readExportInfo(*Obj
);
366 readDataInCodeData(*Obj
);
367 readLinkerOptimizationHint(*Obj
);
368 readFunctionStartsData(*Obj
);
369 readExportsTrie(*Obj
);
370 readChainedFixups(*Obj
);
371 readIndirectSymbolTable(*Obj
);
372 readSwiftVersion(*Obj
);
373 return std::move(Obj
);