1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
12 #include "llvm/Object/MachOUniversal.h"
13 #include "llvm/ObjectYAML/ObjectYAML.h"
14 #include "llvm/Support/ErrorHandling.h"
15 #include "llvm/Support/LEB128.h"
17 #include <string.h> // for memcpy
23 template <typename StructType
>
24 const char *processLoadCommandData(
25 MachOYAML::LoadCommand
&LC
,
26 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
);
28 const object::MachOObjectFile
&Obj
;
29 void dumpHeader(std::unique_ptr
<MachOYAML::Object
> &Y
);
30 void dumpLoadCommands(std::unique_ptr
<MachOYAML::Object
> &Y
);
31 void dumpLinkEdit(std::unique_ptr
<MachOYAML::Object
> &Y
);
32 void dumpRebaseOpcodes(std::unique_ptr
<MachOYAML::Object
> &Y
);
33 void dumpBindOpcodes(std::vector
<MachOYAML::BindOpcode
> &BindOpcodes
,
34 ArrayRef
<uint8_t> OpcodeBuffer
, bool Lazy
= false);
35 void dumpExportTrie(std::unique_ptr
<MachOYAML::Object
> &Y
);
36 void dumpSymbols(std::unique_ptr
<MachOYAML::Object
> &Y
);
37 void dumpDebugAbbrev(DWARFContext
&DCtx
,
38 std::unique_ptr
<MachOYAML::Object
> &Y
);
39 void dumpDebugStrings(DWARFContext
&DCtx
,
40 std::unique_ptr
<MachOYAML::Object
> &Y
);
43 MachODumper(const object::MachOObjectFile
&O
) : Obj(O
) {}
44 Expected
<std::unique_ptr
<MachOYAML::Object
>> dump();
// Expanded once per load command when MachO.def is included inside the switch
// in MachODumper::dumpLoadCommands. Each expansion is one `case` that copies
// the fixed-size command struct into LC.Data, byte-swaps it when the file and
// host endianness differ, and lets processLoadCommandData consume whatever
// follows the struct.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName:                                                          \
    memcpy((void *) & (LC.Data.LCStruct##_data), LoadCmd.Ptr,                  \
           sizeof(MachO::LCStruct));                                           \
    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
      MachO::swapStruct(LC.Data.LCStruct##_data);                              \
    EndPtr = processLoadCommandData<MachO::LCStruct>(LC, LoadCmd);             \
    break;
56 template <typename SectionType
>
57 MachOYAML::Section
constructSectionCommon(SectionType Sec
) {
58 MachOYAML::Section TempSec
;
59 memcpy(reinterpret_cast<void *>(&TempSec
.sectname
[0]), &Sec
.sectname
[0], 16);
60 memcpy(reinterpret_cast<void *>(&TempSec
.segname
[0]), &Sec
.segname
[0], 16);
61 TempSec
.addr
= Sec
.addr
;
62 TempSec
.size
= Sec
.size
;
63 TempSec
.offset
= Sec
.offset
;
64 TempSec
.align
= Sec
.align
;
65 TempSec
.reloff
= Sec
.reloff
;
66 TempSec
.nreloc
= Sec
.nreloc
;
67 TempSec
.flags
= Sec
.flags
;
68 TempSec
.reserved1
= Sec
.reserved1
;
69 TempSec
.reserved2
= Sec
.reserved2
;
70 TempSec
.reserved3
= 0;
74 template <typename SectionType
>
75 MachOYAML::Section
constructSection(SectionType Sec
);
77 template <> MachOYAML::Section
constructSection(MachO::section Sec
) {
78 MachOYAML::Section TempSec
= constructSectionCommon(Sec
);
79 TempSec
.reserved3
= 0;
83 template <> MachOYAML::Section
constructSection(MachO::section_64 Sec
) {
84 MachOYAML::Section TempSec
= constructSectionCommon(Sec
);
85 TempSec
.reserved3
= Sec
.reserved3
;
89 template <typename SectionType
, typename SegmentType
>
91 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
,
92 std::vector
<MachOYAML::Section
> &Sections
,
93 bool IsLittleEndian
) {
94 auto End
= LoadCmd
.Ptr
+ LoadCmd
.C
.cmdsize
;
95 const SectionType
*Curr
=
96 reinterpret_cast<const SectionType
*>(LoadCmd
.Ptr
+ sizeof(SegmentType
));
97 for (; reinterpret_cast<const void *>(Curr
) < End
; Curr
++) {
98 if (IsLittleEndian
!= sys::IsLittleEndianHost
) {
100 memcpy((void *)&Sec
, Curr
, sizeof(SectionType
));
101 MachO::swapStruct(Sec
);
102 Sections
.push_back(constructSection(Sec
));
104 Sections
.push_back(constructSection(*Curr
));
107 return reinterpret_cast<const char *>(Curr
);
110 template <typename StructType
>
111 const char *MachODumper::processLoadCommandData(
112 MachOYAML::LoadCommand
&LC
,
113 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
114 return LoadCmd
.Ptr
+ sizeof(StructType
);
118 const char *MachODumper::processLoadCommandData
<MachO::segment_command
>(
119 MachOYAML::LoadCommand
&LC
,
120 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
121 return extractSections
<MachO::section
, MachO::segment_command
>(
122 LoadCmd
, LC
.Sections
, Obj
.isLittleEndian());
126 const char *MachODumper::processLoadCommandData
<MachO::segment_command_64
>(
127 MachOYAML::LoadCommand
&LC
,
128 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
129 return extractSections
<MachO::section_64
, MachO::segment_command_64
>(
130 LoadCmd
, LC
.Sections
, Obj
.isLittleEndian());
133 template <typename StructType
>
135 readString(MachOYAML::LoadCommand
&LC
,
136 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
137 auto Start
= LoadCmd
.Ptr
+ sizeof(StructType
);
138 auto MaxSize
= LoadCmd
.C
.cmdsize
- sizeof(StructType
);
139 auto Size
= strnlen(Start
, MaxSize
);
140 LC
.PayloadString
= StringRef(Start
, Size
).str();
145 const char *MachODumper::processLoadCommandData
<MachO::dylib_command
>(
146 MachOYAML::LoadCommand
&LC
,
147 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
148 return readString
<MachO::dylib_command
>(LC
, LoadCmd
);
152 const char *MachODumper::processLoadCommandData
<MachO::dylinker_command
>(
153 MachOYAML::LoadCommand
&LC
,
154 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
155 return readString
<MachO::dylinker_command
>(LC
, LoadCmd
);
159 const char *MachODumper::processLoadCommandData
<MachO::rpath_command
>(
160 MachOYAML::LoadCommand
&LC
,
161 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
162 return readString
<MachO::rpath_command
>(LC
, LoadCmd
);
166 const char *MachODumper::processLoadCommandData
<MachO::build_version_command
>(
167 MachOYAML::LoadCommand
&LC
,
168 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
169 auto Start
= LoadCmd
.Ptr
+ sizeof(MachO::build_version_command
);
170 auto NTools
= LC
.Data
.build_version_command_data
.ntools
;
171 for (unsigned i
= 0; i
< NTools
; ++i
) {
172 auto Curr
= Start
+ i
* sizeof(MachO::build_tool_version
);
173 MachO::build_tool_version BV
;
174 memcpy((void *)&BV
, Curr
, sizeof(MachO::build_tool_version
));
175 if (Obj
.isLittleEndian() != sys::IsLittleEndianHost
)
176 MachO::swapStruct(BV
);
177 LC
.Tools
.push_back(BV
);
179 return Start
+ NTools
* sizeof(MachO::build_tool_version
);
182 Expected
<std::unique_ptr
<MachOYAML::Object
>> MachODumper::dump() {
183 auto Y
= make_unique
<MachOYAML::Object
>();
184 Y
->IsLittleEndian
= Obj
.isLittleEndian();
189 std::unique_ptr
<DWARFContext
> DICtx
= DWARFContext::create(Obj
);
190 if (auto Err
= dwarf2yaml(*DICtx
, Y
->DWARF
))
191 return errorCodeToError(Err
);
195 void MachODumper::dumpHeader(std::unique_ptr
<MachOYAML::Object
> &Y
) {
196 Y
->Header
.magic
= Obj
.getHeader().magic
;
197 Y
->Header
.cputype
= Obj
.getHeader().cputype
;
198 Y
->Header
.cpusubtype
= Obj
.getHeader().cpusubtype
;
199 Y
->Header
.filetype
= Obj
.getHeader().filetype
;
200 Y
->Header
.ncmds
= Obj
.getHeader().ncmds
;
201 Y
->Header
.sizeofcmds
= Obj
.getHeader().sizeofcmds
;
202 Y
->Header
.flags
= Obj
.getHeader().flags
;
203 Y
->Header
.reserved
= 0;
206 void MachODumper::dumpLoadCommands(std::unique_ptr
<MachOYAML::Object
> &Y
) {
207 for (auto LoadCmd
: Obj
.load_commands()) {
208 MachOYAML::LoadCommand LC
;
209 const char *EndPtr
= LoadCmd
.Ptr
;
210 switch (LoadCmd
.C
.cmd
) {
212 memcpy((void *)&(LC
.Data
.load_command_data
), LoadCmd
.Ptr
,
213 sizeof(MachO::load_command
));
214 if (Obj
.isLittleEndian() != sys::IsLittleEndianHost
)
215 MachO::swapStruct(LC
.Data
.load_command_data
);
216 EndPtr
= processLoadCommandData
<MachO::load_command
>(LC
, LoadCmd
);
218 #include "llvm/BinaryFormat/MachO.def"
220 auto RemainingBytes
= LoadCmd
.C
.cmdsize
- (EndPtr
- LoadCmd
.Ptr
);
221 if (!std::all_of(EndPtr
, &EndPtr
[RemainingBytes
],
222 [](const char C
) { return C
== 0; })) {
223 LC
.PayloadBytes
.insert(LC
.PayloadBytes
.end(), EndPtr
,
224 &EndPtr
[RemainingBytes
]);
227 LC
.ZeroPadBytes
= RemainingBytes
;
228 Y
->LoadCommands
.push_back(std::move(LC
));
232 void MachODumper::dumpLinkEdit(std::unique_ptr
<MachOYAML::Object
> &Y
) {
233 dumpRebaseOpcodes(Y
);
234 dumpBindOpcodes(Y
->LinkEdit
.BindOpcodes
, Obj
.getDyldInfoBindOpcodes());
235 dumpBindOpcodes(Y
->LinkEdit
.WeakBindOpcodes
,
236 Obj
.getDyldInfoWeakBindOpcodes());
237 dumpBindOpcodes(Y
->LinkEdit
.LazyBindOpcodes
, Obj
.getDyldInfoLazyBindOpcodes(),
243 void MachODumper::dumpRebaseOpcodes(std::unique_ptr
<MachOYAML::Object
> &Y
) {
244 MachOYAML::LinkEditData
&LEData
= Y
->LinkEdit
;
246 auto RebaseOpcodes
= Obj
.getDyldInfoRebaseOpcodes();
247 for (auto OpCode
= RebaseOpcodes
.begin(); OpCode
!= RebaseOpcodes
.end();
249 MachOYAML::RebaseOpcode RebaseOp
;
251 static_cast<MachO::RebaseOpcode
>(*OpCode
& MachO::REBASE_OPCODE_MASK
);
252 RebaseOp
.Imm
= *OpCode
& MachO::REBASE_IMMEDIATE_MASK
;
257 switch (RebaseOp
.Opcode
) {
258 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
:
260 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
261 RebaseOp
.ExtraData
.push_back(ULEB
);
264 // Intentionally no break here -- This opcode has two ULEB values
265 case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
:
266 case MachO::REBASE_OPCODE_ADD_ADDR_ULEB
:
267 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES
:
268 case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
:
270 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
271 RebaseOp
.ExtraData
.push_back(ULEB
);
278 LEData
.RebaseOpcodes
.push_back(RebaseOp
);
280 if (RebaseOp
.Opcode
== MachO::REBASE_OPCODE_DONE
)
285 StringRef
ReadStringRef(const uint8_t *Start
) {
286 const uint8_t *Itr
= Start
;
289 return StringRef(reinterpret_cast<const char *>(Start
), Itr
- Start
);
292 void MachODumper::dumpBindOpcodes(
293 std::vector
<MachOYAML::BindOpcode
> &BindOpcodes
,
294 ArrayRef
<uint8_t> OpcodeBuffer
, bool Lazy
) {
295 for (auto OpCode
= OpcodeBuffer
.begin(); OpCode
!= OpcodeBuffer
.end();
297 MachOYAML::BindOpcode BindOp
;
299 static_cast<MachO::BindOpcode
>(*OpCode
& MachO::BIND_OPCODE_MASK
);
300 BindOp
.Imm
= *OpCode
& MachO::BIND_IMMEDIATE_MASK
;
306 switch (BindOp
.Opcode
) {
307 case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
:
308 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
309 BindOp
.ULEBExtraData
.push_back(ULEB
);
312 // Intentionally no break here -- this opcode has two ULEB values
314 case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB
:
315 case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
:
316 case MachO::BIND_OPCODE_ADD_ADDR_ULEB
:
317 case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
:
318 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
319 BindOp
.ULEBExtraData
.push_back(ULEB
);
323 case MachO::BIND_OPCODE_SET_ADDEND_SLEB
:
324 SLEB
= decodeSLEB128(OpCode
+ 1, &Count
);
325 BindOp
.SLEBExtraData
.push_back(SLEB
);
329 case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
:
330 BindOp
.Symbol
= ReadStringRef(OpCode
+ 1);
331 OpCode
+= BindOp
.Symbol
.size() + 1;
337 BindOpcodes
.push_back(BindOp
);
339 // Lazy bindings have DONE opcodes between operations, so we need to keep
340 // processing after a DONE.
341 if (!Lazy
&& BindOp
.Opcode
== MachO::BIND_OPCODE_DONE
)
/*!
 * \brief processes a node from the export trie, and its children.
 *
 * To my knowledge there is no documentation of the encoded format of this data
 * other than in the heads of the Apple linker engineers. To that end hopefully
 * this comment and the implementation below can serve to light the way for
 * anyone crazy enough to come down this path in the future.
 *
 * This function reads and preserves the trie structure of the export trie. To
 * my knowledge there is no code anywhere else that reads the data and preserves
 * the Trie. LD64 (sources available at opensource.apple.com) has a similar
 * implementation that parses the export trie into a vector. That code as well
 * as LLVM's libObject MachO implementation were the basis for this.
 *
 * The export trie is an encoded trie. The node serialization is a bit awkward.
 * The below pseudo-code is the best description I've come up with for it.
 *
 * struct SerializedNode {
 *   ULEB128 TerminalSize;
 *   struct TerminalData { <-- This is only present if TerminalSize > 0
 *     ULEB128 Flags;
 *     ULEB128 Address; <-- Present if (! Flags & REEXPORT )
 *     ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
 *                                      Flags & STUB_AND_RESOLVER )
 *     char[] ImportName; <-- Present if ( Flags & REEXPORT )
 *   }
 *   uint8_t ChildrenCount;
 *   Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
 *   SerializedNode Children[ChildrenCount]
 * }
 *
 * Terminal nodes are nodes that represent actual exports. They can appear
 * anywhere in the tree other than at the root; they do not need to be leaf
 * nodes. When reading the data out of the trie this routine reads it in-order,
 * but it puts the child names and offsets directly into the child nodes. This
 * results in looping over the children twice during serialization and
 * de-serialization, but it makes the YAML representation more human readable.
 *
 * Below is an example of the graph from a "Hello World" executable:
 *
 * -------
 * | ''  |
 * -------
 *    |
 * -------
 * | '_' |
 * -------
 *    |
 *    |----------------------------------------|
 *    |                                        |
 *  ------------------------      ---------------------
 *  | '_mh_execute_header' |      | 'main'            |
 *  | Flags: 0x00000000    |      | Flags: 0x00000000 |
 *  | Addr:  0x00000000    |      | Addr:  0x00001160 |
 *  ------------------------      ---------------------
 *
 * This graph represents the trie for the exports "__mh_execute_header" and
 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
 * terminal.
 */
407 const uint8_t *processExportNode(const uint8_t *CurrPtr
,
408 const uint8_t *const End
,
409 MachOYAML::ExportEntry
&Entry
) {
413 Entry
.TerminalSize
= decodeULEB128(CurrPtr
, &Count
);
415 if (Entry
.TerminalSize
!= 0) {
416 Entry
.Flags
= decodeULEB128(CurrPtr
, &Count
);
418 if (Entry
.Flags
& MachO::EXPORT_SYMBOL_FLAGS_REEXPORT
) {
420 Entry
.Other
= decodeULEB128(CurrPtr
, &Count
);
422 Entry
.ImportName
= std::string(reinterpret_cast<const char *>(CurrPtr
));
424 Entry
.Address
= decodeULEB128(CurrPtr
, &Count
);
426 if (Entry
.Flags
& MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER
) {
427 Entry
.Other
= decodeULEB128(CurrPtr
, &Count
);
433 uint8_t childrenCount
= *CurrPtr
++;
434 if (childrenCount
== 0)
437 Entry
.Children
.insert(Entry
.Children
.begin(), (size_t)childrenCount
,
438 MachOYAML::ExportEntry());
439 for (auto &Child
: Entry
.Children
) {
440 Child
.Name
= std::string(reinterpret_cast<const char *>(CurrPtr
));
441 CurrPtr
+= Child
.Name
.length() + 1;
442 Child
.NodeOffset
= decodeULEB128(CurrPtr
, &Count
);
445 for (auto &Child
: Entry
.Children
) {
446 CurrPtr
= processExportNode(CurrPtr
, End
, Child
);
451 void MachODumper::dumpExportTrie(std::unique_ptr
<MachOYAML::Object
> &Y
) {
452 MachOYAML::LinkEditData
&LEData
= Y
->LinkEdit
;
453 auto ExportsTrie
= Obj
.getDyldInfoExportsTrie();
454 processExportNode(ExportsTrie
.begin(), ExportsTrie
.end(), LEData
.ExportTrie
);
457 template <typename nlist_t
>
458 MachOYAML::NListEntry
constructNameList(const nlist_t
&nlist
) {
459 MachOYAML::NListEntry NL
;
460 NL
.n_strx
= nlist
.n_strx
;
461 NL
.n_type
= nlist
.n_type
;
462 NL
.n_sect
= nlist
.n_sect
;
463 NL
.n_desc
= nlist
.n_desc
;
464 NL
.n_value
= nlist
.n_value
;
468 void MachODumper::dumpSymbols(std::unique_ptr
<MachOYAML::Object
> &Y
) {
469 MachOYAML::LinkEditData
&LEData
= Y
->LinkEdit
;
471 for (auto Symbol
: Obj
.symbols()) {
472 MachOYAML::NListEntry NLE
=
474 ? constructNameList
<MachO::nlist_64
>(
475 Obj
.getSymbol64TableEntry(Symbol
.getRawDataRefImpl()))
476 : constructNameList
<MachO::nlist
>(
477 Obj
.getSymbolTableEntry(Symbol
.getRawDataRefImpl()));
478 LEData
.NameList
.push_back(NLE
);
481 StringRef RemainingTable
= Obj
.getStringTableData();
482 while (RemainingTable
.size() > 0) {
483 auto SymbolPair
= RemainingTable
.split('\0');
484 RemainingTable
= SymbolPair
.second
;
485 LEData
.StringTable
.push_back(SymbolPair
.first
);
489 Error
macho2yaml(raw_ostream
&Out
, const object::MachOObjectFile
&Obj
) {
490 MachODumper
Dumper(Obj
);
491 Expected
<std::unique_ptr
<MachOYAML::Object
>> YAML
= Dumper
.dump();
493 return YAML
.takeError();
495 yaml::YamlObjectFile YAMLFile
;
496 YAMLFile
.MachO
= std::move(YAML
.get());
498 yaml::Output
Yout(Out
);
500 return Error::success();
503 Error
macho2yaml(raw_ostream
&Out
, const object::MachOUniversalBinary
&Obj
) {
504 yaml::YamlObjectFile YAMLFile
;
505 YAMLFile
.FatMachO
.reset(new MachOYAML::UniversalBinary());
506 MachOYAML::UniversalBinary
&YAML
= *YAMLFile
.FatMachO
;
507 YAML
.Header
.magic
= Obj
.getMagic();
508 YAML
.Header
.nfat_arch
= Obj
.getNumberOfObjects();
510 for (auto Slice
: Obj
.objects()) {
511 MachOYAML::FatArch arch
;
512 arch
.cputype
= Slice
.getCPUType();
513 arch
.cpusubtype
= Slice
.getCPUSubType();
514 arch
.offset
= Slice
.getOffset();
515 arch
.size
= Slice
.getSize();
516 arch
.align
= Slice
.getAlign();
517 arch
.reserved
= Slice
.getReserved();
518 YAML
.FatArchs
.push_back(arch
);
520 auto SliceObj
= Slice
.getAsObjectFile();
522 return SliceObj
.takeError();
524 MachODumper
Dumper(*SliceObj
.get());
525 Expected
<std::unique_ptr
<MachOYAML::Object
>> YAMLObj
= Dumper
.dump();
527 return YAMLObj
.takeError();
528 YAML
.Slices
.push_back(*YAMLObj
.get());
531 yaml::Output
Yout(Out
);
533 return Error::success();
536 std::error_code
macho2yaml(raw_ostream
&Out
, const object::Binary
&Binary
) {
537 if (const auto *MachOObj
= dyn_cast
<object::MachOUniversalBinary
>(&Binary
)) {
538 if (auto Err
= macho2yaml(Out
, *MachOObj
)) {
539 return errorToErrorCode(std::move(Err
));
541 return obj2yaml_error::success
;
544 if (const auto *MachOObj
= dyn_cast
<object::MachOObjectFile
>(&Binary
)) {
545 if (auto Err
= macho2yaml(Out
, *MachOObj
)) {
546 return errorToErrorCode(std::move(Err
));
548 return obj2yaml_error::success
;
551 return obj2yaml_error::unsupported_obj_file_format
;