1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
12 #include "llvm/Object/MachOUniversal.h"
13 #include "llvm/ObjectYAML/ObjectYAML.h"
14 #include "llvm/Support/ErrorHandling.h"
15 #include "llvm/Support/LEB128.h"
17 #include <string.h> // for memcpy
23 template <typename StructType
>
24 const char *processLoadCommandData(
25 MachOYAML::LoadCommand
&LC
,
26 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
);
28 const object::MachOObjectFile
&Obj
;
29 void dumpHeader(std::unique_ptr
<MachOYAML::Object
> &Y
);
30 void dumpLoadCommands(std::unique_ptr
<MachOYAML::Object
> &Y
);
31 void dumpLinkEdit(std::unique_ptr
<MachOYAML::Object
> &Y
);
32 void dumpRebaseOpcodes(std::unique_ptr
<MachOYAML::Object
> &Y
);
33 void dumpBindOpcodes(std::vector
<MachOYAML::BindOpcode
> &BindOpcodes
,
34 ArrayRef
<uint8_t> OpcodeBuffer
, bool Lazy
= false);
35 void dumpExportTrie(std::unique_ptr
<MachOYAML::Object
> &Y
);
36 void dumpSymbols(std::unique_ptr
<MachOYAML::Object
> &Y
);
37 void dumpDebugAbbrev(DWARFContext
&DCtx
,
38 std::unique_ptr
<MachOYAML::Object
> &Y
);
39 void dumpDebugStrings(DWARFContext
&DCtx
,
40 std::unique_ptr
<MachOYAML::Object
> &Y
);
42 template <typename SectionType
>
43 MachOYAML::Section
constructSectionCommon(SectionType Sec
);
44 template <typename SectionType
>
45 MachOYAML::Section
constructSection(SectionType Sec
);
46 template <typename SectionType
, typename SegmentType
>
48 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
,
49 std::vector
<MachOYAML::Section
> &Sections
);
52 MachODumper(const object::MachOObjectFile
&O
) : Obj(O
) {}
53 Expected
<std::unique_ptr
<MachOYAML::Object
>> dump();
56 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
58 memcpy((void *)&(LC.Data.LCStruct##_data), LoadCmd.Ptr, \
59 sizeof(MachO::LCStruct)); \
60 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \
61 MachO::swapStruct(LC.Data.LCStruct##_data); \
62 EndPtr = processLoadCommandData<MachO::LCStruct>(LC, LoadCmd); \
65 template <typename SectionType
>
66 MachOYAML::Section
MachODumper::constructSectionCommon(SectionType Sec
) {
67 MachOYAML::Section TempSec
;
68 memcpy(reinterpret_cast<void *>(&TempSec
.sectname
[0]), &Sec
.sectname
[0], 16);
69 memcpy(reinterpret_cast<void *>(&TempSec
.segname
[0]), &Sec
.segname
[0], 16);
70 TempSec
.addr
= Sec
.addr
;
71 TempSec
.size
= Sec
.size
;
72 TempSec
.offset
= Sec
.offset
;
73 TempSec
.align
= Sec
.align
;
74 TempSec
.reloff
= Sec
.reloff
;
75 TempSec
.nreloc
= Sec
.nreloc
;
76 TempSec
.flags
= Sec
.flags
;
77 TempSec
.reserved1
= Sec
.reserved1
;
78 TempSec
.reserved2
= Sec
.reserved2
;
79 TempSec
.reserved3
= 0;
80 if (!MachO::isVirtualSection(Sec
.flags
& MachO::SECTION_TYPE
))
82 yaml::BinaryRef(Obj
.getSectionContents(Sec
.offset
, Sec
.size
));
87 MachOYAML::Section
MachODumper::constructSection(MachO::section Sec
) {
88 MachOYAML::Section TempSec
= constructSectionCommon(Sec
);
89 TempSec
.reserved3
= 0;
94 MachOYAML::Section
MachODumper::constructSection(MachO::section_64 Sec
) {
95 MachOYAML::Section TempSec
= constructSectionCommon(Sec
);
96 TempSec
.reserved3
= Sec
.reserved3
;
100 template <typename SectionType
, typename SegmentType
>
101 const char *MachODumper::extractSections(
102 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
,
103 std::vector
<MachOYAML::Section
> &Sections
) {
104 auto End
= LoadCmd
.Ptr
+ LoadCmd
.C
.cmdsize
;
105 const SectionType
*Curr
=
106 reinterpret_cast<const SectionType
*>(LoadCmd
.Ptr
+ sizeof(SegmentType
));
107 for (; reinterpret_cast<const void *>(Curr
) < End
; Curr
++) {
108 if (Obj
.isLittleEndian() != sys::IsLittleEndianHost
) {
110 memcpy((void *)&Sec
, Curr
, sizeof(SectionType
));
111 MachO::swapStruct(Sec
);
112 Sections
.push_back(constructSection(Sec
));
114 Sections
.push_back(constructSection(*Curr
));
117 return reinterpret_cast<const char *>(Curr
);
120 template <typename StructType
>
121 const char *MachODumper::processLoadCommandData(
122 MachOYAML::LoadCommand
&LC
,
123 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
124 return LoadCmd
.Ptr
+ sizeof(StructType
);
128 const char *MachODumper::processLoadCommandData
<MachO::segment_command
>(
129 MachOYAML::LoadCommand
&LC
,
130 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
131 return extractSections
<MachO::section
, MachO::segment_command
>(LoadCmd
,
136 const char *MachODumper::processLoadCommandData
<MachO::segment_command_64
>(
137 MachOYAML::LoadCommand
&LC
,
138 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
139 return extractSections
<MachO::section_64
, MachO::segment_command_64
>(
140 LoadCmd
, LC
.Sections
);
143 template <typename StructType
>
145 readString(MachOYAML::LoadCommand
&LC
,
146 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
147 auto Start
= LoadCmd
.Ptr
+ sizeof(StructType
);
148 auto MaxSize
= LoadCmd
.C
.cmdsize
- sizeof(StructType
);
149 auto Size
= strnlen(Start
, MaxSize
);
150 LC
.PayloadString
= StringRef(Start
, Size
).str();
155 const char *MachODumper::processLoadCommandData
<MachO::dylib_command
>(
156 MachOYAML::LoadCommand
&LC
,
157 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
158 return readString
<MachO::dylib_command
>(LC
, LoadCmd
);
162 const char *MachODumper::processLoadCommandData
<MachO::dylinker_command
>(
163 MachOYAML::LoadCommand
&LC
,
164 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
165 return readString
<MachO::dylinker_command
>(LC
, LoadCmd
);
169 const char *MachODumper::processLoadCommandData
<MachO::rpath_command
>(
170 MachOYAML::LoadCommand
&LC
,
171 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
172 return readString
<MachO::rpath_command
>(LC
, LoadCmd
);
176 const char *MachODumper::processLoadCommandData
<MachO::build_version_command
>(
177 MachOYAML::LoadCommand
&LC
,
178 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
179 auto Start
= LoadCmd
.Ptr
+ sizeof(MachO::build_version_command
);
180 auto NTools
= LC
.Data
.build_version_command_data
.ntools
;
181 for (unsigned i
= 0; i
< NTools
; ++i
) {
182 auto Curr
= Start
+ i
* sizeof(MachO::build_tool_version
);
183 MachO::build_tool_version BV
;
184 memcpy((void *)&BV
, Curr
, sizeof(MachO::build_tool_version
));
185 if (Obj
.isLittleEndian() != sys::IsLittleEndianHost
)
186 MachO::swapStruct(BV
);
187 LC
.Tools
.push_back(BV
);
189 return Start
+ NTools
* sizeof(MachO::build_tool_version
);
192 Expected
<std::unique_ptr
<MachOYAML::Object
>> MachODumper::dump() {
193 auto Y
= std::make_unique
<MachOYAML::Object
>();
194 Y
->IsLittleEndian
= Obj
.isLittleEndian();
199 std::unique_ptr
<DWARFContext
> DICtx
= DWARFContext::create(Obj
);
200 if (auto Err
= dwarf2yaml(*DICtx
, Y
->DWARF
))
201 return errorCodeToError(Err
);
205 void MachODumper::dumpHeader(std::unique_ptr
<MachOYAML::Object
> &Y
) {
206 Y
->Header
.magic
= Obj
.getHeader().magic
;
207 Y
->Header
.cputype
= Obj
.getHeader().cputype
;
208 Y
->Header
.cpusubtype
= Obj
.getHeader().cpusubtype
;
209 Y
->Header
.filetype
= Obj
.getHeader().filetype
;
210 Y
->Header
.ncmds
= Obj
.getHeader().ncmds
;
211 Y
->Header
.sizeofcmds
= Obj
.getHeader().sizeofcmds
;
212 Y
->Header
.flags
= Obj
.getHeader().flags
;
213 Y
->Header
.reserved
= 0;
216 void MachODumper::dumpLoadCommands(std::unique_ptr
<MachOYAML::Object
> &Y
) {
217 for (auto LoadCmd
: Obj
.load_commands()) {
218 MachOYAML::LoadCommand LC
;
219 const char *EndPtr
= LoadCmd
.Ptr
;
220 switch (LoadCmd
.C
.cmd
) {
222 memcpy((void *)&(LC
.Data
.load_command_data
), LoadCmd
.Ptr
,
223 sizeof(MachO::load_command
));
224 if (Obj
.isLittleEndian() != sys::IsLittleEndianHost
)
225 MachO::swapStruct(LC
.Data
.load_command_data
);
226 EndPtr
= processLoadCommandData
<MachO::load_command
>(LC
, LoadCmd
);
228 #include "llvm/BinaryFormat/MachO.def"
230 auto RemainingBytes
= LoadCmd
.C
.cmdsize
- (EndPtr
- LoadCmd
.Ptr
);
231 if (!std::all_of(EndPtr
, &EndPtr
[RemainingBytes
],
232 [](const char C
) { return C
== 0; })) {
233 LC
.PayloadBytes
.insert(LC
.PayloadBytes
.end(), EndPtr
,
234 &EndPtr
[RemainingBytes
]);
237 LC
.ZeroPadBytes
= RemainingBytes
;
238 Y
->LoadCommands
.push_back(std::move(LC
));
242 void MachODumper::dumpLinkEdit(std::unique_ptr
<MachOYAML::Object
> &Y
) {
243 dumpRebaseOpcodes(Y
);
244 dumpBindOpcodes(Y
->LinkEdit
.BindOpcodes
, Obj
.getDyldInfoBindOpcodes());
245 dumpBindOpcodes(Y
->LinkEdit
.WeakBindOpcodes
,
246 Obj
.getDyldInfoWeakBindOpcodes());
247 dumpBindOpcodes(Y
->LinkEdit
.LazyBindOpcodes
, Obj
.getDyldInfoLazyBindOpcodes(),
253 void MachODumper::dumpRebaseOpcodes(std::unique_ptr
<MachOYAML::Object
> &Y
) {
254 MachOYAML::LinkEditData
&LEData
= Y
->LinkEdit
;
256 auto RebaseOpcodes
= Obj
.getDyldInfoRebaseOpcodes();
257 for (auto OpCode
= RebaseOpcodes
.begin(); OpCode
!= RebaseOpcodes
.end();
259 MachOYAML::RebaseOpcode RebaseOp
;
261 static_cast<MachO::RebaseOpcode
>(*OpCode
& MachO::REBASE_OPCODE_MASK
);
262 RebaseOp
.Imm
= *OpCode
& MachO::REBASE_IMMEDIATE_MASK
;
267 switch (RebaseOp
.Opcode
) {
268 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
:
270 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
271 RebaseOp
.ExtraData
.push_back(ULEB
);
274 // Intentionally no break here -- This opcode has two ULEB values
275 case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
:
276 case MachO::REBASE_OPCODE_ADD_ADDR_ULEB
:
277 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES
:
278 case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
:
280 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
281 RebaseOp
.ExtraData
.push_back(ULEB
);
288 LEData
.RebaseOpcodes
.push_back(RebaseOp
);
290 if (RebaseOp
.Opcode
== MachO::REBASE_OPCODE_DONE
)
295 StringRef
ReadStringRef(const uint8_t *Start
) {
296 const uint8_t *Itr
= Start
;
299 return StringRef(reinterpret_cast<const char *>(Start
), Itr
- Start
);
302 void MachODumper::dumpBindOpcodes(
303 std::vector
<MachOYAML::BindOpcode
> &BindOpcodes
,
304 ArrayRef
<uint8_t> OpcodeBuffer
, bool Lazy
) {
305 for (auto OpCode
= OpcodeBuffer
.begin(); OpCode
!= OpcodeBuffer
.end();
307 MachOYAML::BindOpcode BindOp
;
309 static_cast<MachO::BindOpcode
>(*OpCode
& MachO::BIND_OPCODE_MASK
);
310 BindOp
.Imm
= *OpCode
& MachO::BIND_IMMEDIATE_MASK
;
316 switch (BindOp
.Opcode
) {
317 case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
:
318 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
319 BindOp
.ULEBExtraData
.push_back(ULEB
);
322 // Intentionally no break here -- this opcode has two ULEB values
324 case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB
:
325 case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
:
326 case MachO::BIND_OPCODE_ADD_ADDR_ULEB
:
327 case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
:
328 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
329 BindOp
.ULEBExtraData
.push_back(ULEB
);
333 case MachO::BIND_OPCODE_SET_ADDEND_SLEB
:
334 SLEB
= decodeSLEB128(OpCode
+ 1, &Count
);
335 BindOp
.SLEBExtraData
.push_back(SLEB
);
339 case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
:
340 BindOp
.Symbol
= ReadStringRef(OpCode
+ 1);
341 OpCode
+= BindOp
.Symbol
.size() + 1;
347 BindOpcodes
.push_back(BindOp
);
349 // Lazy bindings have DONE opcodes between operations, so we need to keep
350 // processing after a DONE.
351 if (!Lazy
&& BindOp
.Opcode
== MachO::BIND_OPCODE_DONE
)
357 * \brief Processes a node from the export trie, and its children.
359 * To my knowledge there is no documentation of the encoded format of this data
360 * other than in the heads of the Apple linker engineers. To that end hopefully
361 * this comment and the implementation below can serve to light the way for
362 * anyone crazy enough to come down this path in the future.
364 * This function reads and preserves the trie structure of the export trie. To
365 * my knowledge there is no code anywhere else that reads the data and preserves
366 * the Trie. LD64 (sources available at opensource.apple.com) has a similar
367 * implementation that parses the export trie into a vector. That code as well
368 * as LLVM's libObject MachO implementation were the basis for this.
370 * The export trie is an encoded trie. The node serialization is a bit awkward.
371 * The below pseudo-code is the best description I've come up with for it.
373 * struct SerializedNode {
374 * ULEB128 TerminalSize;
375 * struct TerminalData { <-- This is only present if TerminalSize > 0
377 * ULEB128 Address; <-- Present if ( !(Flags & REEXPORT) )
378 * ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
379 * Flags & STUB_AND_RESOLVER )
380 * char[] ImportName; <-- Present if ( Flags & REEXPORT )
382 * uint8_t ChildrenCount;
383 * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
384 * SerializedNode Children[ChildrenCount]
387 * Terminal nodes are nodes that represent actual exports. They can appear
388 * anywhere in the tree other than at the root; they do not need to be leaf
389 * nodes. When reading the data out of the trie this routine reads it in-order,
390 * but it puts the child names and offsets directly into the child nodes. This
391 * results in looping over the children twice during serialization and
392 * de-serialization, but it makes the YAML representation more human readable.
394 * Below is an example of the graph from a "Hello World" executable:
404 * |----------------------------------------|
406 * ------------------------ ---------------------
407 * | '_mh_execute_header' | | 'main' |
408 * | Flags: 0x00000000 | | Flags: 0x00000000 |
409 * | Addr: 0x00000000 | | Addr: 0x00001160 |
410 * ------------------------ ---------------------
412 * This graph represents the trie for the exports "__mh_execute_header" and
413 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
417 const uint8_t *processExportNode(const uint8_t *CurrPtr
,
418 const uint8_t *const End
,
419 MachOYAML::ExportEntry
&Entry
) {
423 Entry
.TerminalSize
= decodeULEB128(CurrPtr
, &Count
);
425 if (Entry
.TerminalSize
!= 0) {
426 Entry
.Flags
= decodeULEB128(CurrPtr
, &Count
);
428 if (Entry
.Flags
& MachO::EXPORT_SYMBOL_FLAGS_REEXPORT
) {
430 Entry
.Other
= decodeULEB128(CurrPtr
, &Count
);
432 Entry
.ImportName
= std::string(reinterpret_cast<const char *>(CurrPtr
));
434 Entry
.Address
= decodeULEB128(CurrPtr
, &Count
);
436 if (Entry
.Flags
& MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER
) {
437 Entry
.Other
= decodeULEB128(CurrPtr
, &Count
);
443 uint8_t childrenCount
= *CurrPtr
++;
444 if (childrenCount
== 0)
447 Entry
.Children
.insert(Entry
.Children
.begin(), (size_t)childrenCount
,
448 MachOYAML::ExportEntry());
449 for (auto &Child
: Entry
.Children
) {
450 Child
.Name
= std::string(reinterpret_cast<const char *>(CurrPtr
));
451 CurrPtr
+= Child
.Name
.length() + 1;
452 Child
.NodeOffset
= decodeULEB128(CurrPtr
, &Count
);
455 for (auto &Child
: Entry
.Children
) {
456 CurrPtr
= processExportNode(CurrPtr
, End
, Child
);
461 void MachODumper::dumpExportTrie(std::unique_ptr
<MachOYAML::Object
> &Y
) {
462 MachOYAML::LinkEditData
&LEData
= Y
->LinkEdit
;
463 auto ExportsTrie
= Obj
.getDyldInfoExportsTrie();
464 processExportNode(ExportsTrie
.begin(), ExportsTrie
.end(), LEData
.ExportTrie
);
467 template <typename nlist_t
>
468 MachOYAML::NListEntry
constructNameList(const nlist_t
&nlist
) {
469 MachOYAML::NListEntry NL
;
470 NL
.n_strx
= nlist
.n_strx
;
471 NL
.n_type
= nlist
.n_type
;
472 NL
.n_sect
= nlist
.n_sect
;
473 NL
.n_desc
= nlist
.n_desc
;
474 NL
.n_value
= nlist
.n_value
;
478 void MachODumper::dumpSymbols(std::unique_ptr
<MachOYAML::Object
> &Y
) {
479 MachOYAML::LinkEditData
&LEData
= Y
->LinkEdit
;
481 for (auto Symbol
: Obj
.symbols()) {
482 MachOYAML::NListEntry NLE
=
484 ? constructNameList
<MachO::nlist_64
>(
485 Obj
.getSymbol64TableEntry(Symbol
.getRawDataRefImpl()))
486 : constructNameList
<MachO::nlist
>(
487 Obj
.getSymbolTableEntry(Symbol
.getRawDataRefImpl()));
488 LEData
.NameList
.push_back(NLE
);
491 StringRef RemainingTable
= Obj
.getStringTableData();
492 while (RemainingTable
.size() > 0) {
493 auto SymbolPair
= RemainingTable
.split('\0');
494 RemainingTable
= SymbolPair
.second
;
495 LEData
.StringTable
.push_back(SymbolPair
.first
);
499 Error
macho2yaml(raw_ostream
&Out
, const object::MachOObjectFile
&Obj
) {
500 MachODumper
Dumper(Obj
);
501 Expected
<std::unique_ptr
<MachOYAML::Object
>> YAML
= Dumper
.dump();
503 return YAML
.takeError();
505 yaml::YamlObjectFile YAMLFile
;
506 YAMLFile
.MachO
= std::move(YAML
.get());
508 yaml::Output
Yout(Out
);
510 return Error::success();
513 Error
macho2yaml(raw_ostream
&Out
, const object::MachOUniversalBinary
&Obj
) {
514 yaml::YamlObjectFile YAMLFile
;
515 YAMLFile
.FatMachO
.reset(new MachOYAML::UniversalBinary());
516 MachOYAML::UniversalBinary
&YAML
= *YAMLFile
.FatMachO
;
517 YAML
.Header
.magic
= Obj
.getMagic();
518 YAML
.Header
.nfat_arch
= Obj
.getNumberOfObjects();
520 for (auto Slice
: Obj
.objects()) {
521 MachOYAML::FatArch arch
;
522 arch
.cputype
= Slice
.getCPUType();
523 arch
.cpusubtype
= Slice
.getCPUSubType();
524 arch
.offset
= Slice
.getOffset();
525 arch
.size
= Slice
.getSize();
526 arch
.align
= Slice
.getAlign();
527 arch
.reserved
= Slice
.getReserved();
528 YAML
.FatArchs
.push_back(arch
);
530 auto SliceObj
= Slice
.getAsObjectFile();
532 return SliceObj
.takeError();
534 MachODumper
Dumper(*SliceObj
.get());
535 Expected
<std::unique_ptr
<MachOYAML::Object
>> YAMLObj
= Dumper
.dump();
537 return YAMLObj
.takeError();
538 YAML
.Slices
.push_back(*YAMLObj
.get());
541 yaml::Output
Yout(Out
);
543 return Error::success();
546 std::error_code
macho2yaml(raw_ostream
&Out
, const object::Binary
&Binary
) {
547 if (const auto *MachOObj
= dyn_cast
<object::MachOUniversalBinary
>(&Binary
)) {
548 if (auto Err
= macho2yaml(Out
, *MachOObj
)) {
549 return errorToErrorCode(std::move(Err
));
551 return obj2yaml_error::success
;
554 if (const auto *MachOObj
= dyn_cast
<object::MachOObjectFile
>(&Binary
)) {
555 if (auto Err
= macho2yaml(Out
, *MachOObj
)) {
556 return errorToErrorCode(std::move(Err
));
558 return obj2yaml_error::success
;
561 return obj2yaml_error::unsupported_obj_file_format
;