1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
12 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
13 #include "llvm/Object/MachOUniversal.h"
14 #include "llvm/ObjectYAML/ObjectYAML.h"
15 #include "llvm/Support/ErrorHandling.h"
16 #include "llvm/Support/LEB128.h"
18 #include <string.h> // for memcpy
24 template <typename StructType
>
25 const char *processLoadCommandData(
26 MachOYAML::LoadCommand
&LC
,
27 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
);
29 const object::MachOObjectFile
&Obj
;
30 void dumpHeader(std::unique_ptr
<MachOYAML::Object
> &Y
);
31 void dumpLoadCommands(std::unique_ptr
<MachOYAML::Object
> &Y
);
32 void dumpLinkEdit(std::unique_ptr
<MachOYAML::Object
> &Y
);
33 void dumpRebaseOpcodes(std::unique_ptr
<MachOYAML::Object
> &Y
);
34 void dumpBindOpcodes(std::vector
<MachOYAML::BindOpcode
> &BindOpcodes
,
35 ArrayRef
<uint8_t> OpcodeBuffer
, bool Lazy
= false);
36 void dumpExportTrie(std::unique_ptr
<MachOYAML::Object
> &Y
);
37 void dumpSymbols(std::unique_ptr
<MachOYAML::Object
> &Y
);
38 void dumpDebugAbbrev(DWARFContext
&DCtx
,
39 std::unique_ptr
<MachOYAML::Object
> &Y
);
40 void dumpDebugStrings(DWARFContext
&DCtx
,
41 std::unique_ptr
<MachOYAML::Object
> &Y
);
44 MachODumper(const object::MachOObjectFile
&O
) : Obj(O
) {}
45 Expected
<std::unique_ptr
<MachOYAML::Object
>> dump();
48 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
50 memcpy((void *) & (LC.Data.LCStruct##_data), LoadCmd.Ptr, \
51 sizeof(MachO::LCStruct)); \
52 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \
53 MachO::swapStruct(LC.Data.LCStruct##_data); \
54 EndPtr = processLoadCommandData<MachO::LCStruct>(LC, LoadCmd); \
57 template <typename SectionType
>
58 MachOYAML::Section
constructSectionCommon(SectionType Sec
) {
59 MachOYAML::Section TempSec
;
60 memcpy(reinterpret_cast<void *>(&TempSec
.sectname
[0]), &Sec
.sectname
[0], 16);
61 memcpy(reinterpret_cast<void *>(&TempSec
.segname
[0]), &Sec
.segname
[0], 16);
62 TempSec
.addr
= Sec
.addr
;
63 TempSec
.size
= Sec
.size
;
64 TempSec
.offset
= Sec
.offset
;
65 TempSec
.align
= Sec
.align
;
66 TempSec
.reloff
= Sec
.reloff
;
67 TempSec
.nreloc
= Sec
.nreloc
;
68 TempSec
.flags
= Sec
.flags
;
69 TempSec
.reserved1
= Sec
.reserved1
;
70 TempSec
.reserved2
= Sec
.reserved2
;
71 TempSec
.reserved3
= 0;
75 template <typename SectionType
>
76 MachOYAML::Section
constructSection(SectionType Sec
);
78 template <> MachOYAML::Section
constructSection(MachO::section Sec
) {
79 MachOYAML::Section TempSec
= constructSectionCommon(Sec
);
80 TempSec
.reserved3
= 0;
84 template <> MachOYAML::Section
constructSection(MachO::section_64 Sec
) {
85 MachOYAML::Section TempSec
= constructSectionCommon(Sec
);
86 TempSec
.reserved3
= Sec
.reserved3
;
90 template <typename SectionType
, typename SegmentType
>
92 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
,
93 std::vector
<MachOYAML::Section
> &Sections
,
94 bool IsLittleEndian
) {
95 auto End
= LoadCmd
.Ptr
+ LoadCmd
.C
.cmdsize
;
96 const SectionType
*Curr
=
97 reinterpret_cast<const SectionType
*>(LoadCmd
.Ptr
+ sizeof(SegmentType
));
98 for (; reinterpret_cast<const void *>(Curr
) < End
; Curr
++) {
99 if (IsLittleEndian
!= sys::IsLittleEndianHost
) {
101 memcpy((void *)&Sec
, Curr
, sizeof(SectionType
));
102 MachO::swapStruct(Sec
);
103 Sections
.push_back(constructSection(Sec
));
105 Sections
.push_back(constructSection(*Curr
));
108 return reinterpret_cast<const char *>(Curr
);
111 template <typename StructType
>
112 const char *MachODumper::processLoadCommandData(
113 MachOYAML::LoadCommand
&LC
,
114 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
115 return LoadCmd
.Ptr
+ sizeof(StructType
);
119 const char *MachODumper::processLoadCommandData
<MachO::segment_command
>(
120 MachOYAML::LoadCommand
&LC
,
121 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
122 return extractSections
<MachO::section
, MachO::segment_command
>(
123 LoadCmd
, LC
.Sections
, Obj
.isLittleEndian());
127 const char *MachODumper::processLoadCommandData
<MachO::segment_command_64
>(
128 MachOYAML::LoadCommand
&LC
,
129 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
130 return extractSections
<MachO::section_64
, MachO::segment_command_64
>(
131 LoadCmd
, LC
.Sections
, Obj
.isLittleEndian());
134 template <typename StructType
>
136 readString(MachOYAML::LoadCommand
&LC
,
137 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
138 auto Start
= LoadCmd
.Ptr
+ sizeof(StructType
);
139 auto MaxSize
= LoadCmd
.C
.cmdsize
- sizeof(StructType
);
140 auto Size
= strnlen(Start
, MaxSize
);
141 LC
.PayloadString
= StringRef(Start
, Size
).str();
146 const char *MachODumper::processLoadCommandData
<MachO::dylib_command
>(
147 MachOYAML::LoadCommand
&LC
,
148 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
149 return readString
<MachO::dylib_command
>(LC
, LoadCmd
);
153 const char *MachODumper::processLoadCommandData
<MachO::dylinker_command
>(
154 MachOYAML::LoadCommand
&LC
,
155 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
156 return readString
<MachO::dylinker_command
>(LC
, LoadCmd
);
160 const char *MachODumper::processLoadCommandData
<MachO::rpath_command
>(
161 MachOYAML::LoadCommand
&LC
,
162 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
163 return readString
<MachO::rpath_command
>(LC
, LoadCmd
);
167 const char *MachODumper::processLoadCommandData
<MachO::build_version_command
>(
168 MachOYAML::LoadCommand
&LC
,
169 const llvm::object::MachOObjectFile::LoadCommandInfo
&LoadCmd
) {
170 auto Start
= LoadCmd
.Ptr
+ sizeof(MachO::build_version_command
);
171 auto NTools
= LC
.Data
.build_version_command_data
.ntools
;
172 for (unsigned i
= 0; i
< NTools
; ++i
) {
173 auto Curr
= Start
+ i
* sizeof(MachO::build_tool_version
);
174 MachO::build_tool_version BV
;
175 memcpy((void *)&BV
, Curr
, sizeof(MachO::build_tool_version
));
176 if (Obj
.isLittleEndian() != sys::IsLittleEndianHost
)
177 MachO::swapStruct(BV
);
178 LC
.Tools
.push_back(BV
);
180 return Start
+ NTools
* sizeof(MachO::build_tool_version
);
183 Expected
<std::unique_ptr
<MachOYAML::Object
>> MachODumper::dump() {
184 auto Y
= make_unique
<MachOYAML::Object
>();
185 Y
->IsLittleEndian
= Obj
.isLittleEndian();
190 std::unique_ptr
<DWARFContext
> DICtx
= DWARFContext::create(Obj
);
191 if (auto Err
= dwarf2yaml(*DICtx
, Y
->DWARF
))
192 return errorCodeToError(Err
);
196 void MachODumper::dumpHeader(std::unique_ptr
<MachOYAML::Object
> &Y
) {
197 Y
->Header
.magic
= Obj
.getHeader().magic
;
198 Y
->Header
.cputype
= Obj
.getHeader().cputype
;
199 Y
->Header
.cpusubtype
= Obj
.getHeader().cpusubtype
;
200 Y
->Header
.filetype
= Obj
.getHeader().filetype
;
201 Y
->Header
.ncmds
= Obj
.getHeader().ncmds
;
202 Y
->Header
.sizeofcmds
= Obj
.getHeader().sizeofcmds
;
203 Y
->Header
.flags
= Obj
.getHeader().flags
;
204 Y
->Header
.reserved
= 0;
207 void MachODumper::dumpLoadCommands(std::unique_ptr
<MachOYAML::Object
> &Y
) {
208 for (auto LoadCmd
: Obj
.load_commands()) {
209 MachOYAML::LoadCommand LC
;
210 const char *EndPtr
= LoadCmd
.Ptr
;
211 switch (LoadCmd
.C
.cmd
) {
213 memcpy((void *)&(LC
.Data
.load_command_data
), LoadCmd
.Ptr
,
214 sizeof(MachO::load_command
));
215 if (Obj
.isLittleEndian() != sys::IsLittleEndianHost
)
216 MachO::swapStruct(LC
.Data
.load_command_data
);
217 EndPtr
= processLoadCommandData
<MachO::load_command
>(LC
, LoadCmd
);
219 #include "llvm/BinaryFormat/MachO.def"
221 auto RemainingBytes
= LoadCmd
.C
.cmdsize
- (EndPtr
- LoadCmd
.Ptr
);
222 if (!std::all_of(EndPtr
, &EndPtr
[RemainingBytes
],
223 [](const char C
) { return C
== 0; })) {
224 LC
.PayloadBytes
.insert(LC
.PayloadBytes
.end(), EndPtr
,
225 &EndPtr
[RemainingBytes
]);
228 LC
.ZeroPadBytes
= RemainingBytes
;
229 Y
->LoadCommands
.push_back(std::move(LC
));
233 void MachODumper::dumpLinkEdit(std::unique_ptr
<MachOYAML::Object
> &Y
) {
234 dumpRebaseOpcodes(Y
);
235 dumpBindOpcodes(Y
->LinkEdit
.BindOpcodes
, Obj
.getDyldInfoBindOpcodes());
236 dumpBindOpcodes(Y
->LinkEdit
.WeakBindOpcodes
,
237 Obj
.getDyldInfoWeakBindOpcodes());
238 dumpBindOpcodes(Y
->LinkEdit
.LazyBindOpcodes
, Obj
.getDyldInfoLazyBindOpcodes(),
244 void MachODumper::dumpRebaseOpcodes(std::unique_ptr
<MachOYAML::Object
> &Y
) {
245 MachOYAML::LinkEditData
&LEData
= Y
->LinkEdit
;
247 auto RebaseOpcodes
= Obj
.getDyldInfoRebaseOpcodes();
248 for (auto OpCode
= RebaseOpcodes
.begin(); OpCode
!= RebaseOpcodes
.end();
250 MachOYAML::RebaseOpcode RebaseOp
;
252 static_cast<MachO::RebaseOpcode
>(*OpCode
& MachO::REBASE_OPCODE_MASK
);
253 RebaseOp
.Imm
= *OpCode
& MachO::REBASE_IMMEDIATE_MASK
;
258 switch (RebaseOp
.Opcode
) {
259 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
:
261 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
262 RebaseOp
.ExtraData
.push_back(ULEB
);
265 // Intentionally no break here -- This opcode has two ULEB values
266 case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
:
267 case MachO::REBASE_OPCODE_ADD_ADDR_ULEB
:
268 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES
:
269 case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
:
271 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
272 RebaseOp
.ExtraData
.push_back(ULEB
);
279 LEData
.RebaseOpcodes
.push_back(RebaseOp
);
281 if (RebaseOp
.Opcode
== MachO::REBASE_OPCODE_DONE
)
286 StringRef
ReadStringRef(const uint8_t *Start
) {
287 const uint8_t *Itr
= Start
;
290 return StringRef(reinterpret_cast<const char *>(Start
), Itr
- Start
);
293 void MachODumper::dumpBindOpcodes(
294 std::vector
<MachOYAML::BindOpcode
> &BindOpcodes
,
295 ArrayRef
<uint8_t> OpcodeBuffer
, bool Lazy
) {
296 for (auto OpCode
= OpcodeBuffer
.begin(); OpCode
!= OpcodeBuffer
.end();
298 MachOYAML::BindOpcode BindOp
;
300 static_cast<MachO::BindOpcode
>(*OpCode
& MachO::BIND_OPCODE_MASK
);
301 BindOp
.Imm
= *OpCode
& MachO::BIND_IMMEDIATE_MASK
;
307 switch (BindOp
.Opcode
) {
308 case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
:
309 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
310 BindOp
.ULEBExtraData
.push_back(ULEB
);
313 // Intentionally no break here -- this opcode has two ULEB values
315 case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB
:
316 case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
:
317 case MachO::BIND_OPCODE_ADD_ADDR_ULEB
:
318 case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
:
319 ULEB
= decodeULEB128(OpCode
+ 1, &Count
);
320 BindOp
.ULEBExtraData
.push_back(ULEB
);
324 case MachO::BIND_OPCODE_SET_ADDEND_SLEB
:
325 SLEB
= decodeSLEB128(OpCode
+ 1, &Count
);
326 BindOp
.SLEBExtraData
.push_back(SLEB
);
330 case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
:
331 BindOp
.Symbol
= ReadStringRef(OpCode
+ 1);
332 OpCode
+= BindOp
.Symbol
.size() + 1;
338 BindOpcodes
.push_back(BindOp
);
340 // Lazy bindings have DONE opcodes between operations, so we need to keep
341 // processing after a DONE.
342 if (!Lazy
&& BindOp
.Opcode
== MachO::BIND_OPCODE_DONE
)
348 * \brief processes a node from the export trie, and its children.
350 * To my knowledge there is no documentation of the encoded format of this data
351 * other than in the heads of the Apple linker engineers. To that end hopefully
352 * this comment and the implementation below can serve to light the way for
353 * anyone crazy enough to come down this path in the future.
355 * This function reads and preserves the trie structure of the export trie. To
356 * my knowledge there is no code anywhere else that reads the data and preserves
357 * the Trie. LD64 (sources available at opensource.apple.com) has a similar
358 * implementation that parses the export trie into a vector. That code as well
359 * as LLVM's libObject MachO implementation were the basis for this.
361 * The export trie is an encoded trie. The node serialization is a bit awkward.
362 * The below pseudo-code is the best description I've come up with for it.
364 * struct SerializedNode {
365 * ULEB128 TerminalSize;
366 * struct TerminalData { <-- This is only present if TerminalSize > 0
368 * ULEB128 Address; <-- Present if ( !(Flags & REEXPORT) )
369 * ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
370 * Flags & STUB_AND_RESOLVER )
371 * char[] ImportName; <-- Present if ( Flags & REEXPORT )
373 * uint8_t ChildrenCount;
374 * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
375 * SerializedNode Children[ChildrenCount]
378 * Terminal nodes are nodes that represent actual exports. They can appear
379 * anywhere in the tree other than at the root; they do not need to be leaf
380 * nodes. When reading the data out of the trie this routine reads it in-order,
381 * but it puts the child names and offsets directly into the child nodes. This
382 * results in looping over the children twice during serialization and
383 * de-serialization, but it makes the YAML representation more human readable.
385 * Below is an example of the graph from a "Hello World" executable:
395 * |----------------------------------------|
397 * ------------------------ ---------------------
398 * | '_mh_execute_header' | | 'main' |
399 * | Flags: 0x00000000 | | Flags: 0x00000000 |
400 * | Addr: 0x00000000 | | Addr: 0x00001160 |
401 * ------------------------ ---------------------
403 * This graph represents the trie for the exports "__mh_execute_header" and
404 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
408 const uint8_t *processExportNode(const uint8_t *CurrPtr
,
409 const uint8_t *const End
,
410 MachOYAML::ExportEntry
&Entry
) {
414 Entry
.TerminalSize
= decodeULEB128(CurrPtr
, &Count
);
416 if (Entry
.TerminalSize
!= 0) {
417 Entry
.Flags
= decodeULEB128(CurrPtr
, &Count
);
419 if (Entry
.Flags
& MachO::EXPORT_SYMBOL_FLAGS_REEXPORT
) {
421 Entry
.Other
= decodeULEB128(CurrPtr
, &Count
);
423 Entry
.ImportName
= std::string(reinterpret_cast<const char *>(CurrPtr
));
425 Entry
.Address
= decodeULEB128(CurrPtr
, &Count
);
427 if (Entry
.Flags
& MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER
) {
428 Entry
.Other
= decodeULEB128(CurrPtr
, &Count
);
434 uint8_t childrenCount
= *CurrPtr
++;
435 if (childrenCount
== 0)
438 Entry
.Children
.insert(Entry
.Children
.begin(), (size_t)childrenCount
,
439 MachOYAML::ExportEntry());
440 for (auto &Child
: Entry
.Children
) {
441 Child
.Name
= std::string(reinterpret_cast<const char *>(CurrPtr
));
442 CurrPtr
+= Child
.Name
.length() + 1;
443 Child
.NodeOffset
= decodeULEB128(CurrPtr
, &Count
);
446 for (auto &Child
: Entry
.Children
) {
447 CurrPtr
= processExportNode(CurrPtr
, End
, Child
);
452 void MachODumper::dumpExportTrie(std::unique_ptr
<MachOYAML::Object
> &Y
) {
453 MachOYAML::LinkEditData
&LEData
= Y
->LinkEdit
;
454 auto ExportsTrie
= Obj
.getDyldInfoExportsTrie();
455 processExportNode(ExportsTrie
.begin(), ExportsTrie
.end(), LEData
.ExportTrie
);
458 template <typename nlist_t
>
459 MachOYAML::NListEntry
constructNameList(const nlist_t
&nlist
) {
460 MachOYAML::NListEntry NL
;
461 NL
.n_strx
= nlist
.n_strx
;
462 NL
.n_type
= nlist
.n_type
;
463 NL
.n_sect
= nlist
.n_sect
;
464 NL
.n_desc
= nlist
.n_desc
;
465 NL
.n_value
= nlist
.n_value
;
469 void MachODumper::dumpSymbols(std::unique_ptr
<MachOYAML::Object
> &Y
) {
470 MachOYAML::LinkEditData
&LEData
= Y
->LinkEdit
;
472 for (auto Symbol
: Obj
.symbols()) {
473 MachOYAML::NListEntry NLE
=
475 ? constructNameList
<MachO::nlist_64
>(
476 Obj
.getSymbol64TableEntry(Symbol
.getRawDataRefImpl()))
477 : constructNameList
<MachO::nlist
>(
478 Obj
.getSymbolTableEntry(Symbol
.getRawDataRefImpl()));
479 LEData
.NameList
.push_back(NLE
);
482 StringRef RemainingTable
= Obj
.getStringTableData();
483 while (RemainingTable
.size() > 0) {
484 auto SymbolPair
= RemainingTable
.split('\0');
485 RemainingTable
= SymbolPair
.second
;
486 LEData
.StringTable
.push_back(SymbolPair
.first
);
490 Error
macho2yaml(raw_ostream
&Out
, const object::MachOObjectFile
&Obj
) {
491 MachODumper
Dumper(Obj
);
492 Expected
<std::unique_ptr
<MachOYAML::Object
>> YAML
= Dumper
.dump();
494 return YAML
.takeError();
496 yaml::YamlObjectFile YAMLFile
;
497 YAMLFile
.MachO
= std::move(YAML
.get());
499 yaml::Output
Yout(Out
);
501 return Error::success();
504 Error
macho2yaml(raw_ostream
&Out
, const object::MachOUniversalBinary
&Obj
) {
505 yaml::YamlObjectFile YAMLFile
;
506 YAMLFile
.FatMachO
.reset(new MachOYAML::UniversalBinary());
507 MachOYAML::UniversalBinary
&YAML
= *YAMLFile
.FatMachO
;
508 YAML
.Header
.magic
= Obj
.getMagic();
509 YAML
.Header
.nfat_arch
= Obj
.getNumberOfObjects();
511 for (auto Slice
: Obj
.objects()) {
512 MachOYAML::FatArch arch
;
513 arch
.cputype
= Slice
.getCPUType();
514 arch
.cpusubtype
= Slice
.getCPUSubType();
515 arch
.offset
= Slice
.getOffset();
516 arch
.size
= Slice
.getSize();
517 arch
.align
= Slice
.getAlign();
518 arch
.reserved
= Slice
.getReserved();
519 YAML
.FatArchs
.push_back(arch
);
521 auto SliceObj
= Slice
.getAsObjectFile();
523 return SliceObj
.takeError();
525 MachODumper
Dumper(*SliceObj
.get());
526 Expected
<std::unique_ptr
<MachOYAML::Object
>> YAMLObj
= Dumper
.dump();
528 return YAMLObj
.takeError();
529 YAML
.Slices
.push_back(*YAMLObj
.get());
532 yaml::Output
Yout(Out
);
534 return Error::success();
537 std::error_code
macho2yaml(raw_ostream
&Out
, const object::Binary
&Binary
) {
538 if (const auto *MachOObj
= dyn_cast
<object::MachOUniversalBinary
>(&Binary
)) {
539 if (auto Err
= macho2yaml(Out
, *MachOObj
)) {
540 return errorToErrorCode(std::move(Err
));
542 return obj2yaml_error::success
;
545 if (const auto *MachOObj
= dyn_cast
<object::MachOObjectFile
>(&Binary
)) {
546 if (auto Err
= macho2yaml(Out
, *MachOObj
)) {
547 return errorToErrorCode(std::move(Err
));
549 return obj2yaml_error::success
;
552 return obj2yaml_error::unsupported_obj_file_format
;