[ARM] Add support for MVE pre and post inc loads and stores
[llvm-core.git] / tools / obj2yaml / macho2yaml.cpp
blob63e81686632489a8dd23fafccadf3e95ff1aa1ef
1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "Error.h"
10 #include "obj2yaml.h"
11 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
12 #include "llvm/Object/MachOUniversal.h"
13 #include "llvm/ObjectYAML/ObjectYAML.h"
14 #include "llvm/Support/ErrorHandling.h"
15 #include "llvm/Support/LEB128.h"
17 #include <string.h> // for memcpy
19 using namespace llvm;
21 class MachODumper {
23 template <typename StructType>
24 const char *processLoadCommandData(
25 MachOYAML::LoadCommand &LC,
26 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd);
28 const object::MachOObjectFile &Obj;
29 void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
30 void dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
31 void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
32 void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
33 void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
34 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
35 void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
36 void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
37 void dumpDebugAbbrev(DWARFContext &DCtx,
38 std::unique_ptr<MachOYAML::Object> &Y);
39 void dumpDebugStrings(DWARFContext &DCtx,
40 std::unique_ptr<MachOYAML::Object> &Y);
42 public:
43 MachODumper(const object::MachOObjectFile &O) : Obj(O) {}
44 Expected<std::unique_ptr<MachOYAML::Object>> dump();
// Generates one `case` of the load-command switch in dumpLoadCommands() (the
// macro is presumably expanded by the MachO.def include found there). The
// expansion copies the fixed-size command struct out of the file, byte-swaps
// it when the object's endianness differs from the host's, and then hands the
// command to processLoadCommandData to deal with any trailing data.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName:                                                          \
    memcpy(static_cast<void *>(&LC.Data.LCStruct##_data), LoadCmd.Ptr,         \
           sizeof(MachO::LCStruct));                                           \
    if (sys::IsLittleEndianHost != Obj.isLittleEndian()) {                     \
      MachO::swapStruct(LC.Data.LCStruct##_data);                              \
    }                                                                          \
    EndPtr = processLoadCommandData<MachO::LCStruct>(LC, LoadCmd);             \
    break;
56 template <typename SectionType>
57 MachOYAML::Section constructSectionCommon(SectionType Sec) {
58 MachOYAML::Section TempSec;
59 memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
60 memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
61 TempSec.addr = Sec.addr;
62 TempSec.size = Sec.size;
63 TempSec.offset = Sec.offset;
64 TempSec.align = Sec.align;
65 TempSec.reloff = Sec.reloff;
66 TempSec.nreloc = Sec.nreloc;
67 TempSec.flags = Sec.flags;
68 TempSec.reserved1 = Sec.reserved1;
69 TempSec.reserved2 = Sec.reserved2;
70 TempSec.reserved3 = 0;
71 return TempSec;
74 template <typename SectionType>
75 MachOYAML::Section constructSection(SectionType Sec);
77 template <> MachOYAML::Section constructSection(MachO::section Sec) {
78 MachOYAML::Section TempSec = constructSectionCommon(Sec);
79 TempSec.reserved3 = 0;
80 return TempSec;
83 template <> MachOYAML::Section constructSection(MachO::section_64 Sec) {
84 MachOYAML::Section TempSec = constructSectionCommon(Sec);
85 TempSec.reserved3 = Sec.reserved3;
86 return TempSec;
89 template <typename SectionType, typename SegmentType>
90 const char *
91 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
92 std::vector<MachOYAML::Section> &Sections,
93 bool IsLittleEndian) {
94 auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
95 const SectionType *Curr =
96 reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
97 for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
98 if (IsLittleEndian != sys::IsLittleEndianHost) {
99 SectionType Sec;
100 memcpy((void *)&Sec, Curr, sizeof(SectionType));
101 MachO::swapStruct(Sec);
102 Sections.push_back(constructSection(Sec));
103 } else {
104 Sections.push_back(constructSection(*Curr));
107 return reinterpret_cast<const char *>(Curr);
110 template <typename StructType>
111 const char *MachODumper::processLoadCommandData(
112 MachOYAML::LoadCommand &LC,
113 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
114 return LoadCmd.Ptr + sizeof(StructType);
117 template <>
118 const char *MachODumper::processLoadCommandData<MachO::segment_command>(
119 MachOYAML::LoadCommand &LC,
120 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
121 return extractSections<MachO::section, MachO::segment_command>(
122 LoadCmd, LC.Sections, Obj.isLittleEndian());
125 template <>
126 const char *MachODumper::processLoadCommandData<MachO::segment_command_64>(
127 MachOYAML::LoadCommand &LC,
128 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
129 return extractSections<MachO::section_64, MachO::segment_command_64>(
130 LoadCmd, LC.Sections, Obj.isLittleEndian());
133 template <typename StructType>
134 const char *
135 readString(MachOYAML::LoadCommand &LC,
136 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
137 auto Start = LoadCmd.Ptr + sizeof(StructType);
138 auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
139 auto Size = strnlen(Start, MaxSize);
140 LC.PayloadString = StringRef(Start, Size).str();
141 return Start + Size;
144 template <>
145 const char *MachODumper::processLoadCommandData<MachO::dylib_command>(
146 MachOYAML::LoadCommand &LC,
147 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
148 return readString<MachO::dylib_command>(LC, LoadCmd);
151 template <>
152 const char *MachODumper::processLoadCommandData<MachO::dylinker_command>(
153 MachOYAML::LoadCommand &LC,
154 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
155 return readString<MachO::dylinker_command>(LC, LoadCmd);
158 template <>
159 const char *MachODumper::processLoadCommandData<MachO::rpath_command>(
160 MachOYAML::LoadCommand &LC,
161 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
162 return readString<MachO::rpath_command>(LC, LoadCmd);
165 template <>
166 const char *MachODumper::processLoadCommandData<MachO::build_version_command>(
167 MachOYAML::LoadCommand &LC,
168 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
169 auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command);
170 auto NTools = LC.Data.build_version_command_data.ntools;
171 for (unsigned i = 0; i < NTools; ++i) {
172 auto Curr = Start + i * sizeof(MachO::build_tool_version);
173 MachO::build_tool_version BV;
174 memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version));
175 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
176 MachO::swapStruct(BV);
177 LC.Tools.push_back(BV);
179 return Start + NTools * sizeof(MachO::build_tool_version);
182 Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
183 auto Y = make_unique<MachOYAML::Object>();
184 Y->IsLittleEndian = Obj.isLittleEndian();
185 dumpHeader(Y);
186 dumpLoadCommands(Y);
187 dumpLinkEdit(Y);
189 std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(Obj);
190 if (auto Err = dwarf2yaml(*DICtx, Y->DWARF))
191 return errorCodeToError(Err);
192 return std::move(Y);
195 void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
196 Y->Header.magic = Obj.getHeader().magic;
197 Y->Header.cputype = Obj.getHeader().cputype;
198 Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
199 Y->Header.filetype = Obj.getHeader().filetype;
200 Y->Header.ncmds = Obj.getHeader().ncmds;
201 Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
202 Y->Header.flags = Obj.getHeader().flags;
203 Y->Header.reserved = 0;
206 void MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
207 for (auto LoadCmd : Obj.load_commands()) {
208 MachOYAML::LoadCommand LC;
209 const char *EndPtr = LoadCmd.Ptr;
210 switch (LoadCmd.C.cmd) {
211 default:
212 memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
213 sizeof(MachO::load_command));
214 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
215 MachO::swapStruct(LC.Data.load_command_data);
216 EndPtr = processLoadCommandData<MachO::load_command>(LC, LoadCmd);
217 break;
218 #include "llvm/BinaryFormat/MachO.def"
220 auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
221 if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
222 [](const char C) { return C == 0; })) {
223 LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
224 &EndPtr[RemainingBytes]);
225 RemainingBytes = 0;
227 LC.ZeroPadBytes = RemainingBytes;
228 Y->LoadCommands.push_back(std::move(LC));
232 void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
233 dumpRebaseOpcodes(Y);
234 dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
235 dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
236 Obj.getDyldInfoWeakBindOpcodes());
237 dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
238 true);
239 dumpExportTrie(Y);
240 dumpSymbols(Y);
243 void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
244 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
246 auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
247 for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
248 ++OpCode) {
249 MachOYAML::RebaseOpcode RebaseOp;
250 RebaseOp.Opcode =
251 static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
252 RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
254 unsigned Count;
255 uint64_t ULEB = 0;
257 switch (RebaseOp.Opcode) {
258 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
260 ULEB = decodeULEB128(OpCode + 1, &Count);
261 RebaseOp.ExtraData.push_back(ULEB);
262 OpCode += Count;
263 LLVM_FALLTHROUGH;
264 // Intentionally no break here -- This opcode has two ULEB values
265 case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
266 case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
267 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
268 case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
270 ULEB = decodeULEB128(OpCode + 1, &Count);
271 RebaseOp.ExtraData.push_back(ULEB);
272 OpCode += Count;
273 break;
274 default:
275 break;
278 LEData.RebaseOpcodes.push_back(RebaseOp);
280 if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
281 break;
285 StringRef ReadStringRef(const uint8_t *Start) {
286 const uint8_t *Itr = Start;
287 for (; *Itr; ++Itr)
289 return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
292 void MachODumper::dumpBindOpcodes(
293 std::vector<MachOYAML::BindOpcode> &BindOpcodes,
294 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
295 for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
296 ++OpCode) {
297 MachOYAML::BindOpcode BindOp;
298 BindOp.Opcode =
299 static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
300 BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
302 unsigned Count;
303 uint64_t ULEB = 0;
304 int64_t SLEB = 0;
306 switch (BindOp.Opcode) {
307 case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
308 ULEB = decodeULEB128(OpCode + 1, &Count);
309 BindOp.ULEBExtraData.push_back(ULEB);
310 OpCode += Count;
311 LLVM_FALLTHROUGH;
312 // Intentionally no break here -- this opcode has two ULEB values
314 case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
315 case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
316 case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
317 case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
318 ULEB = decodeULEB128(OpCode + 1, &Count);
319 BindOp.ULEBExtraData.push_back(ULEB);
320 OpCode += Count;
321 break;
323 case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
324 SLEB = decodeSLEB128(OpCode + 1, &Count);
325 BindOp.SLEBExtraData.push_back(SLEB);
326 OpCode += Count;
327 break;
329 case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
330 BindOp.Symbol = ReadStringRef(OpCode + 1);
331 OpCode += BindOp.Symbol.size() + 1;
332 break;
333 default:
334 break;
337 BindOpcodes.push_back(BindOp);
339 // Lazy bindings have DONE opcodes between operations, so we need to keep
340 // processing after a DONE.
341 if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
342 break;
 * \brief Processes a node from the export trie, and its children.
349 * To my knowledge there is no documentation of the encoded format of this data
350 * other than in the heads of the Apple linker engineers. To that end hopefully
351 * this comment and the implementation below can serve to light the way for
352 * anyone crazy enough to come down this path in the future.
354 * This function reads and preserves the trie structure of the export trie. To
355 * my knowledge there is no code anywhere else that reads the data and preserves
356 * the Trie. LD64 (sources available at opensource.apple.com) has a similar
357 * implementation that parses the export trie into a vector. That code as well
358 * as LLVM's libObject MachO implementation were the basis for this.
360 * The export trie is an encoded trie. The node serialization is a bit awkward.
361 * The below pseudo-code is the best description I've come up with for it.
 * struct SerializedNode {
 *   ULEB128 TerminalSize;
 *   struct TerminalData { <-- This is only present if TerminalSize > 0
 *     ULEB128 Flags;
 *     ULEB128 Address; <-- Present if ( !(Flags & REEXPORT) )
 *     ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
 *                                     Flags & STUB_AND_RESOLVER )
 *     char[] ImportName; <-- Present if ( Flags & REEXPORT )
 *   }
 *   uint8_t ChildrenCount;
 *   Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
 *   SerializedNode Children[ChildrenCount]
 * }
377 * Terminal nodes are nodes that represent actual exports. They can appear
378 * anywhere in the tree other than at the root; they do not need to be leaf
379 * nodes. When reading the data out of the trie this routine reads it in-order,
380 * but it puts the child names and offsets directly into the child nodes. This
381 * results in looping over the children twice during serialization and
382 * de-serialization, but it makes the YAML representation more human readable.
384 * Below is an example of the graph from a "Hello World" executable:
 *                     -------
 *                     |  '' |
 *                     -------
 *                        |
 *                     -------
 *                     | '_' |
 *                     -------
 *                        |
 *        |----------------------------------------|
 *        |                                        |
 *  ------------------------      ---------------------
 *  | '_mh_execute_header' |      |      'main'       |
 *  | Flags: 0x00000000    |      | Flags: 0x00000000 |
 *  | Addr:  0x00000000    |      | Addr:  0x00001160 |
 *  ------------------------      ---------------------
402 * This graph represents the trie for the exports "__mh_execute_header" and
403 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
404 * terminal.
407 const uint8_t *processExportNode(const uint8_t *CurrPtr,
408 const uint8_t *const End,
409 MachOYAML::ExportEntry &Entry) {
410 if (CurrPtr >= End)
411 return CurrPtr;
412 unsigned Count = 0;
413 Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
414 CurrPtr += Count;
415 if (Entry.TerminalSize != 0) {
416 Entry.Flags = decodeULEB128(CurrPtr, &Count);
417 CurrPtr += Count;
418 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
419 Entry.Address = 0;
420 Entry.Other = decodeULEB128(CurrPtr, &Count);
421 CurrPtr += Count;
422 Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
423 } else {
424 Entry.Address = decodeULEB128(CurrPtr, &Count);
425 CurrPtr += Count;
426 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
427 Entry.Other = decodeULEB128(CurrPtr, &Count);
428 CurrPtr += Count;
429 } else
430 Entry.Other = 0;
433 uint8_t childrenCount = *CurrPtr++;
434 if (childrenCount == 0)
435 return CurrPtr;
437 Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount,
438 MachOYAML::ExportEntry());
439 for (auto &Child : Entry.Children) {
440 Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
441 CurrPtr += Child.Name.length() + 1;
442 Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
443 CurrPtr += Count;
445 for (auto &Child : Entry.Children) {
446 CurrPtr = processExportNode(CurrPtr, End, Child);
448 return CurrPtr;
451 void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
452 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
453 auto ExportsTrie = Obj.getDyldInfoExportsTrie();
454 processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie);
457 template <typename nlist_t>
458 MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
459 MachOYAML::NListEntry NL;
460 NL.n_strx = nlist.n_strx;
461 NL.n_type = nlist.n_type;
462 NL.n_sect = nlist.n_sect;
463 NL.n_desc = nlist.n_desc;
464 NL.n_value = nlist.n_value;
465 return NL;
468 void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
469 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
471 for (auto Symbol : Obj.symbols()) {
472 MachOYAML::NListEntry NLE =
473 Obj.is64Bit()
474 ? constructNameList<MachO::nlist_64>(
475 Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
476 : constructNameList<MachO::nlist>(
477 Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
478 LEData.NameList.push_back(NLE);
481 StringRef RemainingTable = Obj.getStringTableData();
482 while (RemainingTable.size() > 0) {
483 auto SymbolPair = RemainingTable.split('\0');
484 RemainingTable = SymbolPair.second;
485 LEData.StringTable.push_back(SymbolPair.first);
489 Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
490 MachODumper Dumper(Obj);
491 Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
492 if (!YAML)
493 return YAML.takeError();
495 yaml::YamlObjectFile YAMLFile;
496 YAMLFile.MachO = std::move(YAML.get());
498 yaml::Output Yout(Out);
499 Yout << YAMLFile;
500 return Error::success();
503 Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
504 yaml::YamlObjectFile YAMLFile;
505 YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
506 MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
507 YAML.Header.magic = Obj.getMagic();
508 YAML.Header.nfat_arch = Obj.getNumberOfObjects();
510 for (auto Slice : Obj.objects()) {
511 MachOYAML::FatArch arch;
512 arch.cputype = Slice.getCPUType();
513 arch.cpusubtype = Slice.getCPUSubType();
514 arch.offset = Slice.getOffset();
515 arch.size = Slice.getSize();
516 arch.align = Slice.getAlign();
517 arch.reserved = Slice.getReserved();
518 YAML.FatArchs.push_back(arch);
520 auto SliceObj = Slice.getAsObjectFile();
521 if (!SliceObj)
522 return SliceObj.takeError();
524 MachODumper Dumper(*SliceObj.get());
525 Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
526 if (!YAMLObj)
527 return YAMLObj.takeError();
528 YAML.Slices.push_back(*YAMLObj.get());
531 yaml::Output Yout(Out);
532 Yout << YAML;
533 return Error::success();
536 std::error_code macho2yaml(raw_ostream &Out, const object::Binary &Binary) {
537 if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary)) {
538 if (auto Err = macho2yaml(Out, *MachOObj)) {
539 return errorToErrorCode(std::move(Err));
541 return obj2yaml_error::success;
544 if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary)) {
545 if (auto Err = macho2yaml(Out, *MachOObj)) {
546 return errorToErrorCode(std::move(Err));
548 return obj2yaml_error::success;
551 return obj2yaml_error::unsupported_obj_file_format;