[memprof] Move YAML traits to MemProf.h (NFC) (#118668)
[llvm-project.git] / lldb / source / Plugins / ObjectFile / Mach-O / ObjectFileMachO.cpp
blobdaffa1379fe575b1ffac52ff412da5840df2c6fa
1 //===-- ObjectFileMachO.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/ScopeExit.h"
10 #include "llvm/ADT/StringRef.h"
12 #include "Plugins/Process/Utility/RegisterContextDarwin_arm.h"
13 #include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h"
14 #include "Plugins/Process/Utility/RegisterContextDarwin_i386.h"
15 #include "Plugins/Process/Utility/RegisterContextDarwin_x86_64.h"
16 #include "lldb/Core/Debugger.h"
17 #include "lldb/Core/Module.h"
18 #include "lldb/Core/ModuleSpec.h"
19 #include "lldb/Core/PluginManager.h"
20 #include "lldb/Core/Progress.h"
21 #include "lldb/Core/Section.h"
22 #include "lldb/Host/Host.h"
23 #include "lldb/Symbol/DWARFCallFrameInfo.h"
24 #include "lldb/Symbol/ObjectFile.h"
25 #include "lldb/Target/DynamicLoader.h"
26 #include "lldb/Target/MemoryRegionInfo.h"
27 #include "lldb/Target/Platform.h"
28 #include "lldb/Target/Process.h"
29 #include "lldb/Target/SectionLoadList.h"
30 #include "lldb/Target/Target.h"
31 #include "lldb/Target/Thread.h"
32 #include "lldb/Target/ThreadList.h"
33 #include "lldb/Utility/ArchSpec.h"
34 #include "lldb/Utility/DataBuffer.h"
35 #include "lldb/Utility/FileSpec.h"
36 #include "lldb/Utility/FileSpecList.h"
37 #include "lldb/Utility/LLDBLog.h"
38 #include "lldb/Utility/Log.h"
39 #include "lldb/Utility/RangeMap.h"
40 #include "lldb/Utility/RegisterValue.h"
41 #include "lldb/Utility/Status.h"
42 #include "lldb/Utility/StreamString.h"
43 #include "lldb/Utility/Timer.h"
44 #include "lldb/Utility/UUID.h"
46 #include "lldb/Host/SafeMachO.h"
48 #include "llvm/ADT/DenseSet.h"
49 #include "llvm/Support/FormatVariadic.h"
50 #include "llvm/Support/MemoryBuffer.h"
52 #include "ObjectFileMachO.h"
54 #if defined(__APPLE__)
55 #include <TargetConditionals.h>
56 // GetLLDBSharedCacheUUID() needs to call dlsym()
57 #include <dlfcn.h>
58 #include <mach/mach_init.h>
59 #include <mach/vm_map.h>
60 #include <lldb/Host/SafeMachO.h>
61 #endif
63 #ifndef __APPLE__
64 #include "lldb/Utility/AppleUuidCompatibility.h"
65 #else
66 #include <uuid/uuid.h>
67 #endif
69 #include <bitset>
70 #include <memory>
71 #include <optional>
73 // Unfortunately the signpost header pulls in the system MachO header, too.
74 #ifdef CPU_TYPE_ARM
75 #undef CPU_TYPE_ARM
76 #endif
77 #ifdef CPU_TYPE_ARM64
78 #undef CPU_TYPE_ARM64
79 #endif
80 #ifdef CPU_TYPE_ARM64_32
81 #undef CPU_TYPE_ARM64_32
82 #endif
83 #ifdef CPU_TYPE_I386
84 #undef CPU_TYPE_I386
85 #endif
86 #ifdef CPU_TYPE_X86_64
87 #undef CPU_TYPE_X86_64
88 #endif
89 #ifdef MH_DYLINKER
90 #undef MH_DYLINKER
91 #endif
92 #ifdef MH_OBJECT
93 #undef MH_OBJECT
94 #endif
95 #ifdef LC_VERSION_MIN_MACOSX
96 #undef LC_VERSION_MIN_MACOSX
97 #endif
98 #ifdef LC_VERSION_MIN_IPHONEOS
99 #undef LC_VERSION_MIN_IPHONEOS
100 #endif
101 #ifdef LC_VERSION_MIN_TVOS
102 #undef LC_VERSION_MIN_TVOS
103 #endif
104 #ifdef LC_VERSION_MIN_WATCHOS
105 #undef LC_VERSION_MIN_WATCHOS
106 #endif
107 #ifdef LC_BUILD_VERSION
108 #undef LC_BUILD_VERSION
109 #endif
110 #ifdef PLATFORM_MACOS
111 #undef PLATFORM_MACOS
112 #endif
113 #ifdef PLATFORM_MACCATALYST
114 #undef PLATFORM_MACCATALYST
115 #endif
116 #ifdef PLATFORM_IOS
117 #undef PLATFORM_IOS
118 #endif
119 #ifdef PLATFORM_IOSSIMULATOR
120 #undef PLATFORM_IOSSIMULATOR
121 #endif
122 #ifdef PLATFORM_TVOS
123 #undef PLATFORM_TVOS
124 #endif
125 #ifdef PLATFORM_TVOSSIMULATOR
126 #undef PLATFORM_TVOSSIMULATOR
127 #endif
128 #ifdef PLATFORM_WATCHOS
129 #undef PLATFORM_WATCHOS
130 #endif
131 #ifdef PLATFORM_WATCHOSSIMULATOR
132 #undef PLATFORM_WATCHOSSIMULATOR
133 #endif
135 #define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
136 using namespace lldb;
137 using namespace lldb_private;
138 using namespace llvm::MachO;
140 static constexpr llvm::StringLiteral g_loader_path = "@loader_path";
141 static constexpr llvm::StringLiteral g_executable_path = "@executable_path";
143 LLDB_PLUGIN_DEFINE(ObjectFileMachO)
145 static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
146 const char *alt_name, size_t reg_byte_size,
147 Stream &data) {
148 const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
149 if (reg_info == nullptr)
150 reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
151 if (reg_info) {
152 lldb_private::RegisterValue reg_value;
153 if (reg_ctx->ReadRegister(reg_info, reg_value)) {
154 if (reg_info->byte_size >= reg_byte_size)
155 data.Write(reg_value.GetBytes(), reg_byte_size);
156 else {
157 data.Write(reg_value.GetBytes(), reg_info->byte_size);
158 for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
159 data.PutChar(0);
161 return;
164 // Just write zeros if all else fails
165 for (size_t i = 0; i < reg_byte_size; ++i)
166 data.PutChar(0);
169 class RegisterContextDarwin_x86_64_Mach : public RegisterContextDarwin_x86_64 {
170 public:
171 RegisterContextDarwin_x86_64_Mach(lldb_private::Thread &thread,
172 const DataExtractor &data)
173 : RegisterContextDarwin_x86_64(thread, 0) {
174 SetRegisterDataFrom_LC_THREAD(data);
177 void InvalidateAllRegisters() override {
178 // Do nothing... registers are always valid...
181 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
182 lldb::offset_t offset = 0;
183 SetError(GPRRegSet, Read, -1);
184 SetError(FPURegSet, Read, -1);
185 SetError(EXCRegSet, Read, -1);
186 bool done = false;
188 while (!done) {
189 int flavor = data.GetU32(&offset);
190 if (flavor == 0)
191 done = true;
192 else {
193 uint32_t i;
194 uint32_t count = data.GetU32(&offset);
195 switch (flavor) {
196 case GPRRegSet:
197 for (i = 0; i < count; ++i)
198 (&gpr.rax)[i] = data.GetU64(&offset);
199 SetError(GPRRegSet, Read, 0);
200 done = true;
202 break;
203 case FPURegSet:
204 // TODO: fill in FPU regs....
205 // SetError (FPURegSet, Read, -1);
206 done = true;
208 break;
209 case EXCRegSet:
210 exc.trapno = data.GetU32(&offset);
211 exc.err = data.GetU32(&offset);
212 exc.faultvaddr = data.GetU64(&offset);
213 SetError(EXCRegSet, Read, 0);
214 done = true;
215 break;
216 case 7:
217 case 8:
218 case 9:
219 // fancy flavors that encapsulate of the above flavors...
220 break;
222 default:
223 done = true;
224 break;
230 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
231 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
232 if (reg_ctx_sp) {
233 RegisterContext *reg_ctx = reg_ctx_sp.get();
235 data.PutHex32(GPRRegSet); // Flavor
236 data.PutHex32(GPRWordCount);
237 PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data);
238 PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data);
239 PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data);
240 PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data);
241 PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data);
242 PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data);
243 PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data);
244 PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data);
245 PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data);
246 PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data);
247 PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data);
248 PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data);
249 PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data);
250 PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data);
251 PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data);
252 PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data);
253 PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data);
254 PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data);
255 PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data);
256 PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data);
257 PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data);
259 // // Write out the FPU registers
260 // const size_t fpu_byte_size = sizeof(FPU);
261 // size_t bytes_written = 0;
262 // data.PutHex32 (FPURegSet);
263 // data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
264 // bytes_written += data.PutHex32(0); // uint32_t pad[0]
265 // bytes_written += data.PutHex32(0); // uint32_t pad[1]
266 // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
267 // data); // uint16_t fcw; // "fctrl"
268 // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
269 // data); // uint16_t fsw; // "fstat"
270 // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
271 // data); // uint8_t ftw; // "ftag"
272 // bytes_written += data.PutHex8 (0); // uint8_t pad1;
273 // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
274 // data); // uint16_t fop; // "fop"
275 // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
276 // data); // uint32_t ip; // "fioff"
277 // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
278 // data); // uint16_t cs; // "fiseg"
279 // bytes_written += data.PutHex16 (0); // uint16_t pad2;
280 // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
281 // data); // uint32_t dp; // "fooff"
282 // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
283 // data); // uint16_t ds; // "foseg"
284 // bytes_written += data.PutHex16 (0); // uint16_t pad3;
285 // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
286 // data); // uint32_t mxcsr;
287 // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
288 // 4, data);// uint32_t mxcsrmask;
289 // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
290 // sizeof(MMSReg), data);
291 // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
292 // sizeof(MMSReg), data);
293 // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
294 // sizeof(MMSReg), data);
295 // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
296 // sizeof(MMSReg), data);
297 // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
298 // sizeof(MMSReg), data);
299 // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
300 // sizeof(MMSReg), data);
301 // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
302 // sizeof(MMSReg), data);
303 // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
304 // sizeof(MMSReg), data);
305 // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
306 // sizeof(XMMReg), data);
307 // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
308 // sizeof(XMMReg), data);
309 // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
310 // sizeof(XMMReg), data);
311 // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
312 // sizeof(XMMReg), data);
313 // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
314 // sizeof(XMMReg), data);
315 // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
316 // sizeof(XMMReg), data);
317 // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
318 // sizeof(XMMReg), data);
319 // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
320 // sizeof(XMMReg), data);
321 // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
322 // sizeof(XMMReg), data);
323 // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
324 // sizeof(XMMReg), data);
325 // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
326 // sizeof(XMMReg), data);
327 // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
328 // sizeof(XMMReg), data);
329 // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
330 // sizeof(XMMReg), data);
331 // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
332 // sizeof(XMMReg), data);
333 // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
334 // sizeof(XMMReg), data);
335 // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
336 // sizeof(XMMReg), data);
338 // // Fill rest with zeros
339 // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
340 // i)
341 // data.PutChar(0);
343 // Write out the EXC registers
344 data.PutHex32(EXCRegSet);
345 data.PutHex32(EXCWordCount);
346 PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
347 PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
348 PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data);
349 return true;
351 return false;
354 protected:
355 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
357 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
359 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
361 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
362 return 0;
365 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
366 return 0;
369 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
370 return 0;
374 class RegisterContextDarwin_i386_Mach : public RegisterContextDarwin_i386 {
375 public:
376 RegisterContextDarwin_i386_Mach(lldb_private::Thread &thread,
377 const DataExtractor &data)
378 : RegisterContextDarwin_i386(thread, 0) {
379 SetRegisterDataFrom_LC_THREAD(data);
382 void InvalidateAllRegisters() override {
383 // Do nothing... registers are always valid...
386 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
387 lldb::offset_t offset = 0;
388 SetError(GPRRegSet, Read, -1);
389 SetError(FPURegSet, Read, -1);
390 SetError(EXCRegSet, Read, -1);
391 bool done = false;
393 while (!done) {
394 int flavor = data.GetU32(&offset);
395 if (flavor == 0)
396 done = true;
397 else {
398 uint32_t i;
399 uint32_t count = data.GetU32(&offset);
400 switch (flavor) {
401 case GPRRegSet:
402 for (i = 0; i < count; ++i)
403 (&gpr.eax)[i] = data.GetU32(&offset);
404 SetError(GPRRegSet, Read, 0);
405 done = true;
407 break;
408 case FPURegSet:
409 // TODO: fill in FPU regs....
410 // SetError (FPURegSet, Read, -1);
411 done = true;
413 break;
414 case EXCRegSet:
415 exc.trapno = data.GetU32(&offset);
416 exc.err = data.GetU32(&offset);
417 exc.faultvaddr = data.GetU32(&offset);
418 SetError(EXCRegSet, Read, 0);
419 done = true;
420 break;
421 case 7:
422 case 8:
423 case 9:
424 // fancy flavors that encapsulate of the above flavors...
425 break;
427 default:
428 done = true;
429 break;
435 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
436 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
437 if (reg_ctx_sp) {
438 RegisterContext *reg_ctx = reg_ctx_sp.get();
440 data.PutHex32(GPRRegSet); // Flavor
441 data.PutHex32(GPRWordCount);
442 PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data);
443 PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data);
444 PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data);
445 PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data);
446 PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data);
447 PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data);
448 PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data);
449 PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data);
450 PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data);
451 PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data);
452 PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data);
453 PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data);
454 PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data);
455 PrintRegisterValue(reg_ctx, "es", nullptr, 4, data);
456 PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data);
457 PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data);
459 // Write out the EXC registers
460 data.PutHex32(EXCRegSet);
461 data.PutHex32(EXCWordCount);
462 PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
463 PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
464 PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data);
465 return true;
467 return false;
470 protected:
471 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
473 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
475 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
477 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
478 return 0;
481 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
482 return 0;
485 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
486 return 0;
490 class RegisterContextDarwin_arm_Mach : public RegisterContextDarwin_arm {
491 public:
492 RegisterContextDarwin_arm_Mach(lldb_private::Thread &thread,
493 const DataExtractor &data)
494 : RegisterContextDarwin_arm(thread, 0) {
495 SetRegisterDataFrom_LC_THREAD(data);
498 void InvalidateAllRegisters() override {
499 // Do nothing... registers are always valid...
502 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
503 lldb::offset_t offset = 0;
504 SetError(GPRRegSet, Read, -1);
505 SetError(FPURegSet, Read, -1);
506 SetError(EXCRegSet, Read, -1);
507 bool done = false;
509 while (!done) {
510 int flavor = data.GetU32(&offset);
511 uint32_t count = data.GetU32(&offset);
512 lldb::offset_t next_thread_state = offset + (count * 4);
513 switch (flavor) {
514 case GPRAltRegSet:
515 case GPRRegSet: {
516 // r0-r15, plus CPSR
517 uint32_t gpr_buf_count = (sizeof(gpr.r) / sizeof(gpr.r[0])) + 1;
518 if (count == gpr_buf_count) {
519 for (uint32_t i = 0; i < (count - 1); ++i) {
520 gpr.r[i] = data.GetU32(&offset);
522 gpr.cpsr = data.GetU32(&offset);
524 SetError(GPRRegSet, Read, 0);
527 offset = next_thread_state;
528 break;
530 case FPURegSet: {
531 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats;
532 const int fpu_reg_buf_size = sizeof(fpu.floats);
533 if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
534 fpu_reg_buf) == fpu_reg_buf_size) {
535 offset += fpu_reg_buf_size;
536 fpu.fpscr = data.GetU32(&offset);
537 SetError(FPURegSet, Read, 0);
538 } else {
539 done = true;
542 offset = next_thread_state;
543 break;
545 case EXCRegSet:
546 if (count == 3) {
547 exc.exception = data.GetU32(&offset);
548 exc.fsr = data.GetU32(&offset);
549 exc.far = data.GetU32(&offset);
550 SetError(EXCRegSet, Read, 0);
552 done = true;
553 offset = next_thread_state;
554 break;
556 // Unknown register set flavor, stop trying to parse.
557 default:
558 done = true;
563 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
564 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
565 if (reg_ctx_sp) {
566 RegisterContext *reg_ctx = reg_ctx_sp.get();
568 data.PutHex32(GPRRegSet); // Flavor
569 data.PutHex32(GPRWordCount);
570 PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data);
571 PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data);
572 PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data);
573 PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data);
574 PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data);
575 PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data);
576 PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data);
577 PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data);
578 PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data);
579 PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data);
580 PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data);
581 PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data);
582 PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data);
583 PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data);
584 PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data);
585 PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data);
586 PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
588 // Write out the EXC registers
589 // data.PutHex32 (EXCRegSet);
590 // data.PutHex32 (EXCWordCount);
591 // WriteRegister (reg_ctx, "exception", NULL, 4, data);
592 // WriteRegister (reg_ctx, "fsr", NULL, 4, data);
593 // WriteRegister (reg_ctx, "far", NULL, 4, data);
594 return true;
596 return false;
599 protected:
600 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
602 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
604 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
606 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
608 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
609 return 0;
612 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
613 return 0;
616 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
617 return 0;
620 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
621 return -1;
625 class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 {
626 public:
627 RegisterContextDarwin_arm64_Mach(lldb_private::Thread &thread,
628 const DataExtractor &data)
629 : RegisterContextDarwin_arm64(thread, 0) {
630 SetRegisterDataFrom_LC_THREAD(data);
633 void InvalidateAllRegisters() override {
634 // Do nothing... registers are always valid...
637 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
638 lldb::offset_t offset = 0;
639 SetError(GPRRegSet, Read, -1);
640 SetError(FPURegSet, Read, -1);
641 SetError(EXCRegSet, Read, -1);
642 bool done = false;
643 while (!done) {
644 int flavor = data.GetU32(&offset);
645 uint32_t count = data.GetU32(&offset);
646 lldb::offset_t next_thread_state = offset + (count * 4);
647 switch (flavor) {
648 case GPRRegSet:
649 // x0-x29 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
650 // 32-bit register)
651 if (count >= (33 * 2) + 1) {
652 for (uint32_t i = 0; i < 29; ++i)
653 gpr.x[i] = data.GetU64(&offset);
654 gpr.fp = data.GetU64(&offset);
655 gpr.lr = data.GetU64(&offset);
656 gpr.sp = data.GetU64(&offset);
657 gpr.pc = data.GetU64(&offset);
658 gpr.cpsr = data.GetU32(&offset);
659 SetError(GPRRegSet, Read, 0);
661 offset = next_thread_state;
662 break;
663 case FPURegSet: {
664 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
665 const int fpu_reg_buf_size = sizeof(fpu);
666 if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
667 data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
668 fpu_reg_buf) == fpu_reg_buf_size) {
669 SetError(FPURegSet, Read, 0);
670 } else {
671 done = true;
674 offset = next_thread_state;
675 break;
676 case EXCRegSet:
677 if (count == 4) {
678 exc.far = data.GetU64(&offset);
679 exc.esr = data.GetU32(&offset);
680 exc.exception = data.GetU32(&offset);
681 SetError(EXCRegSet, Read, 0);
683 offset = next_thread_state;
684 break;
685 default:
686 done = true;
687 break;
692 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
693 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
694 if (reg_ctx_sp) {
695 RegisterContext *reg_ctx = reg_ctx_sp.get();
697 data.PutHex32(GPRRegSet); // Flavor
698 data.PutHex32(GPRWordCount);
699 PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data);
700 PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data);
701 PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data);
702 PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data);
703 PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data);
704 PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data);
705 PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data);
706 PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data);
707 PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data);
708 PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data);
709 PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data);
710 PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data);
711 PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data);
712 PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data);
713 PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data);
714 PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data);
715 PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data);
716 PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data);
717 PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data);
718 PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data);
719 PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data);
720 PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data);
721 PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data);
722 PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data);
723 PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data);
724 PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data);
725 PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data);
726 PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data);
727 PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data);
728 PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data);
729 PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data);
730 PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data);
731 PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data);
732 PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
733 data.PutHex32(0); // uint32_t pad at the end
735 // Write out the EXC registers
736 data.PutHex32(EXCRegSet);
737 data.PutHex32(EXCWordCount);
738 PrintRegisterValue(reg_ctx, "far", nullptr, 8, data);
739 PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data);
740 PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data);
741 return true;
743 return false;
746 protected:
747 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
749 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
751 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
753 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
755 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
756 return 0;
759 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
760 return 0;
763 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
764 return 0;
767 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
768 return -1;
772 static uint32_t MachHeaderSizeFromMagic(uint32_t magic) {
773 switch (magic) {
774 case MH_MAGIC:
775 case MH_CIGAM:
776 return sizeof(struct llvm::MachO::mach_header);
778 case MH_MAGIC_64:
779 case MH_CIGAM_64:
780 return sizeof(struct llvm::MachO::mach_header_64);
781 break;
783 default:
784 break;
786 return 0;
789 #define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008
791 char ObjectFileMachO::ID;
793 void ObjectFileMachO::Initialize() {
794 PluginManager::RegisterPlugin(
795 GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
796 CreateMemoryInstance, GetModuleSpecifications, SaveCore);
799 void ObjectFileMachO::Terminate() {
800 PluginManager::UnregisterPlugin(CreateInstance);
803 ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
804 DataBufferSP data_sp,
805 lldb::offset_t data_offset,
806 const FileSpec *file,
807 lldb::offset_t file_offset,
808 lldb::offset_t length) {
809 if (!data_sp) {
810 data_sp = MapFileData(*file, length, file_offset);
811 if (!data_sp)
812 return nullptr;
813 data_offset = 0;
816 if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length))
817 return nullptr;
819 // Update the data to contain the entire file if it doesn't already
820 if (data_sp->GetByteSize() < length) {
821 data_sp = MapFileData(*file, length, file_offset);
822 if (!data_sp)
823 return nullptr;
824 data_offset = 0;
826 auto objfile_up = std::make_unique<ObjectFileMachO>(
827 module_sp, data_sp, data_offset, file, file_offset, length);
828 if (!objfile_up || !objfile_up->ParseHeader())
829 return nullptr;
831 return objfile_up.release();
834 ObjectFile *ObjectFileMachO::CreateMemoryInstance(
835 const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp,
836 const ProcessSP &process_sp, lldb::addr_t header_addr) {
837 if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
838 std::unique_ptr<ObjectFile> objfile_up(
839 new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
840 if (objfile_up.get() && objfile_up->ParseHeader())
841 return objfile_up.release();
843 return nullptr;
846 size_t ObjectFileMachO::GetModuleSpecifications(
847 const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
848 lldb::offset_t data_offset, lldb::offset_t file_offset,
849 lldb::offset_t length, lldb_private::ModuleSpecList &specs) {
850 const size_t initial_count = specs.GetSize();
852 if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
853 DataExtractor data;
854 data.SetData(data_sp);
855 llvm::MachO::mach_header header;
856 if (ParseHeader(data, &data_offset, header)) {
857 size_t header_and_load_cmds =
858 header.sizeofcmds + MachHeaderSizeFromMagic(header.magic);
859 if (header_and_load_cmds >= data_sp->GetByteSize()) {
860 data_sp = MapFileData(file, header_and_load_cmds, file_offset);
861 data.SetData(data_sp);
862 data_offset = MachHeaderSizeFromMagic(header.magic);
864 if (data_sp) {
865 ModuleSpec base_spec;
866 base_spec.GetFileSpec() = file;
867 base_spec.SetObjectOffset(file_offset);
868 base_spec.SetObjectSize(length);
869 GetAllArchSpecs(header, data, data_offset, base_spec, specs);
873 return specs.GetSize() - initial_count;
876 ConstString ObjectFileMachO::GetSegmentNameTEXT() {
877 static ConstString g_segment_name_TEXT("__TEXT");
878 return g_segment_name_TEXT;
881 ConstString ObjectFileMachO::GetSegmentNameDATA() {
882 static ConstString g_segment_name_DATA("__DATA");
883 return g_segment_name_DATA;
886 ConstString ObjectFileMachO::GetSegmentNameDATA_DIRTY() {
887 static ConstString g_segment_name("__DATA_DIRTY");
888 return g_segment_name;
891 ConstString ObjectFileMachO::GetSegmentNameDATA_CONST() {
892 static ConstString g_segment_name("__DATA_CONST");
893 return g_segment_name;
896 ConstString ObjectFileMachO::GetSegmentNameOBJC() {
897 static ConstString g_segment_name_OBJC("__OBJC");
898 return g_segment_name_OBJC;
901 ConstString ObjectFileMachO::GetSegmentNameLINKEDIT() {
902 static ConstString g_section_name_LINKEDIT("__LINKEDIT");
903 return g_section_name_LINKEDIT;
906 ConstString ObjectFileMachO::GetSegmentNameDWARF() {
907 static ConstString g_section_name("__DWARF");
908 return g_section_name;
911 ConstString ObjectFileMachO::GetSegmentNameLLVM_COV() {
912 static ConstString g_section_name("__LLVM_COV");
913 return g_section_name;
916 ConstString ObjectFileMachO::GetSectionNameEHFrame() {
917 static ConstString g_section_name_eh_frame("__eh_frame");
918 return g_section_name_eh_frame;
921 bool ObjectFileMachO::MagicBytesMatch(DataBufferSP data_sp,
922 lldb::addr_t data_offset,
923 lldb::addr_t data_length) {
924 DataExtractor data;
925 data.SetData(data_sp, data_offset, data_length);
926 lldb::offset_t offset = 0;
927 uint32_t magic = data.GetU32(&offset);
929 offset += 4; // cputype
930 offset += 4; // cpusubtype
931 uint32_t filetype = data.GetU32(&offset);
933 // A fileset has a Mach-O header but is not an
934 // individual file and must be handled via an
935 // ObjectContainer plugin.
936 if (filetype == llvm::MachO::MH_FILESET)
937 return false;
939 return MachHeaderSizeFromMagic(magic) != 0;
942 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
943 DataBufferSP data_sp,
944 lldb::offset_t data_offset,
945 const FileSpec *file,
946 lldb::offset_t file_offset,
947 lldb::offset_t length)
948 : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
949 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
950 m_thread_context_offsets_valid(false), m_reexported_dylibs(),
951 m_allow_assembly_emulation_unwind_plans(true) {
952 ::memset(&m_header, 0, sizeof(m_header));
953 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
956 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
957 lldb::WritableDataBufferSP header_data_sp,
958 const lldb::ProcessSP &process_sp,
959 lldb::addr_t header_addr)
960 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
961 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
962 m_thread_context_offsets_valid(false), m_reexported_dylibs(),
963 m_allow_assembly_emulation_unwind_plans(true) {
964 ::memset(&m_header, 0, sizeof(m_header));
965 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
968 bool ObjectFileMachO::ParseHeader(DataExtractor &data,
969 lldb::offset_t *data_offset_ptr,
970 llvm::MachO::mach_header &header) {
971 data.SetByteOrder(endian::InlHostByteOrder());
972 // Leave magic in the original byte order
973 header.magic = data.GetU32(data_offset_ptr);
974 bool can_parse = false;
975 bool is_64_bit = false;
976 switch (header.magic) {
977 case MH_MAGIC:
978 data.SetByteOrder(endian::InlHostByteOrder());
979 data.SetAddressByteSize(4);
980 can_parse = true;
981 break;
983 case MH_MAGIC_64:
984 data.SetByteOrder(endian::InlHostByteOrder());
985 data.SetAddressByteSize(8);
986 can_parse = true;
987 is_64_bit = true;
988 break;
990 case MH_CIGAM:
991 data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
992 ? eByteOrderLittle
993 : eByteOrderBig);
994 data.SetAddressByteSize(4);
995 can_parse = true;
996 break;
998 case MH_CIGAM_64:
999 data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
1000 ? eByteOrderLittle
1001 : eByteOrderBig);
1002 data.SetAddressByteSize(8);
1003 is_64_bit = true;
1004 can_parse = true;
1005 break;
1007 default:
1008 break;
1011 if (can_parse) {
1012 data.GetU32(data_offset_ptr, &header.cputype, 6);
1013 if (is_64_bit)
1014 *data_offset_ptr += 4;
1015 return true;
1016 } else {
1017 memset(&header, 0, sizeof(header));
1019 return false;
1022 bool ObjectFileMachO::ParseHeader() {
1023 ModuleSP module_sp(GetModule());
1024 if (!module_sp)
1025 return false;
1027 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1028 bool can_parse = false;
1029 lldb::offset_t offset = 0;
1030 m_data.SetByteOrder(endian::InlHostByteOrder());
1031 // Leave magic in the original byte order
1032 m_header.magic = m_data.GetU32(&offset);
1033 switch (m_header.magic) {
1034 case MH_MAGIC:
1035 m_data.SetByteOrder(endian::InlHostByteOrder());
1036 m_data.SetAddressByteSize(4);
1037 can_parse = true;
1038 break;
1040 case MH_MAGIC_64:
1041 m_data.SetByteOrder(endian::InlHostByteOrder());
1042 m_data.SetAddressByteSize(8);
1043 can_parse = true;
1044 break;
1046 case MH_CIGAM:
1047 m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
1048 ? eByteOrderLittle
1049 : eByteOrderBig);
1050 m_data.SetAddressByteSize(4);
1051 can_parse = true;
1052 break;
1054 case MH_CIGAM_64:
1055 m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
1056 ? eByteOrderLittle
1057 : eByteOrderBig);
1058 m_data.SetAddressByteSize(8);
1059 can_parse = true;
1060 break;
1062 default:
1063 break;
1066 if (can_parse) {
1067 m_data.GetU32(&offset, &m_header.cputype, 6);
1069 ModuleSpecList all_specs;
1070 ModuleSpec base_spec;
1071 GetAllArchSpecs(m_header, m_data, MachHeaderSizeFromMagic(m_header.magic),
1072 base_spec, all_specs);
1074 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
1075 ArchSpec mach_arch =
1076 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture();
1078 // Check if the module has a required architecture
1079 const ArchSpec &module_arch = module_sp->GetArchitecture();
1080 if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch))
1081 continue;
1083 if (SetModulesArchitecture(mach_arch)) {
1084 const size_t header_and_lc_size =
1085 m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic);
1086 if (m_data.GetByteSize() < header_and_lc_size) {
1087 DataBufferSP data_sp;
1088 ProcessSP process_sp(m_process_wp.lock());
1089 if (process_sp) {
1090 data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size);
1091 } else {
1092 // Read in all only the load command data from the file on disk
1093 data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset);
1094 if (data_sp->GetByteSize() != header_and_lc_size)
1095 continue;
1097 if (data_sp)
1098 m_data.SetData(data_sp);
1101 return true;
1103 // None found.
1104 return false;
1105 } else {
1106 memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header));
1108 return false;
1111 ByteOrder ObjectFileMachO::GetByteOrder() const {
1112 return m_data.GetByteOrder();
1115 bool ObjectFileMachO::IsExecutable() const {
1116 return m_header.filetype == MH_EXECUTE;
1119 bool ObjectFileMachO::IsDynamicLoader() const {
1120 return m_header.filetype == MH_DYLINKER;
1123 bool ObjectFileMachO::IsSharedCacheBinary() const {
1124 return m_header.flags & MH_DYLIB_IN_CACHE;
1127 bool ObjectFileMachO::IsKext() const {
1128 return m_header.filetype == MH_KEXT_BUNDLE;
1131 uint32_t ObjectFileMachO::GetAddressByteSize() const {
1132 return m_data.GetAddressByteSize();
1135 AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) {
1136 Symtab *symtab = GetSymtab();
1137 if (!symtab)
1138 return AddressClass::eUnknown;
1140 Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr);
1141 if (symbol) {
1142 if (symbol->ValueIsAddress()) {
1143 SectionSP section_sp(symbol->GetAddressRef().GetSection());
1144 if (section_sp) {
1145 const lldb::SectionType section_type = section_sp->GetType();
1146 switch (section_type) {
1147 case eSectionTypeInvalid:
1148 return AddressClass::eUnknown;
1150 case eSectionTypeCode:
1151 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1152 // For ARM we have a bit in the n_desc field of the symbol that
1153 // tells us ARM/Thumb which is bit 0x0008.
1154 if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1155 return AddressClass::eCodeAlternateISA;
1157 return AddressClass::eCode;
1159 case eSectionTypeContainer:
1160 return AddressClass::eUnknown;
1162 case eSectionTypeData:
1163 case eSectionTypeDataCString:
1164 case eSectionTypeDataCStringPointers:
1165 case eSectionTypeDataSymbolAddress:
1166 case eSectionTypeData4:
1167 case eSectionTypeData8:
1168 case eSectionTypeData16:
1169 case eSectionTypeDataPointers:
1170 case eSectionTypeZeroFill:
1171 case eSectionTypeDataObjCMessageRefs:
1172 case eSectionTypeDataObjCCFStrings:
1173 case eSectionTypeGoSymtab:
1174 return AddressClass::eData;
1176 case eSectionTypeDebug:
1177 case eSectionTypeDWARFDebugAbbrev:
1178 case eSectionTypeDWARFDebugAbbrevDwo:
1179 case eSectionTypeDWARFDebugAddr:
1180 case eSectionTypeDWARFDebugAranges:
1181 case eSectionTypeDWARFDebugCuIndex:
1182 case eSectionTypeDWARFDebugFrame:
1183 case eSectionTypeDWARFDebugInfo:
1184 case eSectionTypeDWARFDebugInfoDwo:
1185 case eSectionTypeDWARFDebugLine:
1186 case eSectionTypeDWARFDebugLineStr:
1187 case eSectionTypeDWARFDebugLoc:
1188 case eSectionTypeDWARFDebugLocDwo:
1189 case eSectionTypeDWARFDebugLocLists:
1190 case eSectionTypeDWARFDebugLocListsDwo:
1191 case eSectionTypeDWARFDebugMacInfo:
1192 case eSectionTypeDWARFDebugMacro:
1193 case eSectionTypeDWARFDebugNames:
1194 case eSectionTypeDWARFDebugPubNames:
1195 case eSectionTypeDWARFDebugPubTypes:
1196 case eSectionTypeDWARFDebugRanges:
1197 case eSectionTypeDWARFDebugRngLists:
1198 case eSectionTypeDWARFDebugRngListsDwo:
1199 case eSectionTypeDWARFDebugStr:
1200 case eSectionTypeDWARFDebugStrDwo:
1201 case eSectionTypeDWARFDebugStrOffsets:
1202 case eSectionTypeDWARFDebugStrOffsetsDwo:
1203 case eSectionTypeDWARFDebugTuIndex:
1204 case eSectionTypeDWARFDebugTypes:
1205 case eSectionTypeDWARFDebugTypesDwo:
1206 case eSectionTypeDWARFAppleNames:
1207 case eSectionTypeDWARFAppleTypes:
1208 case eSectionTypeDWARFAppleNamespaces:
1209 case eSectionTypeDWARFAppleObjC:
1210 case eSectionTypeDWARFGNUDebugAltLink:
1211 case eSectionTypeCTF:
1212 case eSectionTypeSwiftModules:
1213 return AddressClass::eDebug;
1215 case eSectionTypeEHFrame:
1216 case eSectionTypeARMexidx:
1217 case eSectionTypeARMextab:
1218 case eSectionTypeCompactUnwind:
1219 return AddressClass::eRuntime;
1221 case eSectionTypeAbsoluteAddress:
1222 case eSectionTypeELFSymbolTable:
1223 case eSectionTypeELFDynamicSymbols:
1224 case eSectionTypeELFRelocationEntries:
1225 case eSectionTypeELFDynamicLinkInfo:
1226 case eSectionTypeOther:
1227 return AddressClass::eUnknown;
1232 const SymbolType symbol_type = symbol->GetType();
1233 switch (symbol_type) {
1234 case eSymbolTypeAny:
1235 return AddressClass::eUnknown;
1236 case eSymbolTypeAbsolute:
1237 return AddressClass::eUnknown;
1239 case eSymbolTypeCode:
1240 case eSymbolTypeTrampoline:
1241 case eSymbolTypeResolver:
1242 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1243 // For ARM we have a bit in the n_desc field of the symbol that tells
1244 // us ARM/Thumb which is bit 0x0008.
1245 if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1246 return AddressClass::eCodeAlternateISA;
1248 return AddressClass::eCode;
1250 case eSymbolTypeData:
1251 return AddressClass::eData;
1252 case eSymbolTypeRuntime:
1253 return AddressClass::eRuntime;
1254 case eSymbolTypeException:
1255 return AddressClass::eRuntime;
1256 case eSymbolTypeSourceFile:
1257 return AddressClass::eDebug;
1258 case eSymbolTypeHeaderFile:
1259 return AddressClass::eDebug;
1260 case eSymbolTypeObjectFile:
1261 return AddressClass::eDebug;
1262 case eSymbolTypeCommonBlock:
1263 return AddressClass::eDebug;
1264 case eSymbolTypeBlock:
1265 return AddressClass::eDebug;
1266 case eSymbolTypeLocal:
1267 return AddressClass::eData;
1268 case eSymbolTypeParam:
1269 return AddressClass::eData;
1270 case eSymbolTypeVariable:
1271 return AddressClass::eData;
1272 case eSymbolTypeVariableType:
1273 return AddressClass::eDebug;
1274 case eSymbolTypeLineEntry:
1275 return AddressClass::eDebug;
1276 case eSymbolTypeLineHeader:
1277 return AddressClass::eDebug;
1278 case eSymbolTypeScopeBegin:
1279 return AddressClass::eDebug;
1280 case eSymbolTypeScopeEnd:
1281 return AddressClass::eDebug;
1282 case eSymbolTypeAdditional:
1283 return AddressClass::eUnknown;
1284 case eSymbolTypeCompiler:
1285 return AddressClass::eDebug;
1286 case eSymbolTypeInstrumentation:
1287 return AddressClass::eDebug;
1288 case eSymbolTypeUndefined:
1289 return AddressClass::eUnknown;
1290 case eSymbolTypeObjCClass:
1291 return AddressClass::eRuntime;
1292 case eSymbolTypeObjCMetaClass:
1293 return AddressClass::eRuntime;
1294 case eSymbolTypeObjCIVar:
1295 return AddressClass::eRuntime;
1296 case eSymbolTypeReExported:
1297 return AddressClass::eRuntime;
1300 return AddressClass::eUnknown;
1303 bool ObjectFileMachO::IsStripped() {
1304 if (m_dysymtab.cmd == 0) {
1305 ModuleSP module_sp(GetModule());
1306 if (module_sp) {
1307 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
1308 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1309 const lldb::offset_t load_cmd_offset = offset;
1311 llvm::MachO::load_command lc = {};
1312 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
1313 break;
1314 if (lc.cmd == LC_DYSYMTAB) {
1315 m_dysymtab.cmd = lc.cmd;
1316 m_dysymtab.cmdsize = lc.cmdsize;
1317 if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1318 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
1319 nullptr) {
1320 // Clear m_dysymtab if we were unable to read all items from the
1321 // load command
1322 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
1325 offset = load_cmd_offset + lc.cmdsize;
1329 if (m_dysymtab.cmd)
1330 return m_dysymtab.nlocalsym <= 1;
1331 return false;
1334 ObjectFileMachO::EncryptedFileRanges ObjectFileMachO::GetEncryptedFileRanges() {
1335 EncryptedFileRanges result;
1336 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
1338 llvm::MachO::encryption_info_command encryption_cmd;
1339 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1340 const lldb::offset_t load_cmd_offset = offset;
1341 if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr)
1342 break;
1344 // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
1345 // 3 fields we care about, so treat them the same.
1346 if (encryption_cmd.cmd == LC_ENCRYPTION_INFO ||
1347 encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) {
1348 if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) {
1349 if (encryption_cmd.cryptid != 0) {
1350 EncryptedFileRanges::Entry entry;
1351 entry.SetRangeBase(encryption_cmd.cryptoff);
1352 entry.SetByteSize(encryption_cmd.cryptsize);
1353 result.Append(entry);
1357 offset = load_cmd_offset + encryption_cmd.cmdsize;
1360 return result;
1363 void ObjectFileMachO::SanitizeSegmentCommand(
1364 llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) {
1365 if (m_length == 0 || seg_cmd.filesize == 0)
1366 return;
1368 if (IsSharedCacheBinary() && !IsInMemory()) {
1369 // In shared cache images, the load commands are relative to the
1370 // shared cache file, and not the specific image we are
1371 // examining. Let's fix this up so that it looks like a normal
1372 // image.
1373 if (strncmp(seg_cmd.segname, GetSegmentNameTEXT().GetCString(),
1374 sizeof(seg_cmd.segname)) == 0)
1375 m_text_address = seg_cmd.vmaddr;
1376 if (strncmp(seg_cmd.segname, GetSegmentNameLINKEDIT().GetCString(),
1377 sizeof(seg_cmd.segname)) == 0)
1378 m_linkedit_original_offset = seg_cmd.fileoff;
1380 seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address;
1383 if (seg_cmd.fileoff > m_length) {
1384 // We have a load command that says it extends past the end of the file.
1385 // This is likely a corrupt file. We don't have any way to return an error
1386 // condition here (this method was likely invoked from something like
1387 // ObjectFile::GetSectionList()), so we just null out the section contents,
1388 // and dump a message to stdout. The most common case here is core file
1389 // debugging with a truncated file.
1390 const char *lc_segment_name =
1391 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1392 GetModule()->ReportWarning(
1393 "load command {0} {1} has a fileoff ({2:x16}) that extends beyond "
1394 "the end of the file ({3:x16}), ignoring this section",
1395 cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length);
1397 seg_cmd.fileoff = 0;
1398 seg_cmd.filesize = 0;
1401 if (seg_cmd.fileoff + seg_cmd.filesize > m_length) {
1402 // We have a load command that says it extends past the end of the file.
1403 // This is likely a corrupt file. We don't have any way to return an error
1404 // condition here (this method was likely invoked from something like
1405 // ObjectFile::GetSectionList()), so we just null out the section contents,
1406 // and dump a message to stdout. The most common case here is core file
1407 // debugging with a truncated file.
1408 const char *lc_segment_name =
1409 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1410 GetModule()->ReportWarning(
1411 "load command {0} {1} has a fileoff + filesize ({2:x16}) that "
1412 "extends beyond the end of the file ({3:x16}), the segment will be "
1413 "truncated to match",
1414 cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length);
1416 // Truncate the length
1417 seg_cmd.filesize = m_length - seg_cmd.fileoff;
1421 static uint32_t
1422 GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) {
1423 uint32_t result = 0;
1424 if (seg_cmd.initprot & VM_PROT_READ)
1425 result |= ePermissionsReadable;
1426 if (seg_cmd.initprot & VM_PROT_WRITE)
1427 result |= ePermissionsWritable;
1428 if (seg_cmd.initprot & VM_PROT_EXECUTE)
1429 result |= ePermissionsExecutable;
1430 return result;
1433 static lldb::SectionType GetSectionType(uint32_t flags,
1434 ConstString section_name) {
1436 if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS))
1437 return eSectionTypeCode;
1439 uint32_t mach_sect_type = flags & SECTION_TYPE;
1440 static ConstString g_sect_name_objc_data("__objc_data");
1441 static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
1442 static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
1443 static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
1444 static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
1445 static ConstString g_sect_name_objc_const("__objc_const");
1446 static ConstString g_sect_name_objc_classlist("__objc_classlist");
1447 static ConstString g_sect_name_cfstring("__cfstring");
1449 static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev");
1450 static ConstString g_sect_name_dwarf_debug_abbrev_dwo("__debug_abbrev.dwo");
1451 static ConstString g_sect_name_dwarf_debug_addr("__debug_addr");
1452 static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges");
1453 static ConstString g_sect_name_dwarf_debug_cu_index("__debug_cu_index");
1454 static ConstString g_sect_name_dwarf_debug_frame("__debug_frame");
1455 static ConstString g_sect_name_dwarf_debug_info("__debug_info");
1456 static ConstString g_sect_name_dwarf_debug_info_dwo("__debug_info.dwo");
1457 static ConstString g_sect_name_dwarf_debug_line("__debug_line");
1458 static ConstString g_sect_name_dwarf_debug_line_dwo("__debug_line.dwo");
1459 static ConstString g_sect_name_dwarf_debug_line_str("__debug_line_str");
1460 static ConstString g_sect_name_dwarf_debug_loc("__debug_loc");
1461 static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists");
1462 static ConstString g_sect_name_dwarf_debug_loclists_dwo("__debug_loclists.dwo");
1463 static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo");
1464 static ConstString g_sect_name_dwarf_debug_macro("__debug_macro");
1465 static ConstString g_sect_name_dwarf_debug_macro_dwo("__debug_macro.dwo");
1466 static ConstString g_sect_name_dwarf_debug_names("__debug_names");
1467 static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames");
1468 static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes");
1469 static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges");
1470 static ConstString g_sect_name_dwarf_debug_rnglists("__debug_rnglists");
1471 static ConstString g_sect_name_dwarf_debug_str("__debug_str");
1472 static ConstString g_sect_name_dwarf_debug_str_dwo("__debug_str.dwo");
1473 static ConstString g_sect_name_dwarf_debug_str_offs("__debug_str_offs");
1474 static ConstString g_sect_name_dwarf_debug_str_offs_dwo("__debug_str_offs.dwo");
1475 static ConstString g_sect_name_dwarf_debug_tu_index("__debug_tu_index");
1476 static ConstString g_sect_name_dwarf_debug_types("__debug_types");
1477 static ConstString g_sect_name_dwarf_apple_names("__apple_names");
1478 static ConstString g_sect_name_dwarf_apple_types("__apple_types");
1479 static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
1480 static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
1481 static ConstString g_sect_name_eh_frame("__eh_frame");
1482 static ConstString g_sect_name_compact_unwind("__unwind_info");
1483 static ConstString g_sect_name_text("__text");
1484 static ConstString g_sect_name_data("__data");
1485 static ConstString g_sect_name_go_symtab("__gosymtab");
1486 static ConstString g_sect_name_ctf("__ctf");
1487 static ConstString g_sect_name_swift_ast("__swift_ast");
1489 if (section_name == g_sect_name_dwarf_debug_abbrev)
1490 return eSectionTypeDWARFDebugAbbrev;
1491 if (section_name == g_sect_name_dwarf_debug_abbrev_dwo)
1492 return eSectionTypeDWARFDebugAbbrevDwo;
1493 if (section_name == g_sect_name_dwarf_debug_addr)
1494 return eSectionTypeDWARFDebugAddr;
1495 if (section_name == g_sect_name_dwarf_debug_aranges)
1496 return eSectionTypeDWARFDebugAranges;
1497 if (section_name == g_sect_name_dwarf_debug_cu_index)
1498 return eSectionTypeDWARFDebugCuIndex;
1499 if (section_name == g_sect_name_dwarf_debug_frame)
1500 return eSectionTypeDWARFDebugFrame;
1501 if (section_name == g_sect_name_dwarf_debug_info)
1502 return eSectionTypeDWARFDebugInfo;
1503 if (section_name == g_sect_name_dwarf_debug_info_dwo)
1504 return eSectionTypeDWARFDebugInfoDwo;
1505 if (section_name == g_sect_name_dwarf_debug_line)
1506 return eSectionTypeDWARFDebugLine;
1507 if (section_name == g_sect_name_dwarf_debug_line_dwo)
1508 return eSectionTypeDWARFDebugLine; // Same as debug_line.
1509 if (section_name == g_sect_name_dwarf_debug_line_str)
1510 return eSectionTypeDWARFDebugLineStr;
1511 if (section_name == g_sect_name_dwarf_debug_loc)
1512 return eSectionTypeDWARFDebugLoc;
1513 if (section_name == g_sect_name_dwarf_debug_loclists)
1514 return eSectionTypeDWARFDebugLocLists;
1515 if (section_name == g_sect_name_dwarf_debug_loclists_dwo)
1516 return eSectionTypeDWARFDebugLocListsDwo;
1517 if (section_name == g_sect_name_dwarf_debug_macinfo)
1518 return eSectionTypeDWARFDebugMacInfo;
1519 if (section_name == g_sect_name_dwarf_debug_macro)
1520 return eSectionTypeDWARFDebugMacro;
1521 if (section_name == g_sect_name_dwarf_debug_macro_dwo)
1522 return eSectionTypeDWARFDebugMacInfo; // Same as debug_macro.
1523 if (section_name == g_sect_name_dwarf_debug_names)
1524 return eSectionTypeDWARFDebugNames;
1525 if (section_name == g_sect_name_dwarf_debug_pubnames)
1526 return eSectionTypeDWARFDebugPubNames;
1527 if (section_name == g_sect_name_dwarf_debug_pubtypes)
1528 return eSectionTypeDWARFDebugPubTypes;
1529 if (section_name == g_sect_name_dwarf_debug_ranges)
1530 return eSectionTypeDWARFDebugRanges;
1531 if (section_name == g_sect_name_dwarf_debug_rnglists)
1532 return eSectionTypeDWARFDebugRngLists;
1533 if (section_name == g_sect_name_dwarf_debug_str)
1534 return eSectionTypeDWARFDebugStr;
1535 if (section_name == g_sect_name_dwarf_debug_str_dwo)
1536 return eSectionTypeDWARFDebugStrDwo;
1537 if (section_name == g_sect_name_dwarf_debug_str_offs)
1538 return eSectionTypeDWARFDebugStrOffsets;
1539 if (section_name == g_sect_name_dwarf_debug_str_offs_dwo)
1540 return eSectionTypeDWARFDebugStrOffsetsDwo;
1541 if (section_name == g_sect_name_dwarf_debug_tu_index)
1542 return eSectionTypeDWARFDebugTuIndex;
1543 if (section_name == g_sect_name_dwarf_debug_types)
1544 return eSectionTypeDWARFDebugTypes;
1545 if (section_name == g_sect_name_dwarf_apple_names)
1546 return eSectionTypeDWARFAppleNames;
1547 if (section_name == g_sect_name_dwarf_apple_types)
1548 return eSectionTypeDWARFAppleTypes;
1549 if (section_name == g_sect_name_dwarf_apple_namespaces)
1550 return eSectionTypeDWARFAppleNamespaces;
1551 if (section_name == g_sect_name_dwarf_apple_objc)
1552 return eSectionTypeDWARFAppleObjC;
1553 if (section_name == g_sect_name_objc_selrefs)
1554 return eSectionTypeDataCStringPointers;
1555 if (section_name == g_sect_name_objc_msgrefs)
1556 return eSectionTypeDataObjCMessageRefs;
1557 if (section_name == g_sect_name_eh_frame)
1558 return eSectionTypeEHFrame;
1559 if (section_name == g_sect_name_compact_unwind)
1560 return eSectionTypeCompactUnwind;
1561 if (section_name == g_sect_name_cfstring)
1562 return eSectionTypeDataObjCCFStrings;
1563 if (section_name == g_sect_name_go_symtab)
1564 return eSectionTypeGoSymtab;
1565 if (section_name == g_sect_name_ctf)
1566 return eSectionTypeCTF;
1567 if (section_name == g_sect_name_swift_ast)
1568 return eSectionTypeSwiftModules;
1569 if (section_name == g_sect_name_objc_data ||
1570 section_name == g_sect_name_objc_classrefs ||
1571 section_name == g_sect_name_objc_superrefs ||
1572 section_name == g_sect_name_objc_const ||
1573 section_name == g_sect_name_objc_classlist) {
1574 return eSectionTypeDataPointers;
1577 switch (mach_sect_type) {
1578 // TODO: categorize sections by other flags for regular sections
1579 case S_REGULAR:
1580 if (section_name == g_sect_name_text)
1581 return eSectionTypeCode;
1582 if (section_name == g_sect_name_data)
1583 return eSectionTypeData;
1584 return eSectionTypeOther;
1585 case S_ZEROFILL:
1586 return eSectionTypeZeroFill;
1587 case S_CSTRING_LITERALS: // section with only literal C strings
1588 return eSectionTypeDataCString;
1589 case S_4BYTE_LITERALS: // section with only 4 byte literals
1590 return eSectionTypeData4;
1591 case S_8BYTE_LITERALS: // section with only 8 byte literals
1592 return eSectionTypeData8;
1593 case S_LITERAL_POINTERS: // section with only pointers to literals
1594 return eSectionTypeDataPointers;
1595 case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
1596 return eSectionTypeDataPointers;
1597 case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
1598 return eSectionTypeDataPointers;
1599 case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
1600 // the reserved2 field
1601 return eSectionTypeCode;
1602 case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
1603 // initialization
1604 return eSectionTypeDataPointers;
1605 case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
1606 // termination
1607 return eSectionTypeDataPointers;
1608 case S_COALESCED:
1609 return eSectionTypeOther;
1610 case S_GB_ZEROFILL:
1611 return eSectionTypeZeroFill;
1612 case S_INTERPOSING: // section with only pairs of function pointers for
1613 // interposing
1614 return eSectionTypeCode;
1615 case S_16BYTE_LITERALS: // section with only 16 byte literals
1616 return eSectionTypeData16;
1617 case S_DTRACE_DOF:
1618 return eSectionTypeDebug;
1619 case S_LAZY_DYLIB_SYMBOL_POINTERS:
1620 return eSectionTypeDataPointers;
1621 default:
1622 return eSectionTypeOther;
1626 struct ObjectFileMachO::SegmentParsingContext {
1627 const EncryptedFileRanges EncryptedRanges;
1628 lldb_private::SectionList &UnifiedList;
1629 uint32_t NextSegmentIdx = 0;
1630 uint32_t NextSectionIdx = 0;
1631 bool FileAddressesChanged = false;
1633 SegmentParsingContext(EncryptedFileRanges EncryptedRanges,
1634 lldb_private::SectionList &UnifiedList)
1635 : EncryptedRanges(std::move(EncryptedRanges)), UnifiedList(UnifiedList) {}
1638 void ObjectFileMachO::ProcessSegmentCommand(
1639 const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset,
1640 uint32_t cmd_idx, SegmentParsingContext &context) {
1641 llvm::MachO::segment_command_64 load_cmd;
1642 memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_));
1644 if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16))
1645 return;
1647 ModuleSP module_sp = GetModule();
1648 const bool is_core = GetType() == eTypeCoreFile;
1649 const bool is_dsym = (m_header.filetype == MH_DSYM);
1650 bool add_section = true;
1651 bool add_to_unified = true;
1652 ConstString const_segname(
1653 load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname)));
1655 SectionSP unified_section_sp(
1656 context.UnifiedList.FindSectionByName(const_segname));
1657 if (is_dsym && unified_section_sp) {
1658 if (const_segname == GetSegmentNameLINKEDIT()) {
1659 // We need to keep the __LINKEDIT segment private to this object file
1660 // only
1661 add_to_unified = false;
1662 } else {
1663 // This is the dSYM file and this section has already been created by the
1664 // object file, no need to create it.
1665 add_section = false;
1668 load_cmd.vmaddr = m_data.GetAddress(&offset);
1669 load_cmd.vmsize = m_data.GetAddress(&offset);
1670 load_cmd.fileoff = m_data.GetAddress(&offset);
1671 load_cmd.filesize = m_data.GetAddress(&offset);
1672 if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4))
1673 return;
1675 SanitizeSegmentCommand(load_cmd, cmd_idx);
1677 const uint32_t segment_permissions = GetSegmentPermissions(load_cmd);
1678 const bool segment_is_encrypted =
1679 (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0;
1681 // Use a segment ID of the segment index shifted left by 8 so they never
1682 // conflict with any of the sections.
1683 SectionSP segment_sp;
1684 if (add_section && (const_segname || is_core)) {
1685 segment_sp = std::make_shared<Section>(
1686 module_sp, // Module to which this section belongs
1687 this, // Object file to which this sections belongs
1688 ++context.NextSegmentIdx
1689 << 8, // Section ID is the 1 based segment index
1690 // shifted right by 8 bits as not to collide with any of the 256
1691 // section IDs that are possible
1692 const_segname, // Name of this section
1693 eSectionTypeContainer, // This section is a container of other
1694 // sections.
1695 load_cmd.vmaddr, // File VM address == addresses as they are
1696 // found in the object file
1697 load_cmd.vmsize, // VM size in bytes of this section
1698 load_cmd.fileoff, // Offset to the data for this section in
1699 // the file
1700 load_cmd.filesize, // Size in bytes of this section as found
1701 // in the file
1702 0, // Segments have no alignment information
1703 load_cmd.flags); // Flags for this section
1705 segment_sp->SetIsEncrypted(segment_is_encrypted);
1706 m_sections_up->AddSection(segment_sp);
1707 segment_sp->SetPermissions(segment_permissions);
1708 if (add_to_unified)
1709 context.UnifiedList.AddSection(segment_sp);
1710 } else if (unified_section_sp) {
1711 // If this is a dSYM and the file addresses in the dSYM differ from the
1712 // file addresses in the ObjectFile, we must use the file base address for
1713 // the Section from the dSYM for the DWARF to resolve correctly.
1714 // This only happens with binaries in the shared cache in practice;
1715 // normally a mismatch like this would give a binary & dSYM that do not
1716 // match UUIDs. When a binary is included in the shared cache, its
1717 // segments are rearranged to optimize the shared cache, so its file
1718 // addresses will differ from what the ObjectFile had originally,
1719 // and what the dSYM has.
1720 if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
1721 Log *log = GetLog(LLDBLog::Symbols);
1722 if (log) {
1723 log->Printf(
1724 "Installing dSYM's %s segment file address over ObjectFile's "
1725 "so symbol table/debug info resolves correctly for %s",
1726 const_segname.AsCString(),
1727 module_sp->GetFileSpec().GetFilename().AsCString());
1730 // Make sure we've parsed the symbol table from the ObjectFile before
1731 // we go around changing its Sections.
1732 module_sp->GetObjectFile()->GetSymtab();
1733 // eh_frame would present the same problems but we parse that on a per-
1734 // function basis as-needed so it's more difficult to remove its use of
1735 // the Sections. Realistically, the environments where this code path
1736 // will be taken will not have eh_frame sections.
1738 unified_section_sp->SetFileAddress(load_cmd.vmaddr);
1740 // Notify the module that the section addresses have been changed once
1741 // we're done so any file-address caches can be updated.
1742 context.FileAddressesChanged = true;
1744 m_sections_up->AddSection(unified_section_sp);
1747 llvm::MachO::section_64 sect64;
1748 ::memset(&sect64, 0, sizeof(sect64));
1749 // Push a section into our mach sections for the section at index zero
1750 // (NO_SECT) if we don't have any mach sections yet...
1751 if (m_mach_sections.empty())
1752 m_mach_sections.push_back(sect64);
1753 uint32_t segment_sect_idx;
1754 const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
1756 const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
1757 for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
1758 ++segment_sect_idx) {
1759 if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname,
1760 sizeof(sect64.sectname)) == nullptr)
1761 break;
1762 if (m_data.GetU8(&offset, (uint8_t *)sect64.segname,
1763 sizeof(sect64.segname)) == nullptr)
1764 break;
1765 sect64.addr = m_data.GetAddress(&offset);
1766 sect64.size = m_data.GetAddress(&offset);
1768 if (m_data.GetU32(&offset, &sect64.offset, num_u32s) == nullptr)
1769 break;
1771 if (IsSharedCacheBinary() && !IsInMemory()) {
1772 sect64.offset = sect64.addr - m_text_address;
1775 // Keep a list of mach sections around in case we need to get at data that
1776 // isn't stored in the abstracted Sections.
1777 m_mach_sections.push_back(sect64);
1779 if (add_section) {
1780 ConstString section_name(
1781 sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname)));
1782 if (!const_segname) {
1783 // We have a segment with no name so we need to conjure up segments
1784 // that correspond to the section's segname if there isn't already such
1785 // a section. If there is such a section, we resize the section so that
1786 // it spans all sections. We also mark these sections as fake so
1787 // address matches don't hit if they land in the gaps between the child
1788 // sections.
1789 const_segname.SetTrimmedCStringWithLength(sect64.segname,
1790 sizeof(sect64.segname));
1791 segment_sp = context.UnifiedList.FindSectionByName(const_segname);
1792 if (segment_sp.get()) {
1793 Section *segment = segment_sp.get();
1794 // Grow the section size as needed.
1795 const lldb::addr_t sect64_min_addr = sect64.addr;
1796 const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size;
1797 const lldb::addr_t curr_seg_byte_size = segment->GetByteSize();
1798 const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress();
1799 const lldb::addr_t curr_seg_max_addr =
1800 curr_seg_min_addr + curr_seg_byte_size;
1801 if (sect64_min_addr >= curr_seg_min_addr) {
1802 const lldb::addr_t new_seg_byte_size =
1803 sect64_max_addr - curr_seg_min_addr;
1804 // Only grow the section size if needed
1805 if (new_seg_byte_size > curr_seg_byte_size)
1806 segment->SetByteSize(new_seg_byte_size);
1807 } else {
1808 // We need to change the base address of the segment and adjust the
1809 // child section offsets for all existing children.
1810 const lldb::addr_t slide_amount =
1811 sect64_min_addr - curr_seg_min_addr;
1812 segment->Slide(slide_amount, false);
1813 segment->GetChildren().Slide(-slide_amount, false);
1814 segment->SetByteSize(curr_seg_max_addr - sect64_min_addr);
1817 // Grow the section size as needed.
1818 if (sect64.offset) {
1819 const lldb::addr_t segment_min_file_offset =
1820 segment->GetFileOffset();
1821 const lldb::addr_t segment_max_file_offset =
1822 segment_min_file_offset + segment->GetFileSize();
1824 const lldb::addr_t section_min_file_offset = sect64.offset;
1825 const lldb::addr_t section_max_file_offset =
1826 section_min_file_offset + sect64.size;
1827 const lldb::addr_t new_file_offset =
1828 std::min(section_min_file_offset, segment_min_file_offset);
1829 const lldb::addr_t new_file_size =
1830 std::max(section_max_file_offset, segment_max_file_offset) -
1831 new_file_offset;
1832 segment->SetFileOffset(new_file_offset);
1833 segment->SetFileSize(new_file_size);
1835 } else {
1836 // Create a fake section for the section's named segment
1837 segment_sp = std::make_shared<Section>(
1838 segment_sp, // Parent section
1839 module_sp, // Module to which this section belongs
1840 this, // Object file to which this section belongs
1841 ++context.NextSegmentIdx
1842 << 8, // Section ID is the 1 based segment index
1843 // shifted left by 8 bits so as not to
1844 // collide with any of the 256 section IDs
1845 // that are possible
1846 const_segname, // Name of this section
1847 eSectionTypeContainer, // This section is a container of
1848 // other sections.
1849 sect64.addr, // File VM address == addresses as they are
1850 // found in the object file
1851 sect64.size, // VM size in bytes of this section
1852 sect64.offset, // Offset to the data for this section in
1853 // the file
1854 sect64.offset ? sect64.size : 0, // Size in bytes of
1855 // this section as
1856 // found in the file
1857 sect64.align,
1858 load_cmd.flags); // Flags for this section
1859 segment_sp->SetIsFake(true);
1860 segment_sp->SetPermissions(segment_permissions);
1861 m_sections_up->AddSection(segment_sp);
1862 if (add_to_unified)
1863 context.UnifiedList.AddSection(segment_sp);
1864 segment_sp->SetIsEncrypted(segment_is_encrypted);
1867 assert(segment_sp.get());
1869 lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name);
1871 SectionSP section_sp(new Section(
1872 segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
1873 sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
1874 sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
1875 sect64.flags));
1876 // Set the section to be encrypted to match the segment
1878 bool section_is_encrypted = false;
1879 if (!segment_is_encrypted && load_cmd.filesize != 0)
1880 section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
1881 sect64.offset) != nullptr;
1883 section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
1884 section_sp->SetPermissions(segment_permissions);
1885 segment_sp->GetChildren().AddSection(section_sp);
1887 if (segment_sp->IsFake()) {
1888 segment_sp.reset();
1889 const_segname.Clear();
1893 if (segment_sp && is_dsym) {
1894 if (first_segment_sectID <= context.NextSectionIdx) {
1895 lldb::user_id_t sect_uid;
1896 for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx;
1897 ++sect_uid) {
1898 SectionSP curr_section_sp(
1899 segment_sp->GetChildren().FindSectionByID(sect_uid));
1900 SectionSP next_section_sp;
1901 if (sect_uid + 1 <= context.NextSectionIdx)
1902 next_section_sp =
1903 segment_sp->GetChildren().FindSectionByID(sect_uid + 1);
1905 if (curr_section_sp.get()) {
1906 if (curr_section_sp->GetByteSize() == 0) {
1907 if (next_section_sp.get() != nullptr)
1908 curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
1909 curr_section_sp->GetFileAddress());
1910 else
1911 curr_section_sp->SetByteSize(load_cmd.vmsize);
1919 void ObjectFileMachO::ProcessDysymtabCommand(
1920 const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) {
1921 m_dysymtab.cmd = load_cmd.cmd;
1922 m_dysymtab.cmdsize = load_cmd.cmdsize;
1923 m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1924 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2);
1927 void ObjectFileMachO::CreateSections(SectionList &unified_section_list) {
1928 if (m_sections_up)
1929 return;
1931 m_sections_up = std::make_unique<SectionList>();
1933 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
1934 // bool dump_sections = false;
1935 ModuleSP module_sp(GetModule());
1937 offset = MachHeaderSizeFromMagic(m_header.magic);
1939 SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list);
1940 llvm::MachO::load_command load_cmd;
1941 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1942 const lldb::offset_t load_cmd_offset = offset;
1943 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
1944 break;
1946 if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
1947 ProcessSegmentCommand(load_cmd, offset, i, context);
1948 else if (load_cmd.cmd == LC_DYSYMTAB)
1949 ProcessDysymtabCommand(load_cmd, offset);
1951 offset = load_cmd_offset + load_cmd.cmdsize;
1954 if (context.FileAddressesChanged && module_sp)
1955 module_sp->SectionFileAddressesChanged();
1958 class MachSymtabSectionInfo {
1959 public:
1960 MachSymtabSectionInfo(SectionList *section_list)
1961 : m_section_list(section_list), m_section_infos() {
1962 // Get the number of sections down to a depth of 1 to include all segments
1963 // and their sections, but no other sections that may be added for debug
1964 // map or
1965 m_section_infos.resize(section_list->GetNumSections(1));
1968 SectionSP GetSection(uint8_t n_sect, addr_t file_addr) {
1969 if (n_sect == 0)
1970 return SectionSP();
1971 if (n_sect < m_section_infos.size()) {
1972 if (!m_section_infos[n_sect].section_sp) {
1973 SectionSP section_sp(m_section_list->FindSectionByID(n_sect));
1974 m_section_infos[n_sect].section_sp = section_sp;
1975 if (section_sp) {
1976 m_section_infos[n_sect].vm_range.SetBaseAddress(
1977 section_sp->GetFileAddress());
1978 m_section_infos[n_sect].vm_range.SetByteSize(
1979 section_sp->GetByteSize());
1980 } else {
1981 std::string filename = "<unknown>";
1982 SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0));
1983 if (first_section_sp)
1984 filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
1986 Debugger::ReportError(
1987 llvm::formatv("unable to find section {0} for a symbol in "
1988 "{1}, corrupt file?",
1989 n_sect, filename));
1992 if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
1993 // Symbol is in section.
1994 return m_section_infos[n_sect].section_sp;
1995 } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 &&
1996 m_section_infos[n_sect].vm_range.GetBaseAddress() ==
1997 file_addr) {
1998 // Symbol is in section with zero size, but has the same start address
1999 // as the section. This can happen with linker symbols (symbols that
2000 // start with the letter 'l' or 'L'.
2001 return m_section_infos[n_sect].section_sp;
2004 return m_section_list->FindSectionContainingFileAddress(file_addr);
2007 protected:
2008 struct SectionInfo {
2009 SectionInfo() : vm_range(), section_sp() {}
2011 VMRange vm_range;
2012 SectionSP section_sp;
2014 SectionList *m_section_list;
2015 std::vector<SectionInfo> m_section_infos;
2018 #define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
2019 struct TrieEntry {
2020 void Dump() const {
2021 printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
2022 static_cast<unsigned long long>(address),
2023 static_cast<unsigned long long>(flags),
2024 static_cast<unsigned long long>(other), name.GetCString());
2025 if (import_name)
2026 printf(" -> \"%s\"\n", import_name.GetCString());
2027 else
2028 printf("\n");
2030 ConstString name;
2031 uint64_t address = LLDB_INVALID_ADDRESS;
2032 uint64_t flags =
2033 0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
2034 // TRIE_SYMBOL_IS_THUMB
2035 uint64_t other = 0;
2036 ConstString import_name;
2039 struct TrieEntryWithOffset {
2040 lldb::offset_t nodeOffset;
2041 TrieEntry entry;
2043 TrieEntryWithOffset(lldb::offset_t offset) : nodeOffset(offset), entry() {}
2045 void Dump(uint32_t idx) const {
2046 printf("[%3u] 0x%16.16llx: ", idx,
2047 static_cast<unsigned long long>(nodeOffset));
2048 entry.Dump();
2051 bool operator<(const TrieEntryWithOffset &other) const {
2052 return (nodeOffset < other.nodeOffset);
2056 static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset,
2057 const bool is_arm, addr_t text_seg_base_addr,
2058 std::vector<llvm::StringRef> &nameSlices,
2059 std::set<lldb::addr_t> &resolver_addresses,
2060 std::vector<TrieEntryWithOffset> &reexports,
2061 std::vector<TrieEntryWithOffset> &ext_symbols) {
2062 if (!data.ValidOffset(offset))
2063 return true;
2065 // Terminal node -- end of a branch, possibly add this to
2066 // the symbol table or resolver table.
2067 const uint64_t terminalSize = data.GetULEB128(&offset);
2068 lldb::offset_t children_offset = offset + terminalSize;
2069 if (terminalSize != 0) {
2070 TrieEntryWithOffset e(offset);
2071 e.entry.flags = data.GetULEB128(&offset);
2072 const char *import_name = nullptr;
2073 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
2074 e.entry.address = 0;
2075 e.entry.other = data.GetULEB128(&offset); // dylib ordinal
2076 import_name = data.GetCStr(&offset);
2077 } else {
2078 e.entry.address = data.GetULEB128(&offset);
2079 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2080 e.entry.address += text_seg_base_addr;
2081 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
2082 e.entry.other = data.GetULEB128(&offset);
2083 uint64_t resolver_addr = e.entry.other;
2084 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2085 resolver_addr += text_seg_base_addr;
2086 if (is_arm)
2087 resolver_addr &= THUMB_ADDRESS_BIT_MASK;
2088 resolver_addresses.insert(resolver_addr);
2089 } else
2090 e.entry.other = 0;
2092 bool add_this_entry = false;
2093 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) &&
2094 import_name && import_name[0]) {
2095 // add symbols that are reexport symbols with a valid import name.
2096 add_this_entry = true;
2097 } else if (e.entry.flags == 0 &&
2098 (import_name == nullptr || import_name[0] == '\0')) {
2099 // add externally visible symbols, in case the nlist record has
2100 // been stripped/omitted.
2101 add_this_entry = true;
2103 if (add_this_entry) {
2104 std::string name;
2105 if (!nameSlices.empty()) {
2106 for (auto name_slice : nameSlices)
2107 name.append(name_slice.data(), name_slice.size());
2109 if (name.size() > 1) {
2110 // Skip the leading '_'
2111 e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1);
2113 if (import_name) {
2114 // Skip the leading '_'
2115 e.entry.import_name.SetCString(import_name + 1);
2117 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) {
2118 reexports.push_back(e);
2119 } else {
2120 if (is_arm && (e.entry.address & 1)) {
2121 e.entry.flags |= TRIE_SYMBOL_IS_THUMB;
2122 e.entry.address &= THUMB_ADDRESS_BIT_MASK;
2124 ext_symbols.push_back(e);
2129 const uint8_t childrenCount = data.GetU8(&children_offset);
2130 for (uint8_t i = 0; i < childrenCount; ++i) {
2131 const char *cstr = data.GetCStr(&children_offset);
2132 if (cstr)
2133 nameSlices.push_back(llvm::StringRef(cstr));
2134 else
2135 return false; // Corrupt data
2136 lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset);
2137 if (childNodeOffset) {
2138 if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr,
2139 nameSlices, resolver_addresses, reexports,
2140 ext_symbols)) {
2141 return false;
2144 nameSlices.pop_back();
2146 return true;
2149 static SymbolType GetSymbolType(const char *&symbol_name,
2150 bool &demangled_is_synthesized,
2151 const SectionSP &text_section_sp,
2152 const SectionSP &data_section_sp,
2153 const SectionSP &data_dirty_section_sp,
2154 const SectionSP &data_const_section_sp,
2155 const SectionSP &symbol_section) {
2156 SymbolType type = eSymbolTypeInvalid;
2158 const char *symbol_sect_name = symbol_section->GetName().AsCString();
2159 if (symbol_section->IsDescendant(text_section_sp.get())) {
2160 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
2161 S_ATTR_SELF_MODIFYING_CODE |
2162 S_ATTR_SOME_INSTRUCTIONS))
2163 type = eSymbolTypeData;
2164 else
2165 type = eSymbolTypeCode;
2166 } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
2167 symbol_section->IsDescendant(data_dirty_section_sp.get()) ||
2168 symbol_section->IsDescendant(data_const_section_sp.get())) {
2169 if (symbol_sect_name &&
2170 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
2171 type = eSymbolTypeRuntime;
2173 if (symbol_name) {
2174 llvm::StringRef symbol_name_ref(symbol_name);
2175 if (symbol_name_ref.starts_with("OBJC_")) {
2176 static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_");
2177 static const llvm::StringRef g_objc_v2_prefix_metaclass(
2178 "OBJC_METACLASS_$_");
2179 static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_");
2180 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) {
2181 symbol_name = symbol_name + g_objc_v2_prefix_class.size();
2182 type = eSymbolTypeObjCClass;
2183 demangled_is_synthesized = true;
2184 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_metaclass)) {
2185 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
2186 type = eSymbolTypeObjCMetaClass;
2187 demangled_is_synthesized = true;
2188 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_ivar)) {
2189 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
2190 type = eSymbolTypeObjCIVar;
2191 demangled_is_synthesized = true;
2195 } else if (symbol_sect_name &&
2196 ::strstr(symbol_sect_name, "__gcc_except_tab") ==
2197 symbol_sect_name) {
2198 type = eSymbolTypeException;
2199 } else {
2200 type = eSymbolTypeData;
2202 } else if (symbol_sect_name &&
2203 ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) {
2204 type = eSymbolTypeTrampoline;
2206 return type;
2209 static std::optional<struct nlist_64>
2210 ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset,
2211 size_t nlist_byte_size) {
2212 struct nlist_64 nlist;
2213 if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size))
2214 return {};
2215 nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset);
2216 nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset);
2217 nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset);
2218 nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset);
2219 nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset);
2220 return nlist;
2223 enum { DebugSymbols = true, NonDebugSymbols = false };
2225 void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
2226 ModuleSP module_sp(GetModule());
2227 if (!module_sp)
2228 return;
2230 Log *log = GetLog(LLDBLog::Symbols);
2232 const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec();
2233 const char *file_name = file.GetFilename().AsCString("<Unknown>");
2234 LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name);
2235 LLDB_LOG(log, "Parsing symbol table for {0}", file_name);
2236 Progress progress("Parsing symbol table", file_name);
2238 llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
2239 llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0};
2240 llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2241 llvm::MachO::dysymtab_command dysymtab = m_dysymtab;
2242 SymtabCommandLargeOffsets symtab_load_command;
2243 // The data element of type bool indicates that this entry is thumb
2244 // code.
2245 typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
2247 // Record the address of every function/data that we add to the symtab.
2248 // We add symbols to the table in the order of most information (nlist
2249 // records) to least (function starts), and avoid duplicating symbols
2250 // via this set.
2251 llvm::DenseSet<addr_t> symbols_added;
2253 // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
2254 // do not add the tombstone or empty keys to the set.
2255 auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
2256 // Don't add the tombstone or empty keys.
2257 if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
2258 return;
2259 symbols_added.insert(file_addr);
2261 FunctionStarts function_starts;
2262 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
2263 uint32_t i;
2264 FileSpecList dylib_files;
2265 llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
2266 llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
2267 llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
2268 UUID image_uuid;
2270 for (i = 0; i < m_header.ncmds; ++i) {
2271 const lldb::offset_t cmd_offset = offset;
2272 // Read in the load command and load command size
2273 llvm::MachO::load_command lc;
2274 if (m_data.GetU32(&offset, &lc, 2) == nullptr)
2275 break;
2276 // Watch for the symbol table load command
2277 switch (lc.cmd) {
2278 case LC_SYMTAB:
2279 // struct symtab_command {
2280 // uint32_t cmd; /* LC_SYMTAB */
2281 // uint32_t cmdsize; /* sizeof(struct symtab_command) */
2282 // uint32_t symoff; /* symbol table offset */
2283 // uint32_t nsyms; /* number of symbol table entries */
2284 // uint32_t stroff; /* string table offset */
2285 // uint32_t strsize; /* string table size in bytes */
2286 // };
2287 symtab_load_command.cmd = lc.cmd;
2288 symtab_load_command.cmdsize = lc.cmdsize;
2289 symtab_load_command.symoff = m_data.GetU32(&offset);
2290 symtab_load_command.nsyms = m_data.GetU32(&offset);
2291 symtab_load_command.stroff = m_data.GetU32(&offset);
2292 symtab_load_command.strsize = m_data.GetU32(&offset);
2293 break;
2295 case LC_DYLD_INFO:
2296 case LC_DYLD_INFO_ONLY:
2297 if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) {
2298 dyld_info.cmd = lc.cmd;
2299 dyld_info.cmdsize = lc.cmdsize;
2300 } else {
2301 memset(&dyld_info, 0, sizeof(dyld_info));
2303 break;
2305 case LC_LOAD_DYLIB:
2306 case LC_LOAD_WEAK_DYLIB:
2307 case LC_REEXPORT_DYLIB:
2308 case LC_LOADFVMLIB:
2309 case LC_LOAD_UPWARD_DYLIB: {
2310 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
2311 const char *path = m_data.PeekCStr(name_offset);
2312 if (path) {
2313 FileSpec file_spec(path);
2314 // Strip the path if there is @rpath, @executable, etc so we just use
2315 // the basename
2316 if (path[0] == '@')
2317 file_spec.ClearDirectory();
2319 if (lc.cmd == LC_REEXPORT_DYLIB) {
2320 m_reexported_dylibs.AppendIfUnique(file_spec);
2323 dylib_files.Append(file_spec);
2325 } break;
2327 case LC_DYLD_EXPORTS_TRIE:
2328 exports_trie_load_command.cmd = lc.cmd;
2329 exports_trie_load_command.cmdsize = lc.cmdsize;
2330 if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) ==
2331 nullptr) // fill in offset and size fields
2332 memset(&exports_trie_load_command, 0,
2333 sizeof(exports_trie_load_command));
2334 break;
2335 case LC_FUNCTION_STARTS:
2336 function_starts_load_command.cmd = lc.cmd;
2337 function_starts_load_command.cmdsize = lc.cmdsize;
2338 if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) ==
2339 nullptr) // fill in data offset and size fields
2340 memset(&function_starts_load_command, 0,
2341 sizeof(function_starts_load_command));
2342 break;
2344 case LC_UUID: {
2345 const uint8_t *uuid_bytes = m_data.PeekData(offset, 16);
2347 if (uuid_bytes)
2348 image_uuid = UUID(uuid_bytes, 16);
2349 break;
2352 default:
2353 break;
2355 offset = cmd_offset + lc.cmdsize;
2358 if (!symtab_load_command.cmd)
2359 return;
2361 SectionList *section_list = GetSectionList();
2362 if (section_list == nullptr)
2363 return;
2365 const uint32_t addr_byte_size = m_data.GetAddressByteSize();
2366 const ByteOrder byte_order = m_data.GetByteOrder();
2367 bool bit_width_32 = addr_byte_size == 4;
2368 const size_t nlist_byte_size =
2369 bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
2371 DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
2372 DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
2373 DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
2374 DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
2375 addr_byte_size);
2376 DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
2378 const addr_t nlist_data_byte_size =
2379 symtab_load_command.nsyms * nlist_byte_size;
2380 const addr_t strtab_data_byte_size = symtab_load_command.strsize;
2381 addr_t strtab_addr = LLDB_INVALID_ADDRESS;
2383 ProcessSP process_sp(m_process_wp.lock());
2384 Process *process = process_sp.get();
2386 uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
2387 bool is_shared_cache_image = IsSharedCacheBinary();
2388 bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
2389 SectionSP linkedit_section_sp(
2390 section_list->FindSectionByName(GetSegmentNameLINKEDIT()));
2392 if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
2393 !is_local_shared_cache_image) {
2394 Target &target = process->GetTarget();
2396 memory_module_load_level = target.GetMemoryModuleLoadLevel();
2398 // Reading mach file from memory in a process or core file...
2400 if (linkedit_section_sp) {
2401 addr_t linkedit_load_addr =
2402 linkedit_section_sp->GetLoadBaseAddress(&target);
2403 if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
2404 // We might be trying to access the symbol table before the
2405 // __LINKEDIT's load address has been set in the target. We can't
2406 // fail to read the symbol table, so calculate the right address
2407 // manually
2408 linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
2409 m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get());
2412 const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
2413 const addr_t symoff_addr = linkedit_load_addr +
2414 symtab_load_command.symoff -
2415 linkedit_file_offset;
2416 strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
2417 linkedit_file_offset;
2419 // Always load dyld - the dynamic linker - from memory if we didn't
2420 // find a binary anywhere else. lldb will not register
2421 // dylib/framework/bundle loads/unloads if we don't have the dyld
2422 // symbols, we force dyld to load from memory despite the user's
2423 // target.memory-module-load-level setting.
2424 if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
2425 m_header.filetype == llvm::MachO::MH_DYLINKER) {
2426 DataBufferSP nlist_data_sp(
2427 ReadMemory(process_sp, symoff_addr, nlist_data_byte_size));
2428 if (nlist_data_sp)
2429 nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize());
2430 if (dysymtab.nindirectsyms != 0) {
2431 const addr_t indirect_syms_addr = linkedit_load_addr +
2432 dysymtab.indirectsymoff -
2433 linkedit_file_offset;
2434 DataBufferSP indirect_syms_data_sp(ReadMemory(
2435 process_sp, indirect_syms_addr, dysymtab.nindirectsyms * 4));
2436 if (indirect_syms_data_sp)
2437 indirect_symbol_index_data.SetData(
2438 indirect_syms_data_sp, 0, indirect_syms_data_sp->GetByteSize());
2439 // If this binary is outside the shared cache,
2440 // cache the string table.
2441 // Binaries in the shared cache all share a giant string table,
2442 // and we can't share the string tables across multiple
2443 // ObjectFileMachO's, so we'd end up re-reading this mega-strtab
2444 // for every binary in the shared cache - it would be a big perf
2445 // problem. For binaries outside the shared cache, it's faster to
2446 // read the entire strtab at once instead of piece-by-piece as we
2447 // process the nlist records.
2448 if (!is_shared_cache_image) {
2449 DataBufferSP strtab_data_sp(
2450 ReadMemory(process_sp, strtab_addr, strtab_data_byte_size));
2451 if (strtab_data_sp) {
2452 strtab_data.SetData(strtab_data_sp, 0,
2453 strtab_data_sp->GetByteSize());
2457 if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
2458 if (function_starts_load_command.cmd) {
2459 const addr_t func_start_addr =
2460 linkedit_load_addr + function_starts_load_command.dataoff -
2461 linkedit_file_offset;
2462 DataBufferSP func_start_data_sp(
2463 ReadMemory(process_sp, func_start_addr,
2464 function_starts_load_command.datasize));
2465 if (func_start_data_sp)
2466 function_starts_data.SetData(func_start_data_sp, 0,
2467 func_start_data_sp->GetByteSize());
2472 } else {
2473 if (is_local_shared_cache_image) {
2474 // The load commands in shared cache images are relative to the
2475 // beginning of the shared cache, not the library image. The
2476 // data we get handed when creating the ObjectFileMachO starts
2477 // at the beginning of a specific library and spans to the end
2478 // of the cache to be able to reach the shared LINKEDIT
2479 // segments. We need to convert the load command offsets to be
2480 // relative to the beginning of our specific image.
2481 lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
2482 lldb::offset_t linkedit_slide =
2483 linkedit_offset - m_linkedit_original_offset;
2484 symtab_load_command.symoff += linkedit_slide;
2485 symtab_load_command.stroff += linkedit_slide;
2486 dyld_info.export_off += linkedit_slide;
2487 dysymtab.indirectsymoff += linkedit_slide;
2488 function_starts_load_command.dataoff += linkedit_slide;
2489 exports_trie_load_command.dataoff += linkedit_slide;
2492 nlist_data.SetData(m_data, symtab_load_command.symoff,
2493 nlist_data_byte_size);
2494 strtab_data.SetData(m_data, symtab_load_command.stroff,
2495 strtab_data_byte_size);
2497 // We shouldn't have exports data from both the LC_DYLD_INFO command
2498 // AND the LC_DYLD_EXPORTS_TRIE command in the same binary:
2499 lldbassert(!((dyld_info.export_size > 0)
2500 && (exports_trie_load_command.datasize > 0)));
2501 if (dyld_info.export_size > 0) {
2502 dyld_trie_data.SetData(m_data, dyld_info.export_off,
2503 dyld_info.export_size);
2504 } else if (exports_trie_load_command.datasize > 0) {
2505 dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff,
2506 exports_trie_load_command.datasize);
2509 if (dysymtab.nindirectsyms != 0) {
2510 indirect_symbol_index_data.SetData(m_data, dysymtab.indirectsymoff,
2511 dysymtab.nindirectsyms * 4);
2513 if (function_starts_load_command.cmd) {
2514 function_starts_data.SetData(m_data, function_starts_load_command.dataoff,
2515 function_starts_load_command.datasize);
2519 const bool have_strtab_data = strtab_data.GetByteSize() > 0;
2521 ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
2522 ConstString g_segment_name_DATA = GetSegmentNameDATA();
2523 ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
2524 ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
2525 ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
2526 ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
2527 SectionSP text_section_sp(
2528 section_list->FindSectionByName(g_segment_name_TEXT));
2529 SectionSP data_section_sp(
2530 section_list->FindSectionByName(g_segment_name_DATA));
2531 SectionSP data_dirty_section_sp(
2532 section_list->FindSectionByName(g_segment_name_DATA_DIRTY));
2533 SectionSP data_const_section_sp(
2534 section_list->FindSectionByName(g_segment_name_DATA_CONST));
2535 SectionSP objc_section_sp(
2536 section_list->FindSectionByName(g_segment_name_OBJC));
2537 SectionSP eh_frame_section_sp;
2538 if (text_section_sp.get())
2539 eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
2540 g_section_name_eh_frame);
2541 else
2542 eh_frame_section_sp =
2543 section_list->FindSectionByName(g_section_name_eh_frame);
2545 const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
2546 const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
2548 // lldb works best if it knows the start address of all functions in a
2549 // module. Linker symbols or debug info are normally the best source of
2550 // information for start addr / size but they may be stripped in a released
2551 // binary. Two additional sources of information exist in Mach-O binaries:
2552 // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
2553 // function's start address in the
2554 // binary, relative to the text section.
2555 // eh_frame - the eh_frame FDEs have the start addr & size of
2556 // each function
2557 // LC_FUNCTION_STARTS is the fastest source to read in, and is present on
2558 // all modern binaries.
2559 // Binaries built to run on older releases may need to use eh_frame
2560 // information.
2562 if (text_section_sp && function_starts_data.GetByteSize()) {
2563 FunctionStarts::Entry function_start_entry;
2564 function_start_entry.data = false;
2565 lldb::offset_t function_start_offset = 0;
2566 function_start_entry.addr = text_section_sp->GetFileAddress();
2567 uint64_t delta;
2568 while ((delta = function_starts_data.GetULEB128(&function_start_offset)) >
2569 0) {
2570 // Now append the current entry
2571 function_start_entry.addr += delta;
2572 if (is_arm) {
2573 if (function_start_entry.addr & 1) {
2574 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2575 function_start_entry.data = true;
2576 } else if (always_thumb) {
2577 function_start_entry.data = true;
2580 function_starts.Append(function_start_entry);
2582 } else {
2583 // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
2584 // load command claiming an eh_frame but it doesn't actually have the
2585 // eh_frame content. And if we have a dSYM, we don't need to do any of
2586 // this fill-in-the-missing-symbols works anyway - the debug info should
2587 // give us all the functions in the module.
2588 if (text_section_sp.get() && eh_frame_section_sp.get() &&
2589 m_type != eTypeDebugInfo) {
2590 DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
2591 DWARFCallFrameInfo::EH);
2592 DWARFCallFrameInfo::FunctionAddressAndSizeVector functions;
2593 eh_frame.GetFunctionAddressAndSizeVector(functions);
2594 addr_t text_base_addr = text_section_sp->GetFileAddress();
2595 size_t count = functions.GetSize();
2596 for (size_t i = 0; i < count; ++i) {
2597 const DWARFCallFrameInfo::FunctionAddressAndSizeVector::Entry *func =
2598 functions.GetEntryAtIndex(i);
2599 if (func) {
2600 FunctionStarts::Entry function_start_entry;
2601 function_start_entry.addr = func->base - text_base_addr;
2602 if (is_arm) {
2603 if (function_start_entry.addr & 1) {
2604 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2605 function_start_entry.data = true;
2606 } else if (always_thumb) {
2607 function_start_entry.data = true;
2610 function_starts.Append(function_start_entry);
2616 const size_t function_starts_count = function_starts.GetSize();
2618 // For user process binaries (executables, dylibs, frameworks, bundles), if
2619 // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
2620 // going to assume the binary has been stripped. Don't allow assembly
2621 // language instruction emulation because we don't know proper function
2622 // start boundaries.
2624 // For all other types of binaries (kernels, stand-alone bare board
2625 // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
2626 // sections - we should not make any assumptions about them based on that.
2627 if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
2628 m_allow_assembly_emulation_unwind_plans = false;
2629 Log *unwind_or_symbol_log(GetLog(LLDBLog::Symbols | LLDBLog::Unwind));
2631 if (unwind_or_symbol_log)
2632 module_sp->LogMessage(
2633 unwind_or_symbol_log,
2634 "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
2637 const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
2638 ? eh_frame_section_sp->GetID()
2639 : static_cast<user_id_t>(NO_SECT);
2641 uint32_t N_SO_index = UINT32_MAX;
2643 MachSymtabSectionInfo section_info(section_list);
2644 std::vector<uint32_t> N_FUN_indexes;
2645 std::vector<uint32_t> N_NSYM_indexes;
2646 std::vector<uint32_t> N_INCL_indexes;
2647 std::vector<uint32_t> N_BRAC_indexes;
2648 std::vector<uint32_t> N_COMM_indexes;
2649 typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
2650 typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
2651 typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
2652 ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
2653 ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
2654 ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
2655 // Any symbols that get merged into another will get an entry in this map
2656 // so we know which symbol index the merged nlist index now refers to.
2657 NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
2658 uint32_t nlist_idx = 0;
2659 Symbol *symbol_ptr = nullptr;
2661 uint32_t sym_idx = 0;
2662 Symbol *sym = nullptr;
2663 size_t num_syms = 0;
2664 std::string memory_symbol_name;
2665 uint32_t unmapped_local_symbols_found = 0;
2667 std::vector<TrieEntryWithOffset> reexport_trie_entries;
2668 std::vector<TrieEntryWithOffset> external_sym_trie_entries;
2669 std::set<lldb::addr_t> resolver_addresses;
2671 const size_t dyld_trie_data_size = dyld_trie_data.GetByteSize();
2672 if (dyld_trie_data_size > 0) {
2673 LLDB_LOG(log, "Parsing {0} bytes of dyld trie data", dyld_trie_data_size);
2674 SectionSP text_segment_sp =
2675 GetSectionList()->FindSectionByName(GetSegmentNameTEXT());
2676 lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
2677 if (text_segment_sp)
2678 text_segment_file_addr = text_segment_sp->GetFileAddress();
2679 std::vector<llvm::StringRef> nameSlices;
2680 ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr,
2681 nameSlices, resolver_addresses, reexport_trie_entries,
2682 external_sym_trie_entries);
2685 typedef std::set<ConstString> IndirectSymbols;
2686 IndirectSymbols indirect_symbol_names;
2688 #if TARGET_OS_IPHONE
2690 // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
2691 // optimized by moving LOCAL symbols out of the memory mapped portion of
2692 // the DSC. The symbol information has all been retained, but it isn't
2693 // available in the normal nlist data. However, there *are* duplicate
2694 // entries of *some*
2695 // LOCAL symbols in the normal nlist data. To handle this situation
2696 // correctly, we must first attempt
2697 // to parse any DSC unmapped symbol information. If we find any, we set a
2698 // flag that tells the normal nlist parser to ignore all LOCAL symbols.
2700 if (IsSharedCacheBinary()) {
2701 // Before we can start mapping the DSC, we need to make certain the
2702 // target process is actually using the cache we can find.
2704 // Next we need to determine the correct path for the dyld shared cache.
2706 ArchSpec header_arch = GetArchitecture();
2708 UUID dsc_uuid;
2709 UUID process_shared_cache_uuid;
2710 addr_t process_shared_cache_base_addr;
2712 if (process) {
2713 GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
2714 process_shared_cache_uuid);
2717 __block bool found_image = false;
2718 __block void *nlist_buffer = nullptr;
2719 __block unsigned nlist_count = 0;
2720 __block char *string_table = nullptr;
2721 __block vm_offset_t vm_nlist_memory = 0;
2722 __block mach_msg_type_number_t vm_nlist_bytes_read = 0;
2723 __block vm_offset_t vm_string_memory = 0;
2724 __block mach_msg_type_number_t vm_string_bytes_read = 0;
2726 auto _ = llvm::make_scope_exit(^{
2727 if (vm_nlist_memory)
2728 vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read);
2729 if (vm_string_memory)
2730 vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read);
2733 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
2734 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
2735 UndefinedNameToDescMap undefined_name_to_desc;
2736 SymbolIndexToName reexport_shlib_needs_fixup;
2738 dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) {
2739 uuid_t cache_uuid;
2740 dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid);
2741 if (found_image)
2742 return;
2744 if (process_shared_cache_uuid.IsValid() &&
2745 process_shared_cache_uuid != UUID::fromData(&cache_uuid, 16))
2746 return;
2748 dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) {
2749 uuid_t dsc_image_uuid;
2750 if (found_image)
2751 return;
2753 dyld_image_copy_uuid(image, &dsc_image_uuid);
2754 if (image_uuid != UUID::fromData(dsc_image_uuid, 16))
2755 return;
2757 found_image = true;
2759 // Compute the size of the string table. We need to ask dyld for a
2760 // new SPI to avoid this step.
2761 dyld_image_local_nlist_content_4Symbolication(
2762 image, ^(const void *nlistStart, uint64_t nlistCount,
2763 const char *stringTable) {
2764 if (!nlistStart || !nlistCount)
2765 return;
2767 // The buffers passed here are valid only inside the block.
2768 // Use vm_read to make a cheap copy of them available for our
2769 // processing later.
2770 kern_return_t ret =
2771 vm_read(mach_task_self(), (vm_address_t)nlistStart,
2772 nlist_byte_size * nlistCount, &vm_nlist_memory,
2773 &vm_nlist_bytes_read);
2774 if (ret != KERN_SUCCESS)
2775 return;
2776 assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount);
2778 // We don't know the size of the string table. It's cheaper
2779 // to map the whole VM region than to determine the size by
2780 // parsing all the nlist entries.
2781 vm_address_t string_address = (vm_address_t)stringTable;
2782 vm_size_t region_size;
2783 mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
2784 vm_region_basic_info_data_t info;
2785 memory_object_name_t object;
2786 ret = vm_region_64(mach_task_self(), &string_address,
2787 &region_size, VM_REGION_BASIC_INFO_64,
2788 (vm_region_info_t)&info, &info_count, &object);
2789 if (ret != KERN_SUCCESS)
2790 return;
2792 ret = vm_read(mach_task_self(), (vm_address_t)stringTable,
2793 region_size -
2794 ((vm_address_t)stringTable - string_address),
2795 &vm_string_memory, &vm_string_bytes_read);
2796 if (ret != KERN_SUCCESS)
2797 return;
2799 nlist_buffer = (void *)vm_nlist_memory;
2800 string_table = (char *)vm_string_memory;
2801 nlist_count = nlistCount;
2805 if (nlist_buffer) {
2806 DataExtractor dsc_local_symbols_data(nlist_buffer,
2807 nlist_count * nlist_byte_size,
2808 byte_order, addr_byte_size);
2809 unmapped_local_symbols_found = nlist_count;
2811 // The normal nlist code cannot correctly size the Symbols
2812 // array, we need to allocate it here.
2813 sym = symtab.Resize(
2814 symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
2815 unmapped_local_symbols_found - m_dysymtab.nlocalsym);
2816 num_syms = symtab.GetNumSymbols();
2818 lldb::offset_t nlist_data_offset = 0;
2820 for (uint32_t nlist_index = 0;
2821 nlist_index < nlist_count;
2822 nlist_index++) {
2823 /////////////////////////////
2825 std::optional<struct nlist_64> nlist_maybe =
2826 ParseNList(dsc_local_symbols_data, nlist_data_offset,
2827 nlist_byte_size);
2828 if (!nlist_maybe)
2829 break;
2830 struct nlist_64 nlist = *nlist_maybe;
2832 SymbolType type = eSymbolTypeInvalid;
2833 const char *symbol_name = string_table + nlist.n_strx;
2835 if (symbol_name == NULL) {
2836 // No symbol should be NULL, even the symbols with no
2837 // string values should have an offset zero which
2838 // points to an empty C-string
2839 Debugger::ReportError(llvm::formatv(
2840 "DSC unmapped local symbol[{0}] has invalid "
2841 "string table offset {1:x} in {2}, ignoring symbol",
2842 nlist_index, nlist.n_strx,
2843 module_sp->GetFileSpec().GetPath());
2844 continue;
2846 if (symbol_name[0] == '\0')
2847 symbol_name = NULL;
2849 const char *symbol_name_non_abi_mangled = NULL;
2851 SectionSP symbol_section;
2852 uint32_t symbol_byte_size = 0;
2853 bool add_nlist = true;
2854 bool is_debug = ((nlist.n_type & N_STAB) != 0);
2855 bool demangled_is_synthesized = false;
2856 bool is_gsym = false;
2857 bool set_value = true;
2859 assert(sym_idx < num_syms);
2861 sym[sym_idx].SetDebug(is_debug);
2863 if (is_debug) {
2864 switch (nlist.n_type) {
2865 case N_GSYM:
2866 // global symbol: name,,NO_SECT,type,0
2867 // Sometimes the N_GSYM value contains the address.
2869 // FIXME: In the .o files, we have a GSYM and a debug
2870 // symbol for all the ObjC data. They
2871 // have the same address, but we want to ensure that
2872 // we always find only the real symbol, 'cause we
2873 // don't currently correctly attribute the
2874 // GSYM one to the ObjCClass/Ivar/MetaClass
2875 // symbol type. This is a temporary hack to make
2876 // sure the ObjectiveC symbols get treated correctly.
2877 // To do this right, we should coalesce all the GSYM
2878 // & global symbols that have the same address.
2880 is_gsym = true;
2881 sym[sym_idx].SetExternal(true);
2883 if (symbol_name && symbol_name[0] == '_' &&
2884 symbol_name[1] == 'O') {
2885 llvm::StringRef symbol_name_ref(symbol_name);
2886 if (symbol_name_ref.starts_with(
2887 g_objc_v2_prefix_class)) {
2888 symbol_name_non_abi_mangled = symbol_name + 1;
2889 symbol_name =
2890 symbol_name + g_objc_v2_prefix_class.size();
2891 type = eSymbolTypeObjCClass;
2892 demangled_is_synthesized = true;
2894 } else if (symbol_name_ref.starts_with(
2895 g_objc_v2_prefix_metaclass)) {
2896 symbol_name_non_abi_mangled = symbol_name + 1;
2897 symbol_name =
2898 symbol_name + g_objc_v2_prefix_metaclass.size();
2899 type = eSymbolTypeObjCMetaClass;
2900 demangled_is_synthesized = true;
2901 } else if (symbol_name_ref.starts_with(
2902 g_objc_v2_prefix_ivar)) {
2903 symbol_name_non_abi_mangled = symbol_name + 1;
2904 symbol_name =
2905 symbol_name + g_objc_v2_prefix_ivar.size();
2906 type = eSymbolTypeObjCIVar;
2907 demangled_is_synthesized = true;
2909 } else {
2910 if (nlist.n_value != 0)
2911 symbol_section = section_info.GetSection(
2912 nlist.n_sect, nlist.n_value);
2913 type = eSymbolTypeData;
2915 break;
2917 case N_FNAME:
2918 // procedure name (f77 kludge): name,,NO_SECT,0,0
2919 type = eSymbolTypeCompiler;
2920 break;
2922 case N_FUN:
2923 // procedure: name,,n_sect,linenumber,address
2924 if (symbol_name) {
2925 type = eSymbolTypeCode;
2926 symbol_section = section_info.GetSection(
2927 nlist.n_sect, nlist.n_value);
2929 N_FUN_addr_to_sym_idx.insert(
2930 std::make_pair(nlist.n_value, sym_idx));
2931 // We use the current number of symbols in the
2932 // symbol table in lieu of using nlist_idx in case
2933 // we ever start trimming entries out
2934 N_FUN_indexes.push_back(sym_idx);
2935 } else {
2936 type = eSymbolTypeCompiler;
2938 if (!N_FUN_indexes.empty()) {
2939 // Copy the size of the function into the
2940 // original
2941 // STAB entry so we don't have
2942 // to hunt for it later
2943 symtab.SymbolAtIndex(N_FUN_indexes.back())
2944 ->SetByteSize(nlist.n_value);
2945 N_FUN_indexes.pop_back();
2946 // We don't really need the end function STAB as
2947 // it contains the size which we already placed
2948 // with the original symbol, so don't add it if
2949 // we want a minimal symbol table
2950 add_nlist = false;
2953 break;
2955 case N_STSYM:
2956 // static symbol: name,,n_sect,type,address
2957 N_STSYM_addr_to_sym_idx.insert(
2958 std::make_pair(nlist.n_value, sym_idx));
2959 symbol_section = section_info.GetSection(nlist.n_sect,
2960 nlist.n_value);
2961 if (symbol_name && symbol_name[0]) {
2962 type = ObjectFile::GetSymbolTypeFromName(
2963 symbol_name + 1, eSymbolTypeData);
2965 break;
2967 case N_LCSYM:
2968 // .lcomm symbol: name,,n_sect,type,address
2969 symbol_section = section_info.GetSection(nlist.n_sect,
2970 nlist.n_value);
2971 type = eSymbolTypeCommonBlock;
2972 break;
2974 case N_BNSYM:
2975 // We use the current number of symbols in the symbol
2976 // table in lieu of using nlist_idx in case we ever
2977 // start trimming entries out. Skip these if we want
2978 // minimal symbol tables
2979 add_nlist = false;
2980 break;
2982 case N_ENSYM:
2983 // Set the size of the N_BNSYM to the terminating
2984 // index of this N_ENSYM so that we can always skip
2985 // the entire symbol if we need to navigate more
2986 // quickly at the source level when parsing STABS.
2987 // Skip these if we want minimal symbol tables
2988 add_nlist = false;
2989 break;
2991 case N_OPT:
2992 // emitted with gcc2_compiled and in gcc source
2993 type = eSymbolTypeCompiler;
2994 break;
2996 case N_RSYM:
2997 // register sym: name,,NO_SECT,type,register
2998 type = eSymbolTypeVariable;
2999 break;
3001 case N_SLINE:
3002 // src line: 0,,n_sect,linenumber,address
3003 symbol_section = section_info.GetSection(nlist.n_sect,
3004 nlist.n_value);
3005 type = eSymbolTypeLineEntry;
3006 break;
3008 case N_SSYM:
3009 // structure elt: name,,NO_SECT,type,struct_offset
3010 type = eSymbolTypeVariableType;
3011 break;
3013 case N_SO:
3014 // source file name
3015 type = eSymbolTypeSourceFile;
3016 if (symbol_name == NULL) {
3017 add_nlist = false;
3018 if (N_SO_index != UINT32_MAX) {
3019 // Set the size of the N_SO to the terminating
3020 // index of this N_SO so that we can always skip
3021 // the entire N_SO if we need to navigate more
3022 // quickly at the source level when parsing STABS
3023 symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
3024 symbol_ptr->SetByteSize(sym_idx);
3025 symbol_ptr->SetSizeIsSibling(true);
3027 N_NSYM_indexes.clear();
3028 N_INCL_indexes.clear();
3029 N_BRAC_indexes.clear();
3030 N_COMM_indexes.clear();
3031 N_FUN_indexes.clear();
3032 N_SO_index = UINT32_MAX;
3033 } else {
3034 // We use the current number of symbols in the
3035 // symbol table in lieu of using nlist_idx in case
3036 // we ever start trimming entries out
3037 const bool N_SO_has_full_path = symbol_name[0] == '/';
3038 if (N_SO_has_full_path) {
3039 if ((N_SO_index == sym_idx - 1) &&
3040 ((sym_idx - 1) < num_syms)) {
3041 // We have two consecutive N_SO entries where
3042 // the first contains a directory and the
3043 // second contains a full path.
3044 sym[sym_idx - 1].GetMangled().SetValue(
3045 ConstString(symbol_name));
3046 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3047 add_nlist = false;
3048 } else {
3049 // This is the first entry in a N_SO that
3050 // contains a directory or
3051 // a full path to the source file
3052 N_SO_index = sym_idx;
3054 } else if ((N_SO_index == sym_idx - 1) &&
3055 ((sym_idx - 1) < num_syms)) {
3056 // This is usually the second N_SO entry that
3057 // contains just the filename, so here we combine
3058 // it with the first one if we are minimizing the
3059 // symbol table
3060 const char *so_path = sym[sym_idx - 1]
3061 .GetMangled()
3062 .GetDemangledName()
3063 .AsCString();
3064 if (so_path && so_path[0]) {
3065 std::string full_so_path(so_path);
3066 const size_t double_slash_pos =
3067 full_so_path.find("//");
3068 if (double_slash_pos != std::string::npos) {
3069 // The linker has been generating bad N_SO
3070 // entries with doubled up paths
3071 // in the format "%s%s" where the first
3072 // string is the DW_AT_comp_dir, and the
3073 // second is the directory for the source
3074 // file so you end up with a path that looks
3075 // like "/tmp/src//tmp/src/"
3076 FileSpec so_dir(so_path);
3077 if (!FileSystem::Instance().Exists(so_dir)) {
3078 so_dir.SetFile(
3079 &full_so_path[double_slash_pos + 1],
3080 FileSpec::Style::native);
3081 if (FileSystem::Instance().Exists(so_dir)) {
3082 // Trim off the incorrect path
3083 full_so_path.erase(0, double_slash_pos + 1);
3087 if (*full_so_path.rbegin() != '/')
3088 full_so_path += '/';
3089 full_so_path += symbol_name;
3090 sym[sym_idx - 1].GetMangled().SetValue(
3091 ConstString(full_so_path.c_str()));
3092 add_nlist = false;
3093 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3095 } else {
3096 // This could be a relative path to a N_SO
3097 N_SO_index = sym_idx;
3100 break;
3102 case N_OSO:
3103 // object file name: name,,0,0,st_mtime
3104 type = eSymbolTypeObjectFile;
3105 break;
3107 case N_LSYM:
3108 // local sym: name,,NO_SECT,type,offset
3109 type = eSymbolTypeLocal;
3110 break;
3112 // INCL scopes
3113 case N_BINCL:
3114 // include file beginning: name,,NO_SECT,0,sum. We use
3115 // the current number of symbols in the symbol table
3116 // in lieu of using nlist_idx in case we ever start
3117 // trimming entries out
3118 N_INCL_indexes.push_back(sym_idx);
3119 type = eSymbolTypeScopeBegin;
3120 break;
3122 case N_EINCL:
3123 // include file end: name,,NO_SECT,0,0
3124 // Set the size of the N_BINCL to the terminating
3125 // index of this N_EINCL so that we can always skip
3126 // the entire symbol if we need to navigate more
3127 // quickly at the source level when parsing STABS
3128 if (!N_INCL_indexes.empty()) {
3129 symbol_ptr =
3130 symtab.SymbolAtIndex(N_INCL_indexes.back());
3131 symbol_ptr->SetByteSize(sym_idx + 1);
3132 symbol_ptr->SetSizeIsSibling(true);
3133 N_INCL_indexes.pop_back();
3135 type = eSymbolTypeScopeEnd;
3136 break;
3138 case N_SOL:
3139 // #included file name: name,,n_sect,0,address
3140 type = eSymbolTypeHeaderFile;
3142 // We currently don't use the header files on darwin
3143 add_nlist = false;
3144 break;
3146 case N_PARAMS:
3147 // compiler parameters: name,,NO_SECT,0,0
3148 type = eSymbolTypeCompiler;
3149 break;
3151 case N_VERSION:
3152 // compiler version: name,,NO_SECT,0,0
3153 type = eSymbolTypeCompiler;
3154 break;
3156 case N_OLEVEL:
3157 // compiler -O level: name,,NO_SECT,0,0
3158 type = eSymbolTypeCompiler;
3159 break;
3161 case N_PSYM:
3162 // parameter: name,,NO_SECT,type,offset
3163 type = eSymbolTypeVariable;
3164 break;
3166 case N_ENTRY:
3167 // alternate entry: name,,n_sect,linenumber,address
3168 symbol_section = section_info.GetSection(nlist.n_sect,
3169 nlist.n_value);
3170 type = eSymbolTypeLineEntry;
3171 break;
3173 // Left and Right Braces
3174 case N_LBRAC:
3175 // left bracket: 0,,NO_SECT,nesting level,address. We
3176 // use the current number of symbols in the symbol
3177 // table in lieu of using nlist_idx in case we ever
3178 // start trimming entries out
3179 symbol_section = section_info.GetSection(nlist.n_sect,
3180 nlist.n_value);
3181 N_BRAC_indexes.push_back(sym_idx);
3182 type = eSymbolTypeScopeBegin;
3183 break;
3185 case N_RBRAC:
3186 // right bracket: 0,,NO_SECT,nesting level,address
3187 // Set the size of the N_LBRAC to the terminating
3188 // index of this N_RBRAC so that we can always skip
3189 // the entire symbol if we need to navigate more
3190 // quickly at the source level when parsing STABS
3191 symbol_section = section_info.GetSection(nlist.n_sect,
3192 nlist.n_value);
3193 if (!N_BRAC_indexes.empty()) {
3194 symbol_ptr =
3195 symtab.SymbolAtIndex(N_BRAC_indexes.back());
3196 symbol_ptr->SetByteSize(sym_idx + 1);
3197 symbol_ptr->SetSizeIsSibling(true);
3198 N_BRAC_indexes.pop_back();
3200 type = eSymbolTypeScopeEnd;
3201 break;
3203 case N_EXCL:
3204 // deleted include file: name,,NO_SECT,0,sum
3205 type = eSymbolTypeHeaderFile;
3206 break;
3208 // COMM scopes
3209 case N_BCOMM:
3210 // begin common: name,,NO_SECT,0,0
3211 // We use the current number of symbols in the symbol
3212 // table in lieu of using nlist_idx in case we ever
3213 // start trimming entries out
3214 type = eSymbolTypeScopeBegin;
3215 N_COMM_indexes.push_back(sym_idx);
3216 break;
3218 case N_ECOML:
3219 // end common (local name): 0,,n_sect,0,address
3220 symbol_section = section_info.GetSection(nlist.n_sect,
3221 nlist.n_value);
3222 // Fall through
3224 case N_ECOMM:
3225 // end common: name,,n_sect,0,0
3226 // Set the size of the N_BCOMM to the terminating
3227 // index of this N_ECOMM/N_ECOML so that we can
3228 // always skip the entire symbol if we need to
3229 // navigate more quickly at the source level when
3230 // parsing STABS
3231 if (!N_COMM_indexes.empty()) {
3232 symbol_ptr =
3233 symtab.SymbolAtIndex(N_COMM_indexes.back());
3234 symbol_ptr->SetByteSize(sym_idx + 1);
3235 symbol_ptr->SetSizeIsSibling(true);
3236 N_COMM_indexes.pop_back();
3238 type = eSymbolTypeScopeEnd;
3239 break;
3241 case N_LENG:
3242 // second stab entry with length information
3243 type = eSymbolTypeAdditional;
3244 break;
3246 default:
3247 break;
3249 } else {
3250 // uint8_t n_pext = N_PEXT & nlist.n_type;
3251 uint8_t n_type = N_TYPE & nlist.n_type;
3252 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
3254 switch (n_type) {
3255 case N_INDR: {
3256 const char *reexport_name_cstr =
3257 strtab_data.PeekCStr(nlist.n_value);
3258 if (reexport_name_cstr && reexport_name_cstr[0]) {
3259 type = eSymbolTypeReExported;
3260 ConstString reexport_name(
3261 reexport_name_cstr +
3262 ((reexport_name_cstr[0] == '_') ? 1 : 0));
3263 sym[sym_idx].SetReExportedSymbolName(reexport_name);
3264 set_value = false;
3265 reexport_shlib_needs_fixup[sym_idx] = reexport_name;
3266 indirect_symbol_names.insert(ConstString(
3267 symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
3268 } else
3269 type = eSymbolTypeUndefined;
3270 } break;
3272 case N_UNDF:
3273 if (symbol_name && symbol_name[0]) {
3274 ConstString undefined_name(
3275 symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
3276 undefined_name_to_desc[undefined_name] = nlist.n_desc;
3278 // Fall through
3279 case N_PBUD:
3280 type = eSymbolTypeUndefined;
3281 break;
3283 case N_ABS:
3284 type = eSymbolTypeAbsolute;
3285 break;
3287 case N_SECT: {
3288 symbol_section = section_info.GetSection(nlist.n_sect,
3289 nlist.n_value);
3291 if (symbol_section == NULL) {
3292 // TODO: warn about this?
3293 add_nlist = false;
3294 break;
3297 if (TEXT_eh_frame_sectID == nlist.n_sect) {
3298 type = eSymbolTypeException;
3299 } else {
3300 uint32_t section_type =
3301 symbol_section->Get() & SECTION_TYPE;
3303 switch (section_type) {
3304 case S_CSTRING_LITERALS:
3305 type = eSymbolTypeData;
3306 break; // section with only literal C strings
3307 case S_4BYTE_LITERALS:
3308 type = eSymbolTypeData;
3309 break; // section with only 4 byte literals
3310 case S_8BYTE_LITERALS:
3311 type = eSymbolTypeData;
3312 break; // section with only 8 byte literals
3313 case S_LITERAL_POINTERS:
3314 type = eSymbolTypeTrampoline;
3315 break; // section with only pointers to literals
3316 case S_NON_LAZY_SYMBOL_POINTERS:
3317 type = eSymbolTypeTrampoline;
3318 break; // section with only non-lazy symbol
3319 // pointers
3320 case S_LAZY_SYMBOL_POINTERS:
3321 type = eSymbolTypeTrampoline;
3322 break; // section with only lazy symbol pointers
3323 case S_SYMBOL_STUBS:
3324 type = eSymbolTypeTrampoline;
3325 break; // section with only symbol stubs, byte
3326 // size of stub in the reserved2 field
3327 case S_MOD_INIT_FUNC_POINTERS:
3328 type = eSymbolTypeCode;
3329 break; // section with only function pointers for
3330 // initialization
3331 case S_MOD_TERM_FUNC_POINTERS:
3332 type = eSymbolTypeCode;
3333 break; // section with only function pointers for
3334 // termination
3335 case S_INTERPOSING:
3336 type = eSymbolTypeTrampoline;
3337 break; // section with only pairs of function
3338 // pointers for interposing
3339 case S_16BYTE_LITERALS:
3340 type = eSymbolTypeData;
3341 break; // section with only 16 byte literals
3342 case S_DTRACE_DOF:
3343 type = eSymbolTypeInstrumentation;
3344 break;
3345 case S_LAZY_DYLIB_SYMBOL_POINTERS:
3346 type = eSymbolTypeTrampoline;
3347 break;
3348 default:
3349 switch (symbol_section->GetType()) {
3350 case lldb::eSectionTypeCode:
3351 type = eSymbolTypeCode;
3352 break;
3353 case eSectionTypeData:
3354 case eSectionTypeDataCString: // Inlined C string
3355 // data
3356 case eSectionTypeDataCStringPointers: // Pointers
3357 // to C
3358 // string
3359 // data
3360 case eSectionTypeDataSymbolAddress: // Address of
3361 // a symbol in
3362 // the symbol
3363 // table
3364 case eSectionTypeData4:
3365 case eSectionTypeData8:
3366 case eSectionTypeData16:
3367 type = eSymbolTypeData;
3368 break;
3369 default:
3370 break;
3372 break;
3375 if (type == eSymbolTypeInvalid) {
3376 const char *symbol_sect_name =
3377 symbol_section->GetName().AsCString();
3378 if (symbol_section->IsDescendant(
3379 text_section_sp.get())) {
3380 if (symbol_section->IsClear(
3381 S_ATTR_PURE_INSTRUCTIONS |
3382 S_ATTR_SELF_MODIFYING_CODE |
3383 S_ATTR_SOME_INSTRUCTIONS))
3384 type = eSymbolTypeData;
3385 else
3386 type = eSymbolTypeCode;
3387 } else if (symbol_section->IsDescendant(
3388 data_section_sp.get()) ||
3389 symbol_section->IsDescendant(
3390 data_dirty_section_sp.get()) ||
3391 symbol_section->IsDescendant(
3392 data_const_section_sp.get())) {
3393 if (symbol_sect_name &&
3394 ::strstr(symbol_sect_name, "__objc") ==
3395 symbol_sect_name) {
3396 type = eSymbolTypeRuntime;
3398 if (symbol_name) {
3399 llvm::StringRef symbol_name_ref(symbol_name);
3400 if (symbol_name_ref.starts_with("_OBJC_")) {
3401 llvm::StringRef
3402 g_objc_v2_prefix_class(
3403 "_OBJC_CLASS_$_");
3404 llvm::StringRef
3405 g_objc_v2_prefix_metaclass(
3406 "_OBJC_METACLASS_$_");
3407 llvm::StringRef
3408 g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
3409 if (symbol_name_ref.starts_with(
3410 g_objc_v2_prefix_class)) {
3411 symbol_name_non_abi_mangled =
3412 symbol_name + 1;
3413 symbol_name =
3414 symbol_name +
3415 g_objc_v2_prefix_class.size();
3416 type = eSymbolTypeObjCClass;
3417 demangled_is_synthesized = true;
3418 } else if (
3419 symbol_name_ref.starts_with(
3420 g_objc_v2_prefix_metaclass)) {
3421 symbol_name_non_abi_mangled =
3422 symbol_name + 1;
3423 symbol_name =
3424 symbol_name +
3425 g_objc_v2_prefix_metaclass.size();
3426 type = eSymbolTypeObjCMetaClass;
3427 demangled_is_synthesized = true;
3428 } else if (symbol_name_ref.starts_with(
3429 g_objc_v2_prefix_ivar)) {
3430 symbol_name_non_abi_mangled =
3431 symbol_name + 1;
3432 symbol_name =
3433 symbol_name +
3434 g_objc_v2_prefix_ivar.size();
3435 type = eSymbolTypeObjCIVar;
3436 demangled_is_synthesized = true;
3440 } else if (symbol_sect_name &&
3441 ::strstr(symbol_sect_name,
3442 "__gcc_except_tab") ==
3443 symbol_sect_name) {
3444 type = eSymbolTypeException;
3445 } else {
3446 type = eSymbolTypeData;
3448 } else if (symbol_sect_name &&
3449 ::strstr(symbol_sect_name, "__IMPORT") ==
3450 symbol_sect_name) {
3451 type = eSymbolTypeTrampoline;
3452 } else if (symbol_section->IsDescendant(
3453 objc_section_sp.get())) {
3454 type = eSymbolTypeRuntime;
3455 if (symbol_name && symbol_name[0] == '.') {
3456 llvm::StringRef symbol_name_ref(symbol_name);
3457 llvm::StringRef
3458 g_objc_v1_prefix_class(".objc_class_name_");
3459 if (symbol_name_ref.starts_with(
3460 g_objc_v1_prefix_class)) {
3461 symbol_name_non_abi_mangled = symbol_name;
3462 symbol_name = symbol_name +
3463 g_objc_v1_prefix_class.size();
3464 type = eSymbolTypeObjCClass;
3465 demangled_is_synthesized = true;
3471 } break;
3475 if (add_nlist) {
3476 uint64_t symbol_value = nlist.n_value;
3477 if (symbol_name_non_abi_mangled) {
3478 sym[sym_idx].GetMangled().SetMangledName(
3479 ConstString(symbol_name_non_abi_mangled));
3480 sym[sym_idx].GetMangled().SetDemangledName(
3481 ConstString(symbol_name));
3482 } else {
3483 if (symbol_name && symbol_name[0] == '_') {
3484 symbol_name++; // Skip the leading underscore
3487 if (symbol_name) {
3488 ConstString const_symbol_name(symbol_name);
3489 sym[sym_idx].GetMangled().SetValue(const_symbol_name);
3490 if (is_gsym && is_debug) {
3491 const char *gsym_name =
3492 sym[sym_idx]
3493 .GetMangled()
3494 .GetName(Mangled::ePreferMangled)
3495 .GetCString();
3496 if (gsym_name)
3497 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
3501 if (symbol_section) {
3502 const addr_t section_file_addr =
3503 symbol_section->GetFileAddress();
3504 if (symbol_byte_size == 0 &&
3505 function_starts_count > 0) {
3506 addr_t symbol_lookup_file_addr = nlist.n_value;
3507 // Do an exact address match for non-ARM addresses,
3508 // else get the closest since the symbol might be a
3509 // thumb symbol which has an address with bit zero
3510 // set
3511 FunctionStarts::Entry *func_start_entry =
3512 function_starts.FindEntry(symbol_lookup_file_addr,
3513 !is_arm);
3514 if (is_arm && func_start_entry) {
3515 // Verify that the function start address is the
3516 // symbol address (ARM) or the symbol address + 1
3517 // (thumb)
3518 if (func_start_entry->addr !=
3519 symbol_lookup_file_addr &&
3520 func_start_entry->addr !=
3521 (symbol_lookup_file_addr + 1)) {
3522 // Not the right entry, NULL it out...
3523 func_start_entry = NULL;
3526 if (func_start_entry) {
3527 func_start_entry->data = true;
3529 addr_t symbol_file_addr = func_start_entry->addr;
3530 uint32_t symbol_flags = 0;
3531 if (is_arm) {
3532 if (symbol_file_addr & 1)
3533 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
3534 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3537 const FunctionStarts::Entry *next_func_start_entry =
3538 function_starts.FindNextEntry(func_start_entry);
3539 const addr_t section_end_file_addr =
3540 section_file_addr +
3541 symbol_section->GetByteSize();
3542 if (next_func_start_entry) {
3543 addr_t next_symbol_file_addr =
3544 next_func_start_entry->addr;
3545 // Be sure to clear the Thumb address bit when
3546 // we calculate the size from the current and
3547 // next address
3548 if (is_arm)
3549 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3550 symbol_byte_size = std::min<lldb::addr_t>(
3551 next_symbol_file_addr - symbol_file_addr,
3552 section_end_file_addr - symbol_file_addr);
3553 } else {
3554 symbol_byte_size =
3555 section_end_file_addr - symbol_file_addr;
3559 symbol_value -= section_file_addr;
3562 if (is_debug == false) {
3563 if (type == eSymbolTypeCode) {
3564 // See if we can find a N_FUN entry for any code
3565 // symbols. If we do find a match, and the name
3566 // matches, then we can merge the two into just the
3567 // function symbol to avoid duplicate entries in
3568 // the symbol table
3569 auto range =
3570 N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
3571 if (range.first != range.second) {
3572 bool found_it = false;
3573 for (auto pos = range.first; pos != range.second;
3574 ++pos) {
3575 if (sym[sym_idx].GetMangled().GetName(
3576 Mangled::ePreferMangled) ==
3577 sym[pos->second].GetMangled().GetName(
3578 Mangled::ePreferMangled)) {
3579 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3580 // We just need the flags from the linker
3581 // symbol, so put these flags
3582 // into the N_FUN flags to avoid duplicate
3583 // symbols in the symbol table
3584 sym[pos->second].SetExternal(
3585 sym[sym_idx].IsExternal());
3586 sym[pos->second].SetFlags(nlist.n_type << 16 |
3587 nlist.n_desc);
3588 if (resolver_addresses.find(nlist.n_value) !=
3589 resolver_addresses.end())
3590 sym[pos->second].SetType(eSymbolTypeResolver);
3591 sym[sym_idx].Clear();
3592 found_it = true;
3593 break;
3596 if (found_it)
3597 continue;
3598 } else {
3599 if (resolver_addresses.find(nlist.n_value) !=
3600 resolver_addresses.end())
3601 type = eSymbolTypeResolver;
3603 } else if (type == eSymbolTypeData ||
3604 type == eSymbolTypeObjCClass ||
3605 type == eSymbolTypeObjCMetaClass ||
3606 type == eSymbolTypeObjCIVar) {
3607 // See if we can find a N_STSYM entry for any data
3608 // symbols. If we do find a match, and the name
3609 // matches, then we can merge the two into just the
3610 // Static symbol to avoid duplicate entries in the
3611 // symbol table
3612 auto range = N_STSYM_addr_to_sym_idx.equal_range(
3613 nlist.n_value);
3614 if (range.first != range.second) {
3615 bool found_it = false;
3616 for (auto pos = range.first; pos != range.second;
3617 ++pos) {
3618 if (sym[sym_idx].GetMangled().GetName(
3619 Mangled::ePreferMangled) ==
3620 sym[pos->second].GetMangled().GetName(
3621 Mangled::ePreferMangled)) {
3622 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3623 // We just need the flags from the linker
3624 // symbol, so put these flags
3625 // into the N_STSYM flags to avoid duplicate
3626 // symbols in the symbol table
3627 sym[pos->second].SetExternal(
3628 sym[sym_idx].IsExternal());
3629 sym[pos->second].SetFlags(nlist.n_type << 16 |
3630 nlist.n_desc);
3631 sym[sym_idx].Clear();
3632 found_it = true;
3633 break;
3636 if (found_it)
3637 continue;
3638 } else {
3639 const char *gsym_name =
3640 sym[sym_idx]
3641 .GetMangled()
3642 .GetName(Mangled::ePreferMangled)
3643 .GetCString();
3644 if (gsym_name) {
3645 // Combine N_GSYM stab entries with the non
3646 // stab symbol
3647 ConstNameToSymbolIndexMap::const_iterator pos =
3648 N_GSYM_name_to_sym_idx.find(gsym_name);
3649 if (pos != N_GSYM_name_to_sym_idx.end()) {
3650 const uint32_t GSYM_sym_idx = pos->second;
3651 m_nlist_idx_to_sym_idx[nlist_idx] =
3652 GSYM_sym_idx;
3653 // Copy the address, because often the N_GSYM
3654 // address has an invalid address of zero
3655 // when the global is a common symbol
3656 sym[GSYM_sym_idx].GetAddressRef().SetSection(
3657 symbol_section);
3658 sym[GSYM_sym_idx].GetAddressRef().SetOffset(
3659 symbol_value);
3660 add_symbol_addr(sym[GSYM_sym_idx]
3661 .GetAddress()
3662 .GetFileAddress());
3663 // We just need the flags from the linker
3664 // symbol, so put these flags
3665 // into the N_GSYM flags to avoid duplicate
3666 // symbols in the symbol table
3667 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
3668 nlist.n_desc);
3669 sym[sym_idx].Clear();
3670 continue;
3677 sym[sym_idx].SetID(nlist_idx);
3678 sym[sym_idx].SetType(type);
3679 if (set_value) {
3680 sym[sym_idx].GetAddressRef().SetSection(symbol_section);
3681 sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
3682 add_symbol_addr(
3683 sym[sym_idx].GetAddress().GetFileAddress());
3685 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
3687 if (symbol_byte_size > 0)
3688 sym[sym_idx].SetByteSize(symbol_byte_size);
3690 if (demangled_is_synthesized)
3691 sym[sym_idx].SetDemangledNameIsSynthesized(true);
3692 ++sym_idx;
3693 } else {
3694 sym[sym_idx].Clear();
3697 /////////////////////////////
3701 for (const auto &pos : reexport_shlib_needs_fixup) {
3702 const auto undef_pos = undefined_name_to_desc.find(pos.second);
3703 if (undef_pos != undefined_name_to_desc.end()) {
3704 const uint8_t dylib_ordinal =
3705 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
3706 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
3707 sym[pos.first].SetReExportedSymbolSharedLibrary(
3708 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
3713 #endif
3714 lldb::offset_t nlist_data_offset = 0;
3716 if (nlist_data.GetByteSize() > 0) {
3718 // If the sym array was not created while parsing the DSC unmapped
3719 // symbols, create it now.
3720 if (sym == nullptr) {
3721 sym =
3722 symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
3723 num_syms = symtab.GetNumSymbols();
3726 if (unmapped_local_symbols_found) {
3727 assert(m_dysymtab.ilocalsym == 0);
3728 nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
3729 nlist_idx = m_dysymtab.nlocalsym;
3730 } else {
3731 nlist_idx = 0;
3734 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
3735 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
3736 UndefinedNameToDescMap undefined_name_to_desc;
3737 SymbolIndexToName reexport_shlib_needs_fixup;
3739 // Symtab parsing is a huge mess. Everything is entangled and the code
3740 // requires access to a ridiculous amount of variables. LLDB depends
3741 // heavily on the proper merging of symbols and to get that right we need
3742 // to make sure we have parsed all the debug symbols first. Therefore we
3743 // invoke the lambda twice, once to parse only the debug symbols and then
3744 // once more to parse the remaining symbols.
3745 auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
3746 bool debug_only) {
3747 const bool is_debug = ((nlist.n_type & N_STAB) != 0);
3748 if (is_debug != debug_only)
3749 return true;
3751 const char *symbol_name_non_abi_mangled = nullptr;
3752 const char *symbol_name = nullptr;
3754 if (have_strtab_data) {
3755 symbol_name = strtab_data.PeekCStr(nlist.n_strx);
3757 if (symbol_name == nullptr) {
3758 // No symbol should be NULL, even the symbols with no string values
3759 // should have an offset zero which points to an empty C-string
3760 Debugger::ReportError(llvm::formatv(
3761 "symbol[{0}] has invalid string table offset {1:x} in {2}, "
3762 "ignoring symbol",
3763 nlist_idx, nlist.n_strx, module_sp->GetFileSpec().GetPath()));
3764 return true;
3766 if (symbol_name[0] == '\0')
3767 symbol_name = nullptr;
3768 } else {
3769 const addr_t str_addr = strtab_addr + nlist.n_strx;
3770 Status str_error;
3771 if (process->ReadCStringFromMemory(str_addr, memory_symbol_name,
3772 str_error))
3773 symbol_name = memory_symbol_name.c_str();
3776 SymbolType type = eSymbolTypeInvalid;
3777 SectionSP symbol_section;
3778 lldb::addr_t symbol_byte_size = 0;
3779 bool add_nlist = true;
3780 bool is_gsym = false;
3781 bool demangled_is_synthesized = false;
3782 bool set_value = true;
3784 assert(sym_idx < num_syms);
3785 sym[sym_idx].SetDebug(is_debug);
3787 if (is_debug) {
3788 switch (nlist.n_type) {
3789 case N_GSYM:
3790 // global symbol: name,,NO_SECT,type,0
3791 // Sometimes the N_GSYM value contains the address.
3793 // FIXME: In the .o files, we have a GSYM and a debug symbol for all
3794 // the ObjC data. They
3795 // have the same address, but we want to ensure that we always find
3796 // only the real symbol, 'cause we don't currently correctly
3797 // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
3798 // type. This is a temporary hack to make sure the ObjectiveC
3799 // symbols get treated correctly. To do this right, we should
3800 // coalesce all the GSYM & global symbols that have the same
3801 // address.
3802 is_gsym = true;
3803 sym[sym_idx].SetExternal(true);
3805 if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
3806 llvm::StringRef symbol_name_ref(symbol_name);
3807 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) {
3808 symbol_name_non_abi_mangled = symbol_name + 1;
3809 symbol_name = symbol_name + g_objc_v2_prefix_class.size();
3810 type = eSymbolTypeObjCClass;
3811 demangled_is_synthesized = true;
3813 } else if (symbol_name_ref.starts_with(
3814 g_objc_v2_prefix_metaclass)) {
3815 symbol_name_non_abi_mangled = symbol_name + 1;
3816 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
3817 type = eSymbolTypeObjCMetaClass;
3818 demangled_is_synthesized = true;
3819 } else if (symbol_name_ref.starts_with(g_objc_v2_prefix_ivar)) {
3820 symbol_name_non_abi_mangled = symbol_name + 1;
3821 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
3822 type = eSymbolTypeObjCIVar;
3823 demangled_is_synthesized = true;
3825 } else {
3826 if (nlist.n_value != 0)
3827 symbol_section =
3828 section_info.GetSection(nlist.n_sect, nlist.n_value);
3829 type = eSymbolTypeData;
3831 break;
3833 case N_FNAME:
3834 // procedure name (f77 kludge): name,,NO_SECT,0,0
3835 type = eSymbolTypeCompiler;
3836 break;
3838 case N_FUN:
3839 // procedure: name,,n_sect,linenumber,address
3840 if (symbol_name) {
3841 type = eSymbolTypeCode;
3842 symbol_section =
3843 section_info.GetSection(nlist.n_sect, nlist.n_value);
3845 N_FUN_addr_to_sym_idx.insert(
3846 std::make_pair(nlist.n_value, sym_idx));
3847 // We use the current number of symbols in the symbol table in
3848 // lieu of using nlist_idx in case we ever start trimming entries
3849 // out
3850 N_FUN_indexes.push_back(sym_idx);
3851 } else {
3852 type = eSymbolTypeCompiler;
3854 if (!N_FUN_indexes.empty()) {
3855 // Copy the size of the function into the original STAB entry
3856 // so we don't have to hunt for it later
3857 symtab.SymbolAtIndex(N_FUN_indexes.back())
3858 ->SetByteSize(nlist.n_value);
3859 N_FUN_indexes.pop_back();
3860 // We don't really need the end function STAB as it contains
3861 // the size which we already placed with the original symbol,
3862 // so don't add it if we want a minimal symbol table
3863 add_nlist = false;
3866 break;
3868 case N_STSYM:
3869 // static symbol: name,,n_sect,type,address
3870 N_STSYM_addr_to_sym_idx.insert(
3871 std::make_pair(nlist.n_value, sym_idx));
3872 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3873 if (symbol_name && symbol_name[0]) {
3874 type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1,
3875 eSymbolTypeData);
3877 break;
3879 case N_LCSYM:
3880 // .lcomm symbol: name,,n_sect,type,address
3881 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3882 type = eSymbolTypeCommonBlock;
3883 break;
3885 case N_BNSYM:
3886 // We use the current number of symbols in the symbol table in lieu
3887 // of using nlist_idx in case we ever start trimming entries out.
3888 // Skip these if we want minimal symbol tables
3889 add_nlist = false;
3890 break;
3892 case N_ENSYM:
3893 // Set the size of the N_BNSYM to the terminating index of this
3894 // N_ENSYM so that we can always skip the entire symbol if we need
3895 // to navigate more quickly at the source level when parsing STABS
3896 // Skip these if we want minimal symbol tables
3897 add_nlist = false;
3898 break;
3900 case N_OPT:
3901 // emitted with gcc2_compiled and in gcc source
3902 type = eSymbolTypeCompiler;
3903 break;
3905 case N_RSYM:
3906 // register sym: name,,NO_SECT,type,register
3907 type = eSymbolTypeVariable;
3908 break;
3910 case N_SLINE:
3911 // src line: 0,,n_sect,linenumber,address
3912 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3913 type = eSymbolTypeLineEntry;
3914 break;
3916 case N_SSYM:
3917 // structure elt: name,,NO_SECT,type,struct_offset
3918 type = eSymbolTypeVariableType;
3919 break;
3921 case N_SO:
3922 // source file name
3923 type = eSymbolTypeSourceFile;
3924 if (symbol_name == nullptr) {
3925 add_nlist = false;
3926 if (N_SO_index != UINT32_MAX) {
3927 // Set the size of the N_SO to the terminating index of this
3928 // N_SO so that we can always skip the entire N_SO if we need
3929 // to navigate more quickly at the source level when parsing
3930 // STABS
3931 symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
3932 symbol_ptr->SetByteSize(sym_idx);
3933 symbol_ptr->SetSizeIsSibling(true);
3935 N_NSYM_indexes.clear();
3936 N_INCL_indexes.clear();
3937 N_BRAC_indexes.clear();
3938 N_COMM_indexes.clear();
3939 N_FUN_indexes.clear();
3940 N_SO_index = UINT32_MAX;
3941 } else {
3942 // We use the current number of symbols in the symbol table in
3943 // lieu of using nlist_idx in case we ever start trimming entries
3944 // out
3945 const bool N_SO_has_full_path = symbol_name[0] == '/';
3946 if (N_SO_has_full_path) {
3947 if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
3948 // We have two consecutive N_SO entries where the first
3949 // contains a directory and the second contains a full path.
3950 sym[sym_idx - 1].GetMangled().SetValue(
3951 ConstString(symbol_name));
3952 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3953 add_nlist = false;
3954 } else {
3955 // This is the first entry in a N_SO that contains a
3956 // directory or a full path to the source file
3957 N_SO_index = sym_idx;
3959 } else if ((N_SO_index == sym_idx - 1) &&
3960 ((sym_idx - 1) < num_syms)) {
3961 // This is usually the second N_SO entry that contains just the
3962 // filename, so here we combine it with the first one if we are
3963 // minimizing the symbol table
3964 const char *so_path =
3965 sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
3966 if (so_path && so_path[0]) {
3967 std::string full_so_path(so_path);
3968 const size_t double_slash_pos = full_so_path.find("//");
3969 if (double_slash_pos != std::string::npos) {
3970 // The linker has been generating bad N_SO entries with
3971 // doubled up paths in the format "%s%s" where the first
3972 // string is the DW_AT_comp_dir, and the second is the
3973 // directory for the source file so you end up with a path
3974 // that looks like "/tmp/src//tmp/src/"
3975 FileSpec so_dir(so_path);
3976 if (!FileSystem::Instance().Exists(so_dir)) {
3977 so_dir.SetFile(&full_so_path[double_slash_pos + 1],
3978 FileSpec::Style::native);
3979 if (FileSystem::Instance().Exists(so_dir)) {
3980 // Trim off the incorrect path
3981 full_so_path.erase(0, double_slash_pos + 1);
3985 if (*full_so_path.rbegin() != '/')
3986 full_so_path += '/';
3987 full_so_path += symbol_name;
3988 sym[sym_idx - 1].GetMangled().SetValue(
3989 ConstString(full_so_path.c_str()));
3990 add_nlist = false;
3991 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3993 } else {
3994 // This could be a relative path to a N_SO
3995 N_SO_index = sym_idx;
3998 break;
4000 case N_OSO:
4001 // object file name: name,,0,0,st_mtime
4002 type = eSymbolTypeObjectFile;
4003 break;
4005 case N_LSYM:
4006 // local sym: name,,NO_SECT,type,offset
4007 type = eSymbolTypeLocal;
4008 break;
4010 // INCL scopes
4011 case N_BINCL:
4012 // include file beginning: name,,NO_SECT,0,sum. We use the current
4013 // number of symbols in the symbol table in lieu of using nlist_idx
4014 // in case we ever start trimming entries out
4015 N_INCL_indexes.push_back(sym_idx);
4016 type = eSymbolTypeScopeBegin;
4017 break;
4019 case N_EINCL:
4020 // include file end: name,,NO_SECT,0,0
4021 // Set the size of the N_BINCL to the terminating index of this
4022 // N_EINCL so that we can always skip the entire symbol if we need
4023 // to navigate more quickly at the source level when parsing STABS
4024 if (!N_INCL_indexes.empty()) {
4025 symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back());
4026 symbol_ptr->SetByteSize(sym_idx + 1);
4027 symbol_ptr->SetSizeIsSibling(true);
4028 N_INCL_indexes.pop_back();
4030 type = eSymbolTypeScopeEnd;
4031 break;
4033 case N_SOL:
4034 // #included file name: name,,n_sect,0,address
4035 type = eSymbolTypeHeaderFile;
4037 // We currently don't use the header files on darwin
4038 add_nlist = false;
4039 break;
4041 case N_PARAMS:
4042 // compiler parameters: name,,NO_SECT,0,0
4043 type = eSymbolTypeCompiler;
4044 break;
4046 case N_VERSION:
4047 // compiler version: name,,NO_SECT,0,0
4048 type = eSymbolTypeCompiler;
4049 break;
4051 case N_OLEVEL:
4052 // compiler -O level: name,,NO_SECT,0,0
4053 type = eSymbolTypeCompiler;
4054 break;
4056 case N_PSYM:
4057 // parameter: name,,NO_SECT,type,offset
4058 type = eSymbolTypeVariable;
4059 break;
4061 case N_ENTRY:
4062 // alternate entry: name,,n_sect,linenumber,address
4063 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4064 type = eSymbolTypeLineEntry;
4065 break;
4067 // Left and Right Braces
4068 case N_LBRAC:
4069 // left bracket: 0,,NO_SECT,nesting level,address. We use the
4070 // current number of symbols in the symbol table in lieu of using
4071 // nlist_idx in case we ever start trimming entries out
4072 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4073 N_BRAC_indexes.push_back(sym_idx);
4074 type = eSymbolTypeScopeBegin;
4075 break;
4077 case N_RBRAC:
4078 // right bracket: 0,,NO_SECT,nesting level,address. Set the size of
4079 // the N_LBRAC to the terminating index of this N_RBRAC so that we
4080 // can always skip the entire symbol if we need to navigate more
4081 // quickly at the source level when parsing STABS
4082 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4083 if (!N_BRAC_indexes.empty()) {
4084 symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back());
4085 symbol_ptr->SetByteSize(sym_idx + 1);
4086 symbol_ptr->SetSizeIsSibling(true);
4087 N_BRAC_indexes.pop_back();
4089 type = eSymbolTypeScopeEnd;
4090 break;
4092 case N_EXCL:
4093 // deleted include file: name,,NO_SECT,0,sum
4094 type = eSymbolTypeHeaderFile;
4095 break;
4097 // COMM scopes
4098 case N_BCOMM:
4099 // begin common: name,,NO_SECT,0,0
4100 // We use the current number of symbols in the symbol table in lieu
4101 // of using nlist_idx in case we ever start trimming entries out
4102 type = eSymbolTypeScopeBegin;
4103 N_COMM_indexes.push_back(sym_idx);
4104 break;
4106 case N_ECOML:
4107 // end common (local name): 0,,n_sect,0,address
4108 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4109 [[fallthrough]];
4111 case N_ECOMM:
4112 // end common: name,,n_sect,0,0
4113 // Set the size of the N_BCOMM to the terminating index of this
4114 // N_ECOMM/N_ECOML so that we can always skip the entire symbol if
4115 // we need to navigate more quickly at the source level when
4116 // parsing STABS
4117 if (!N_COMM_indexes.empty()) {
4118 symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back());
4119 symbol_ptr->SetByteSize(sym_idx + 1);
4120 symbol_ptr->SetSizeIsSibling(true);
4121 N_COMM_indexes.pop_back();
4123 type = eSymbolTypeScopeEnd;
4124 break;
4126 case N_LENG:
4127 // second stab entry with length information
4128 type = eSymbolTypeAdditional;
4129 break;
4131 default:
4132 break;
4134 } else {
4135 uint8_t n_type = N_TYPE & nlist.n_type;
4136 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
4138 switch (n_type) {
4139 case N_INDR: {
4140 const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value);
4141 if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) {
4142 type = eSymbolTypeReExported;
4143 ConstString reexport_name(reexport_name_cstr +
4144 ((reexport_name_cstr[0] == '_') ? 1 : 0));
4145 sym[sym_idx].SetReExportedSymbolName(reexport_name);
4146 set_value = false;
4147 reexport_shlib_needs_fixup[sym_idx] = reexport_name;
4148 indirect_symbol_names.insert(
4149 ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
4150 } else
4151 type = eSymbolTypeUndefined;
4152 } break;
4154 case N_UNDF:
4155 if (symbol_name && symbol_name[0]) {
4156 ConstString undefined_name(symbol_name +
4157 ((symbol_name[0] == '_') ? 1 : 0));
4158 undefined_name_to_desc[undefined_name] = nlist.n_desc;
4160 [[fallthrough]];
4162 case N_PBUD:
4163 type = eSymbolTypeUndefined;
4164 break;
4166 case N_ABS:
4167 type = eSymbolTypeAbsolute;
4168 break;
4170 case N_SECT: {
4171 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4173 if (!symbol_section) {
4174 // TODO: warn about this?
4175 add_nlist = false;
4176 break;
4179 if (TEXT_eh_frame_sectID == nlist.n_sect) {
4180 type = eSymbolTypeException;
4181 } else {
4182 uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
4184 switch (section_type) {
4185 case S_CSTRING_LITERALS:
4186 type = eSymbolTypeData;
4187 break; // section with only literal C strings
4188 case S_4BYTE_LITERALS:
4189 type = eSymbolTypeData;
4190 break; // section with only 4 byte literals
4191 case S_8BYTE_LITERALS:
4192 type = eSymbolTypeData;
4193 break; // section with only 8 byte literals
4194 case S_LITERAL_POINTERS:
4195 type = eSymbolTypeTrampoline;
4196 break; // section with only pointers to literals
4197 case S_NON_LAZY_SYMBOL_POINTERS:
4198 type = eSymbolTypeTrampoline;
4199 break; // section with only non-lazy symbol pointers
4200 case S_LAZY_SYMBOL_POINTERS:
4201 type = eSymbolTypeTrampoline;
4202 break; // section with only lazy symbol pointers
4203 case S_SYMBOL_STUBS:
4204 type = eSymbolTypeTrampoline;
4205 break; // section with only symbol stubs, byte size of stub in
4206 // the reserved2 field
4207 case S_MOD_INIT_FUNC_POINTERS:
4208 type = eSymbolTypeCode;
4209 break; // section with only function pointers for initialization
4210 case S_MOD_TERM_FUNC_POINTERS:
4211 type = eSymbolTypeCode;
4212 break; // section with only function pointers for termination
4213 case S_INTERPOSING:
4214 type = eSymbolTypeTrampoline;
4215 break; // section with only pairs of function pointers for
4216 // interposing
4217 case S_16BYTE_LITERALS:
4218 type = eSymbolTypeData;
4219 break; // section with only 16 byte literals
4220 case S_DTRACE_DOF:
4221 type = eSymbolTypeInstrumentation;
4222 break;
4223 case S_LAZY_DYLIB_SYMBOL_POINTERS:
4224 type = eSymbolTypeTrampoline;
4225 break;
4226 default:
4227 switch (symbol_section->GetType()) {
4228 case lldb::eSectionTypeCode:
4229 type = eSymbolTypeCode;
4230 break;
4231 case eSectionTypeData:
4232 case eSectionTypeDataCString: // Inlined C string data
4233 case eSectionTypeDataCStringPointers: // Pointers to C string
4234 // data
4235 case eSectionTypeDataSymbolAddress: // Address of a symbol in
4236 // the symbol table
4237 case eSectionTypeData4:
4238 case eSectionTypeData8:
4239 case eSectionTypeData16:
4240 type = eSymbolTypeData;
4241 break;
4242 default:
4243 break;
4245 break;
4248 if (type == eSymbolTypeInvalid) {
4249 const char *symbol_sect_name =
4250 symbol_section->GetName().AsCString();
4251 if (symbol_section->IsDescendant(text_section_sp.get())) {
4252 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
4253 S_ATTR_SELF_MODIFYING_CODE |
4254 S_ATTR_SOME_INSTRUCTIONS))
4255 type = eSymbolTypeData;
4256 else
4257 type = eSymbolTypeCode;
4258 } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
4259 symbol_section->IsDescendant(
4260 data_dirty_section_sp.get()) ||
4261 symbol_section->IsDescendant(
4262 data_const_section_sp.get())) {
4263 if (symbol_sect_name &&
4264 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
4265 type = eSymbolTypeRuntime;
4267 if (symbol_name) {
4268 llvm::StringRef symbol_name_ref(symbol_name);
4269 if (symbol_name_ref.starts_with("_OBJC_")) {
4270 llvm::StringRef g_objc_v2_prefix_class(
4271 "_OBJC_CLASS_$_");
4272 llvm::StringRef g_objc_v2_prefix_metaclass(
4273 "_OBJC_METACLASS_$_");
4274 llvm::StringRef g_objc_v2_prefix_ivar(
4275 "_OBJC_IVAR_$_");
4276 if (symbol_name_ref.starts_with(g_objc_v2_prefix_class)) {
4277 symbol_name_non_abi_mangled = symbol_name + 1;
4278 symbol_name =
4279 symbol_name + g_objc_v2_prefix_class.size();
4280 type = eSymbolTypeObjCClass;
4281 demangled_is_synthesized = true;
4282 } else if (symbol_name_ref.starts_with(
4283 g_objc_v2_prefix_metaclass)) {
4284 symbol_name_non_abi_mangled = symbol_name + 1;
4285 symbol_name =
4286 symbol_name + g_objc_v2_prefix_metaclass.size();
4287 type = eSymbolTypeObjCMetaClass;
4288 demangled_is_synthesized = true;
4289 } else if (symbol_name_ref.starts_with(
4290 g_objc_v2_prefix_ivar)) {
4291 symbol_name_non_abi_mangled = symbol_name + 1;
4292 symbol_name =
4293 symbol_name + g_objc_v2_prefix_ivar.size();
4294 type = eSymbolTypeObjCIVar;
4295 demangled_is_synthesized = true;
4299 } else if (symbol_sect_name &&
4300 ::strstr(symbol_sect_name, "__gcc_except_tab") ==
4301 symbol_sect_name) {
4302 type = eSymbolTypeException;
4303 } else {
4304 type = eSymbolTypeData;
4306 } else if (symbol_sect_name &&
4307 ::strstr(symbol_sect_name, "__IMPORT") ==
4308 symbol_sect_name) {
4309 type = eSymbolTypeTrampoline;
4310 } else if (symbol_section->IsDescendant(objc_section_sp.get())) {
4311 type = eSymbolTypeRuntime;
4312 if (symbol_name && symbol_name[0] == '.') {
4313 llvm::StringRef symbol_name_ref(symbol_name);
4314 llvm::StringRef g_objc_v1_prefix_class(
4315 ".objc_class_name_");
4316 if (symbol_name_ref.starts_with(g_objc_v1_prefix_class)) {
4317 symbol_name_non_abi_mangled = symbol_name;
4318 symbol_name = symbol_name + g_objc_v1_prefix_class.size();
4319 type = eSymbolTypeObjCClass;
4320 demangled_is_synthesized = true;
4326 } break;
4330 if (!add_nlist) {
4331 sym[sym_idx].Clear();
4332 return true;
4335 uint64_t symbol_value = nlist.n_value;
4337 if (symbol_name_non_abi_mangled) {
4338 sym[sym_idx].GetMangled().SetMangledName(
4339 ConstString(symbol_name_non_abi_mangled));
4340 sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
4341 } else {
4343 if (symbol_name && symbol_name[0] == '_') {
4344 symbol_name++; // Skip the leading underscore
4347 if (symbol_name) {
4348 ConstString const_symbol_name(symbol_name);
4349 sym[sym_idx].GetMangled().SetValue(const_symbol_name);
4353 if (is_gsym) {
4354 const char *gsym_name = sym[sym_idx]
4355 .GetMangled()
4356 .GetName(Mangled::ePreferMangled)
4357 .GetCString();
4358 if (gsym_name)
4359 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
4362 if (symbol_section) {
4363 const addr_t section_file_addr = symbol_section->GetFileAddress();
4364 if (symbol_byte_size == 0 && function_starts_count > 0) {
4365 addr_t symbol_lookup_file_addr = nlist.n_value;
4366 // Do an exact address match for non-ARM addresses, else get the
4367 // closest since the symbol might be a thumb symbol which has an
4368 // address with bit zero set.
4369 FunctionStarts::Entry *func_start_entry =
4370 function_starts.FindEntry(symbol_lookup_file_addr, !is_arm);
4371 if (is_arm && func_start_entry) {
4372 // Verify that the function start address is the symbol address
4373 // (ARM) or the symbol address + 1 (thumb).
4374 if (func_start_entry->addr != symbol_lookup_file_addr &&
4375 func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
4376 // Not the right entry, NULL it out...
4377 func_start_entry = nullptr;
4380 if (func_start_entry) {
4381 func_start_entry->data = true;
4383 addr_t symbol_file_addr = func_start_entry->addr;
4384 if (is_arm)
4385 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4387 const FunctionStarts::Entry *next_func_start_entry =
4388 function_starts.FindNextEntry(func_start_entry);
4389 const addr_t section_end_file_addr =
4390 section_file_addr + symbol_section->GetByteSize();
4391 if (next_func_start_entry) {
4392 addr_t next_symbol_file_addr = next_func_start_entry->addr;
4393 // Be sure to clear the Thumb address bit when we calculate the
4394 // size from the current and next address
4395 if (is_arm)
4396 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4397 symbol_byte_size = std::min<lldb::addr_t>(
4398 next_symbol_file_addr - symbol_file_addr,
4399 section_end_file_addr - symbol_file_addr);
4400 } else {
4401 symbol_byte_size = section_end_file_addr - symbol_file_addr;
4405 symbol_value -= section_file_addr;
4408 if (!is_debug) {
4409 if (type == eSymbolTypeCode) {
4410 // See if we can find a N_FUN entry for any code symbols. If we do
4411 // find a match, and the name matches, then we can merge the two into
4412 // just the function symbol to avoid duplicate entries in the symbol
4413 // table.
4414 std::pair<ValueToSymbolIndexMap::const_iterator,
4415 ValueToSymbolIndexMap::const_iterator>
4416 range;
4417 range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
4418 if (range.first != range.second) {
4419 for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4420 pos != range.second; ++pos) {
4421 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4422 sym[pos->second].GetMangled().GetName(
4423 Mangled::ePreferMangled)) {
4424 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4425 // We just need the flags from the linker symbol, so put these
4426 // flags into the N_FUN flags to avoid duplicate symbols in the
4427 // symbol table.
4428 sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4429 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4430 if (resolver_addresses.find(nlist.n_value) !=
4431 resolver_addresses.end())
4432 sym[pos->second].SetType(eSymbolTypeResolver);
4433 sym[sym_idx].Clear();
4434 return true;
4437 } else {
4438 if (resolver_addresses.find(nlist.n_value) !=
4439 resolver_addresses.end())
4440 type = eSymbolTypeResolver;
4442 } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
4443 type == eSymbolTypeObjCMetaClass ||
4444 type == eSymbolTypeObjCIVar) {
4445 // See if we can find a N_STSYM entry for any data symbols. If we do
4446 // find a match, and the name matches, then we can merge the two into
4447 // just the Static symbol to avoid duplicate entries in the symbol
4448 // table.
4449 std::pair<ValueToSymbolIndexMap::const_iterator,
4450 ValueToSymbolIndexMap::const_iterator>
4451 range;
4452 range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value);
4453 if (range.first != range.second) {
4454 for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4455 pos != range.second; ++pos) {
4456 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4457 sym[pos->second].GetMangled().GetName(
4458 Mangled::ePreferMangled)) {
4459 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4460 // We just need the flags from the linker symbol, so put these
4461 // flags into the N_STSYM flags to avoid duplicate symbols in
4462 // the symbol table.
4463 sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4464 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4465 sym[sym_idx].Clear();
4466 return true;
4469 } else {
4470 // Combine N_GSYM stab entries with the non stab symbol.
4471 const char *gsym_name = sym[sym_idx]
4472 .GetMangled()
4473 .GetName(Mangled::ePreferMangled)
4474 .GetCString();
4475 if (gsym_name) {
4476 ConstNameToSymbolIndexMap::const_iterator pos =
4477 N_GSYM_name_to_sym_idx.find(gsym_name);
4478 if (pos != N_GSYM_name_to_sym_idx.end()) {
4479 const uint32_t GSYM_sym_idx = pos->second;
4480 m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
4481 // Copy the address, because often the N_GSYM address has an
4482 // invalid address of zero when the global is a common symbol.
4483 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
4484 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
4485 add_symbol_addr(
4486 sym[GSYM_sym_idx].GetAddress().GetFileAddress());
4487 // We just need the flags from the linker symbol, so put these
4488 // flags into the N_GSYM flags to avoid duplicate symbols in
4489 // the symbol table.
4490 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4491 sym[sym_idx].Clear();
4492 return true;
4499 sym[sym_idx].SetID(nlist_idx);
4500 sym[sym_idx].SetType(type);
4501 if (set_value) {
4502 sym[sym_idx].GetAddressRef().SetSection(symbol_section);
4503 sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
4504 if (symbol_section)
4505 add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
4507 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4508 if (nlist.n_desc & N_WEAK_REF)
4509 sym[sym_idx].SetIsWeak(true);
4511 if (symbol_byte_size > 0)
4512 sym[sym_idx].SetByteSize(symbol_byte_size);
4514 if (demangled_is_synthesized)
4515 sym[sym_idx].SetDemangledNameIsSynthesized(true);
4517 ++sym_idx;
4518 return true;
4521 // First parse all the nlists but don't process them yet. See the next
4522 // comment for an explanation why.
4523 std::vector<struct nlist_64> nlists;
4524 nlists.reserve(symtab_load_command.nsyms);
4525 for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
4526 if (auto nlist =
4527 ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
4528 nlists.push_back(*nlist);
4529 else
4530 break;
4533 // Now parse all the debug symbols. This is needed to merge non-debug
4534 // symbols in the next step. Non-debug symbols are always coalesced into
4535 // the debug symbol. Doing this in one step would mean that some symbols
4536 // won't be merged.
4537 nlist_idx = 0;
4538 for (auto &nlist : nlists) {
4539 if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
4540 break;
4543 // Finally parse all the non debug symbols.
4544 nlist_idx = 0;
4545 for (auto &nlist : nlists) {
4546 if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
4547 break;
4550 for (const auto &pos : reexport_shlib_needs_fixup) {
4551 const auto undef_pos = undefined_name_to_desc.find(pos.second);
4552 if (undef_pos != undefined_name_to_desc.end()) {
4553 const uint8_t dylib_ordinal =
4554 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
4555 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
4556 sym[pos.first].SetReExportedSymbolSharedLibrary(
4557 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
4562 // Count how many trie symbols we'll add to the symbol table
4563 int trie_symbol_table_augment_count = 0;
4564 for (auto &e : external_sym_trie_entries) {
4565 if (!symbols_added.contains(e.entry.address))
4566 trie_symbol_table_augment_count++;
4569 if (num_syms < sym_idx + trie_symbol_table_augment_count) {
4570 num_syms = sym_idx + trie_symbol_table_augment_count;
4571 sym = symtab.Resize(num_syms);
4573 uint32_t synthetic_sym_id = symtab_load_command.nsyms;
4575 // Add symbols from the trie to the symbol table.
4576 for (auto &e : external_sym_trie_entries) {
4577 if (symbols_added.contains(e.entry.address))
4578 continue;
4580 // Find the section that this trie address is in, use that to annotate
4581 // symbol type as we add the trie address and name to the symbol table.
4582 Address symbol_addr;
4583 if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
4584 SectionSP symbol_section(symbol_addr.GetSection());
4585 const char *symbol_name = e.entry.name.GetCString();
4586 bool demangled_is_synthesized = false;
4587 SymbolType type =
4588 GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
4589 data_section_sp, data_dirty_section_sp,
4590 data_const_section_sp, symbol_section);
4592 sym[sym_idx].SetType(type);
4593 if (symbol_section) {
4594 sym[sym_idx].SetID(synthetic_sym_id++);
4595 sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
4596 if (demangled_is_synthesized)
4597 sym[sym_idx].SetDemangledNameIsSynthesized(true);
4598 sym[sym_idx].SetIsSynthetic(true);
4599 sym[sym_idx].SetExternal(true);
4600 sym[sym_idx].GetAddressRef() = symbol_addr;
4601 add_symbol_addr(symbol_addr.GetFileAddress());
4602 if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
4603 sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB);
4604 ++sym_idx;
4609 if (function_starts_count > 0) {
4610 uint32_t num_synthetic_function_symbols = 0;
4611 for (i = 0; i < function_starts_count; ++i) {
4612 if (!symbols_added.contains(function_starts.GetEntryRef(i).addr))
4613 ++num_synthetic_function_symbols;
4616 if (num_synthetic_function_symbols > 0) {
4617 if (num_syms < sym_idx + num_synthetic_function_symbols) {
4618 num_syms = sym_idx + num_synthetic_function_symbols;
4619 sym = symtab.Resize(num_syms);
4621 for (i = 0; i < function_starts_count; ++i) {
4622 const FunctionStarts::Entry *func_start_entry =
4623 function_starts.GetEntryAtIndex(i);
4624 if (!symbols_added.contains(func_start_entry->addr)) {
4625 addr_t symbol_file_addr = func_start_entry->addr;
4626 uint32_t symbol_flags = 0;
4627 if (func_start_entry->data)
4628 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
4629 Address symbol_addr;
4630 if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
4631 SectionSP symbol_section(symbol_addr.GetSection());
4632 uint32_t symbol_byte_size = 0;
4633 if (symbol_section) {
4634 const addr_t section_file_addr = symbol_section->GetFileAddress();
4635 const FunctionStarts::Entry *next_func_start_entry =
4636 function_starts.FindNextEntry(func_start_entry);
4637 const addr_t section_end_file_addr =
4638 section_file_addr + symbol_section->GetByteSize();
4639 if (next_func_start_entry) {
4640 addr_t next_symbol_file_addr = next_func_start_entry->addr;
4641 if (is_arm)
4642 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4643 symbol_byte_size = std::min<lldb::addr_t>(
4644 next_symbol_file_addr - symbol_file_addr,
4645 section_end_file_addr - symbol_file_addr);
4646 } else {
4647 symbol_byte_size = section_end_file_addr - symbol_file_addr;
4649 sym[sym_idx].SetID(synthetic_sym_id++);
4650 // Don't set the name for any synthetic symbols, the Symbol
4651 // object will generate one if needed when the name is accessed
4652 // via accessors.
4653 sym[sym_idx].GetMangled().SetDemangledName(ConstString());
4654 sym[sym_idx].SetType(eSymbolTypeCode);
4655 sym[sym_idx].SetIsSynthetic(true);
4656 sym[sym_idx].GetAddressRef() = symbol_addr;
4657 add_symbol_addr(symbol_addr.GetFileAddress());
4658 if (symbol_flags)
4659 sym[sym_idx].SetFlags(symbol_flags);
4660 if (symbol_byte_size)
4661 sym[sym_idx].SetByteSize(symbol_byte_size);
4662 ++sym_idx;
4670 // Trim our symbols down to just what we ended up with after removing any
4671 // symbols.
4672 if (sym_idx < num_syms) {
4673 num_syms = sym_idx;
4674 sym = symtab.Resize(num_syms);
4677 // Now synthesize indirect symbols
4678 if (m_dysymtab.nindirectsyms != 0) {
4679 if (indirect_symbol_index_data.GetByteSize()) {
4680 NListIndexToSymbolIndexMap::const_iterator end_index_pos =
4681 m_nlist_idx_to_sym_idx.end();
4683 for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
4684 ++sect_idx) {
4685 if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
4686 S_SYMBOL_STUBS) {
4687 uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
4688 if (symbol_stub_byte_size == 0)
4689 continue;
4691 const uint32_t num_symbol_stubs =
4692 m_mach_sections[sect_idx].size / symbol_stub_byte_size;
4694 if (num_symbol_stubs == 0)
4695 continue;
4697 const uint32_t symbol_stub_index_offset =
4698 m_mach_sections[sect_idx].reserved1;
4699 for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
4700 const uint32_t symbol_stub_index =
4701 symbol_stub_index_offset + stub_idx;
4702 const lldb::addr_t symbol_stub_addr =
4703 m_mach_sections[sect_idx].addr +
4704 (stub_idx * symbol_stub_byte_size);
4705 lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
4706 if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
4707 symbol_stub_offset, 4)) {
4708 const uint32_t stub_sym_id =
4709 indirect_symbol_index_data.GetU32(&symbol_stub_offset);
4710 if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
4711 continue;
4713 NListIndexToSymbolIndexMap::const_iterator index_pos =
4714 m_nlist_idx_to_sym_idx.find(stub_sym_id);
4715 Symbol *stub_symbol = nullptr;
4716 if (index_pos != end_index_pos) {
4717 // We have a remapping from the original nlist index to a
4718 // current symbol index, so just look this up by index
4719 stub_symbol = symtab.SymbolAtIndex(index_pos->second);
4720 } else {
4721 // We need to lookup a symbol using the original nlist symbol
4722 // index since this index is coming from the S_SYMBOL_STUBS
4723 stub_symbol = symtab.FindSymbolByID(stub_sym_id);
4726 if (stub_symbol) {
4727 Address so_addr(symbol_stub_addr, section_list);
4729 if (stub_symbol->GetType() == eSymbolTypeUndefined) {
4730 // Change the external symbol into a trampoline that makes
4731 // sense These symbols were N_UNDF N_EXT, and are useless
4732 // to us, so we can re-use them so we don't have to make up
4733 // a synthetic symbol for no good reason.
4734 if (resolver_addresses.find(symbol_stub_addr) ==
4735 resolver_addresses.end())
4736 stub_symbol->SetType(eSymbolTypeTrampoline);
4737 else
4738 stub_symbol->SetType(eSymbolTypeResolver);
4739 stub_symbol->SetExternal(false);
4740 stub_symbol->GetAddressRef() = so_addr;
4741 stub_symbol->SetByteSize(symbol_stub_byte_size);
4742 } else {
4743 // Make a synthetic symbol to describe the trampoline stub
4744 Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
4745 if (sym_idx >= num_syms) {
4746 sym = symtab.Resize(++num_syms);
4747 stub_symbol = nullptr; // this pointer no longer valid
4749 sym[sym_idx].SetID(synthetic_sym_id++);
4750 sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
4751 if (resolver_addresses.find(symbol_stub_addr) ==
4752 resolver_addresses.end())
4753 sym[sym_idx].SetType(eSymbolTypeTrampoline);
4754 else
4755 sym[sym_idx].SetType(eSymbolTypeResolver);
4756 sym[sym_idx].SetIsSynthetic(true);
4757 sym[sym_idx].GetAddressRef() = so_addr;
4758 add_symbol_addr(so_addr.GetFileAddress());
4759 sym[sym_idx].SetByteSize(symbol_stub_byte_size);
4760 ++sym_idx;
4762 } else {
4763 if (log)
4764 log->Warning("symbol stub referencing symbol table symbol "
4765 "%u that isn't in our minimal symbol table, "
4766 "fix this!!!",
4767 stub_sym_id);
4776 if (!reexport_trie_entries.empty()) {
4777 for (const auto &e : reexport_trie_entries) {
4778 if (e.entry.import_name) {
4779 // Only add indirect symbols from the Trie entries if we didn't have
4780 // a N_INDR nlist entry for this already
4781 if (indirect_symbol_names.find(e.entry.name) ==
4782 indirect_symbol_names.end()) {
4783 // Make a synthetic symbol to describe re-exported symbol.
4784 if (sym_idx >= num_syms)
4785 sym = symtab.Resize(++num_syms);
4786 sym[sym_idx].SetID(synthetic_sym_id++);
4787 sym[sym_idx].GetMangled() = Mangled(e.entry.name);
4788 sym[sym_idx].SetType(eSymbolTypeReExported);
4789 sym[sym_idx].SetIsSynthetic(true);
4790 sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
4791 if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
4792 sym[sym_idx].SetReExportedSymbolSharedLibrary(
4793 dylib_files.GetFileSpecAtIndex(e.entry.other - 1));
4795 ++sym_idx;
4802 void ObjectFileMachO::Dump(Stream *s) {
4803 ModuleSP module_sp(GetModule());
4804 if (module_sp) {
4805 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
4806 s->Printf("%p: ", static_cast<void *>(this));
4807 s->Indent();
4808 if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64)
4809 s->PutCString("ObjectFileMachO64");
4810 else
4811 s->PutCString("ObjectFileMachO32");
4813 *s << ", file = '" << m_file;
4814 ModuleSpecList all_specs;
4815 ModuleSpec base_spec;
4816 GetAllArchSpecs(m_header, m_data, MachHeaderSizeFromMagic(m_header.magic),
4817 base_spec, all_specs);
4818 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
4819 *s << "', triple";
4820 if (e)
4821 s->Printf("[%d]", i);
4822 *s << " = ";
4823 *s << all_specs.GetModuleSpecRefAtIndex(i)
4824 .GetArchitecture()
4825 .GetTriple()
4826 .getTriple();
4828 *s << "\n";
4829 SectionList *sections = GetSectionList();
4830 if (sections)
4831 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
4832 UINT32_MAX);
4834 if (m_symtab_up)
4835 m_symtab_up->Dump(s, nullptr, eSortOrderNone);
4839 UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
4840 const lldb_private::DataExtractor &data,
4841 lldb::offset_t lc_offset) {
4842 uint32_t i;
4843 llvm::MachO::uuid_command load_cmd;
4845 lldb::offset_t offset = lc_offset;
4846 for (i = 0; i < header.ncmds; ++i) {
4847 const lldb::offset_t cmd_offset = offset;
4848 if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4849 break;
4851 if (load_cmd.cmd == LC_UUID) {
4852 const uint8_t *uuid_bytes = data.PeekData(offset, 16);
4854 if (uuid_bytes) {
4855 // OpenCL on Mac OS X uses the same UUID for each of its object files.
4856 // We pretend these object files have no UUID to prevent crashing.
4858 const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8,
4859 0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63,
4860 0xbb, 0x14, 0xf0, 0x0d};
4862 if (!memcmp(uuid_bytes, opencl_uuid, 16))
4863 return UUID();
4865 return UUID(uuid_bytes, 16);
4867 return UUID();
4869 offset = cmd_offset + load_cmd.cmdsize;
4871 return UUID();
4874 static llvm::StringRef GetOSName(uint32_t cmd) {
4875 switch (cmd) {
4876 case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4877 return llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4878 case llvm::MachO::LC_VERSION_MIN_MACOSX:
4879 return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4880 case llvm::MachO::LC_VERSION_MIN_TVOS:
4881 return llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4882 case llvm::MachO::LC_VERSION_MIN_WATCHOS:
4883 return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4884 default:
4885 llvm_unreachable("unexpected LC_VERSION load command");
4889 namespace {
4890 struct OSEnv {
4891 llvm::StringRef os_type;
4892 llvm::StringRef environment;
4893 OSEnv(uint32_t cmd) {
4894 switch (cmd) {
4895 case llvm::MachO::PLATFORM_MACOS:
4896 os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4897 return;
4898 case llvm::MachO::PLATFORM_IOS:
4899 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4900 return;
4901 case llvm::MachO::PLATFORM_TVOS:
4902 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4903 return;
4904 case llvm::MachO::PLATFORM_WATCHOS:
4905 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4906 return;
4907 case llvm::MachO::PLATFORM_BRIDGEOS:
4908 os_type = llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS);
4909 return;
4910 case llvm::MachO::PLATFORM_DRIVERKIT:
4911 os_type = llvm::Triple::getOSTypeName(llvm::Triple::DriverKit);
4912 return;
4913 case llvm::MachO::PLATFORM_MACCATALYST:
4914 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4915 environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI);
4916 return;
4917 case llvm::MachO::PLATFORM_IOSSIMULATOR:
4918 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4919 environment =
4920 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4921 return;
4922 case llvm::MachO::PLATFORM_TVOSSIMULATOR:
4923 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4924 environment =
4925 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4926 return;
4927 case llvm::MachO::PLATFORM_WATCHOSSIMULATOR:
4928 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4929 environment =
4930 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4931 return;
4932 case llvm::MachO::PLATFORM_XROS:
4933 os_type = llvm::Triple::getOSTypeName(llvm::Triple::XROS);
4934 return;
4935 case llvm::MachO::PLATFORM_XROS_SIMULATOR:
4936 os_type = llvm::Triple::getOSTypeName(llvm::Triple::XROS);
4937 environment =
4938 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4939 return;
4940 default: {
4941 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process));
4942 LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION");
// Splits a packed 32-bit version word into its components: the upper 16 bits
// are the major version, the next 8 bits the minor version and the low 8 bits
// the patch version.
struct MinOS {
  uint32_t major_version;
  uint32_t minor_version;
  uint32_t patch_version;

  MinOS(uint32_t version) {
    major_version = version >> 16;
    minor_version = (version >> 8) & 0xffu;
    patch_version = version & 0xffu;
  }
};
4954 } // namespace
4956 void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
4957 const lldb_private::DataExtractor &data,
4958 lldb::offset_t lc_offset,
4959 ModuleSpec &base_spec,
4960 lldb_private::ModuleSpecList &all_specs) {
4961 auto &base_arch = base_spec.GetArchitecture();
4962 base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype);
4963 if (!base_arch.IsValid())
4964 return;
4966 bool found_any = false;
4967 auto add_triple = [&](const llvm::Triple &triple) {
4968 auto spec = base_spec;
4969 spec.GetArchitecture().GetTriple() = triple;
4970 if (spec.GetArchitecture().IsValid()) {
4971 spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset);
4972 all_specs.Append(spec);
4973 found_any = true;
4977 // Set OS to an unspecified unknown or a "*" so it can match any OS
4978 llvm::Triple base_triple = base_arch.GetTriple();
4979 base_triple.setOS(llvm::Triple::UnknownOS);
4980 base_triple.setOSName(llvm::StringRef());
4982 if (header.filetype == MH_PRELOAD) {
4983 if (header.cputype == CPU_TYPE_ARM) {
4984 // If this is a 32-bit arm binary, and it's a standalone binary, force
4985 // the Vendor to Apple so we don't accidentally pick up the generic
4986 // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the
4987 // frame pointer register; most other armv7 ABIs use a combination of
4988 // r7 and r11.
4989 base_triple.setVendor(llvm::Triple::Apple);
4990 } else {
4991 // Set vendor to an unspecified unknown or a "*" so it can match any
4992 // vendor This is required for correct behavior of EFI debugging on
4993 // x86_64
4994 base_triple.setVendor(llvm::Triple::UnknownVendor);
4995 base_triple.setVendorName(llvm::StringRef());
4997 return add_triple(base_triple);
5000 llvm::MachO::load_command load_cmd;
5002 // See if there is an LC_VERSION_MIN_* load command that can give
5003 // us the OS type.
5004 lldb::offset_t offset = lc_offset;
5005 for (uint32_t i = 0; i < header.ncmds; ++i) {
5006 const lldb::offset_t cmd_offset = offset;
5007 if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
5008 break;
5010 llvm::MachO::version_min_command version_min;
5011 switch (load_cmd.cmd) {
5012 case llvm::MachO::LC_VERSION_MIN_MACOSX:
5013 case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
5014 case llvm::MachO::LC_VERSION_MIN_TVOS:
5015 case llvm::MachO::LC_VERSION_MIN_WATCHOS: {
5016 if (load_cmd.cmdsize != sizeof(version_min))
5017 break;
5018 if (data.ExtractBytes(cmd_offset, sizeof(version_min),
5019 data.GetByteOrder(), &version_min) == 0)
5020 break;
5021 MinOS min_os(version_min.version);
5022 llvm::SmallString<32> os_name;
5023 llvm::raw_svector_ostream os(os_name);
5024 os << GetOSName(load_cmd.cmd) << min_os.major_version << '.'
5025 << min_os.minor_version << '.' << min_os.patch_version;
5027 auto triple = base_triple;
5028 triple.setOSName(os.str());
5030 // Disambiguate legacy simulator platforms.
5031 if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX &&
5032 (base_triple.getArch() == llvm::Triple::x86_64 ||
5033 base_triple.getArch() == llvm::Triple::x86)) {
5034 // The combination of legacy LC_VERSION_MIN load command and
5035 // x86 architecture always indicates a simulator environment.
5036 // The combination of LC_VERSION_MIN and arm architecture only
5037 // appears for native binaries. Back-deploying simulator
5038 // binaries on Apple Silicon Macs use the modern unambigous
5039 // LC_BUILD_VERSION load commands; no special handling required.
5040 triple.setEnvironment(llvm::Triple::Simulator);
5042 add_triple(triple);
5043 break;
5045 default:
5046 break;
5049 offset = cmd_offset + load_cmd.cmdsize;
5052 // See if there are LC_BUILD_VERSION load commands that can give
5053 // us the OS type.
5054 offset = lc_offset;
5055 for (uint32_t i = 0; i < header.ncmds; ++i) {
5056 const lldb::offset_t cmd_offset = offset;
5057 if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
5058 break;
5060 do {
5061 if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
5062 llvm::MachO::build_version_command build_version;
5063 if (load_cmd.cmdsize < sizeof(build_version)) {
5064 // Malformed load command.
5065 break;
5067 if (data.ExtractBytes(cmd_offset, sizeof(build_version),
5068 data.GetByteOrder(), &build_version) == 0)
5069 break;
5070 MinOS min_os(build_version.minos);
5071 OSEnv os_env(build_version.platform);
5072 llvm::SmallString<16> os_name;
5073 llvm::raw_svector_ostream os(os_name);
5074 os << os_env.os_type << min_os.major_version << '.'
5075 << min_os.minor_version << '.' << min_os.patch_version;
5076 auto triple = base_triple;
5077 triple.setOSName(os.str());
5078 os_name.clear();
5079 if (!os_env.environment.empty())
5080 triple.setEnvironmentName(os_env.environment);
5081 add_triple(triple);
5083 } while (false);
5084 offset = cmd_offset + load_cmd.cmdsize;
5087 if (!found_any) {
5088 add_triple(base_triple);
5092 ArchSpec ObjectFileMachO::GetArchitecture(
5093 ModuleSP module_sp, const llvm::MachO::mach_header &header,
5094 const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) {
5095 ModuleSpecList all_specs;
5096 ModuleSpec base_spec;
5097 GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic),
5098 base_spec, all_specs);
5100 // If the object file offers multiple alternative load commands,
5101 // pick the one that matches the module.
5102 if (module_sp) {
5103 const ArchSpec &module_arch = module_sp->GetArchitecture();
5104 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
5105 ArchSpec mach_arch =
5106 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture();
5107 if (module_arch.IsCompatibleMatch(mach_arch))
5108 return mach_arch;
5112 // Return the first arch we found.
5113 if (all_specs.GetSize() == 0)
5114 return {};
5115 return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture();
5118 UUID ObjectFileMachO::GetUUID() {
5119 ModuleSP module_sp(GetModule());
5120 if (module_sp) {
5121 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5122 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
5123 return GetUUID(m_header, m_data, offset);
5125 return UUID();
5128 uint32_t ObjectFileMachO::GetDependentModules(FileSpecList &files) {
5129 ModuleSP module_sp = GetModule();
5130 if (!module_sp)
5131 return 0;
5133 uint32_t count = 0;
5134 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5135 llvm::MachO::load_command load_cmd;
5136 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
5137 std::vector<std::string> rpath_paths;
5138 std::vector<std::string> rpath_relative_paths;
5139 std::vector<std::string> at_exec_relative_paths;
5140 uint32_t i;
5141 for (i = 0; i < m_header.ncmds; ++i) {
5142 const uint32_t cmd_offset = offset;
5143 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5144 break;
5146 switch (load_cmd.cmd) {
5147 case LC_RPATH:
5148 case LC_LOAD_DYLIB:
5149 case LC_LOAD_WEAK_DYLIB:
5150 case LC_REEXPORT_DYLIB:
5151 case LC_LOAD_DYLINKER:
5152 case LC_LOADFVMLIB:
5153 case LC_LOAD_UPWARD_DYLIB: {
5154 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
5155 // For LC_LOAD_DYLIB there is an alternate encoding
5156 // which adds a uint32_t `flags` field for `DYLD_USE_*`
5157 // flags. This can be detected by a timestamp field with
5158 // the `DYLIB_USE_MARKER` constant value.
5159 bool is_delayed_init = false;
5160 uint32_t use_command_marker = m_data.GetU32(&offset);
5161 if (use_command_marker == 0x1a741800 /* DYLIB_USE_MARKER */) {
5162 offset += 4; /* uint32_t current_version */
5163 offset += 4; /* uint32_t compat_version */
5164 uint32_t flags = m_data.GetU32(&offset);
5165 // If this LC_LOAD_DYLIB is marked delay-init,
5166 // don't report it as a dependent library -- it
5167 // may be loaded in the process at some point,
5168 // but will most likely not be load at launch.
5169 if (flags & 0x08 /* DYLIB_USE_DELAYED_INIT */)
5170 is_delayed_init = true;
5172 const char *path = m_data.PeekCStr(name_offset);
5173 if (path && !is_delayed_init) {
5174 if (load_cmd.cmd == LC_RPATH)
5175 rpath_paths.push_back(path);
5176 else {
5177 if (path[0] == '@') {
5178 if (strncmp(path, "@rpath", strlen("@rpath")) == 0)
5179 rpath_relative_paths.push_back(path + strlen("@rpath"));
5180 else if (strncmp(path, "@executable_path",
5181 strlen("@executable_path")) == 0)
5182 at_exec_relative_paths.push_back(path +
5183 strlen("@executable_path"));
5184 } else {
5185 FileSpec file_spec(path);
5186 if (files.AppendIfUnique(file_spec))
5187 count++;
5191 } break;
5193 default:
5194 break;
5196 offset = cmd_offset + load_cmd.cmdsize;
5199 FileSpec this_file_spec(m_file);
5200 FileSystem::Instance().Resolve(this_file_spec);
5202 if (!rpath_paths.empty()) {
5203 // Fixup all LC_RPATH values to be absolute paths.
5204 const std::string this_directory =
5205 this_file_spec.GetDirectory().GetString();
5206 for (auto &rpath : rpath_paths) {
5207 if (llvm::StringRef(rpath).starts_with(g_loader_path))
5208 rpath = this_directory + rpath.substr(g_loader_path.size());
5209 else if (llvm::StringRef(rpath).starts_with(g_executable_path))
5210 rpath = this_directory + rpath.substr(g_executable_path.size());
5213 for (const auto &rpath_relative_path : rpath_relative_paths) {
5214 for (const auto &rpath : rpath_paths) {
5215 std::string path = rpath;
5216 path += rpath_relative_path;
5217 // It is OK to resolve this path because we must find a file on disk
5218 // for us to accept it anyway if it is rpath relative.
5219 FileSpec file_spec(path);
5220 FileSystem::Instance().Resolve(file_spec);
5221 if (FileSystem::Instance().Exists(file_spec) &&
5222 files.AppendIfUnique(file_spec)) {
5223 count++;
5224 break;
5230 // We may have @executable_paths but no RPATHS. Figure those out here.
5231 // Only do this if this object file is the executable. We have no way to
5232 // get back to the actual executable otherwise, so we won't get the right
5233 // path.
5234 if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) {
5235 FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent();
5236 for (const auto &at_exec_relative_path : at_exec_relative_paths) {
5237 FileSpec file_spec =
5238 exec_dir.CopyByAppendingPathComponent(at_exec_relative_path);
5239 if (FileSystem::Instance().Exists(file_spec) &&
5240 files.AppendIfUnique(file_spec))
5241 count++;
5244 return count;
5247 lldb_private::Address ObjectFileMachO::GetEntryPointAddress() {
5248 // If the object file is not an executable it can't hold the entry point.
5249 // m_entry_point_address is initialized to an invalid address, so we can just
5250 // return that. If m_entry_point_address is valid it means we've found it
5251 // already, so return the cached value.
5253 if ((!IsExecutable() && !IsDynamicLoader()) ||
5254 m_entry_point_address.IsValid()) {
5255 return m_entry_point_address;
5258 // Otherwise, look for the UnixThread or Thread command. The data for the
5259 // Thread command is given in /usr/include/mach-o.h, but it is basically:
5261 // uint32_t flavor - this is the flavor argument you would pass to
5262 // thread_get_state
5263 // uint32_t count - this is the count of longs in the thread state data
5264 // struct XXX_thread_state state - this is the structure from
5265 // <machine/thread_status.h> corresponding to the flavor.
5266 // <repeat this trio>
5268 // So we just keep reading the various register flavors till we find the GPR
5269 // one, then read the PC out of there.
5270 // FIXME: We will need to have a "RegisterContext data provider" class at some
5271 // point that can get all the registers
5272 // out of data in this form & attach them to a given thread. That should
5273 // underlie the MacOS X User process plugin, and we'll also need it for the
5274 // MacOS X Core File process plugin. When we have that we can also use it
5275 // here.
5277 // For now we hard-code the offsets and flavors we need:
5281 ModuleSP module_sp(GetModule());
5282 if (module_sp) {
5283 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5284 llvm::MachO::load_command load_cmd;
5285 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
5286 uint32_t i;
5287 lldb::addr_t start_address = LLDB_INVALID_ADDRESS;
5288 bool done = false;
5290 for (i = 0; i < m_header.ncmds; ++i) {
5291 const lldb::offset_t cmd_offset = offset;
5292 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5293 break;
5295 switch (load_cmd.cmd) {
5296 case LC_UNIXTHREAD:
5297 case LC_THREAD: {
5298 while (offset < cmd_offset + load_cmd.cmdsize) {
5299 uint32_t flavor = m_data.GetU32(&offset);
5300 uint32_t count = m_data.GetU32(&offset);
5301 if (count == 0) {
5302 // We've gotten off somehow, log and exit;
5303 return m_entry_point_address;
5306 switch (m_header.cputype) {
5307 case llvm::MachO::CPU_TYPE_ARM:
5308 if (flavor == 1 ||
5309 flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32
5310 // from mach/arm/thread_status.h
5312 offset += 60; // This is the offset of pc in the GPR thread state
5313 // data structure.
5314 start_address = m_data.GetU32(&offset);
5315 done = true;
5317 break;
5318 case llvm::MachO::CPU_TYPE_ARM64:
5319 case llvm::MachO::CPU_TYPE_ARM64_32:
5320 if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h
5322 offset += 256; // This is the offset of pc in the GPR thread state
5323 // data structure.
5324 start_address = m_data.GetU64(&offset);
5325 done = true;
5327 break;
5328 case llvm::MachO::CPU_TYPE_I386:
5329 if (flavor ==
5330 1) // x86_THREAD_STATE32 from mach/i386/thread_status.h
5332 offset += 40; // This is the offset of eip in the GPR thread state
5333 // data structure.
5334 start_address = m_data.GetU32(&offset);
5335 done = true;
5337 break;
5338 case llvm::MachO::CPU_TYPE_X86_64:
5339 if (flavor ==
5340 4) // x86_THREAD_STATE64 from mach/i386/thread_status.h
5342 offset += 16 * 8; // This is the offset of rip in the GPR thread
5343 // state data structure.
5344 start_address = m_data.GetU64(&offset);
5345 done = true;
5347 break;
5348 default:
5349 return m_entry_point_address;
5351 // Haven't found the GPR flavor yet, skip over the data for this
5352 // flavor:
5353 if (done)
5354 break;
5355 offset += count * 4;
5357 } break;
5358 case LC_MAIN: {
5359 uint64_t entryoffset = m_data.GetU64(&offset);
5360 SectionSP text_segment_sp =
5361 GetSectionList()->FindSectionByName(GetSegmentNameTEXT());
5362 if (text_segment_sp) {
5363 done = true;
5364 start_address = text_segment_sp->GetFileAddress() + entryoffset;
5366 } break;
5368 default:
5369 break;
5371 if (done)
5372 break;
5374 // Go to the next load command:
5375 offset = cmd_offset + load_cmd.cmdsize;
5378 if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) {
5379 if (GetSymtab()) {
5380 Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType(
5381 ConstString("_dyld_start"), SymbolType::eSymbolTypeCode,
5382 Symtab::eDebugAny, Symtab::eVisibilityAny);
5383 if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) {
5384 start_address = dyld_start_sym->GetAddress().GetFileAddress();
5389 if (start_address != LLDB_INVALID_ADDRESS) {
5390 // We got the start address from the load commands, so now resolve that
5391 // address in the sections of this ObjectFile:
5392 if (!m_entry_point_address.ResolveAddressUsingFileSections(
5393 start_address, GetSectionList())) {
5394 m_entry_point_address.Clear();
5396 } else {
5397 // We couldn't read the UnixThread load command - maybe it wasn't there.
5398 // As a fallback look for the "start" symbol in the main executable.
5400 ModuleSP module_sp(GetModule());
5402 if (module_sp) {
5403 SymbolContextList contexts;
5404 SymbolContext context;
5405 module_sp->FindSymbolsWithNameAndType(ConstString("start"),
5406 eSymbolTypeCode, contexts);
5407 if (contexts.GetSize()) {
5408 if (contexts.GetContextAtIndex(0, context))
5409 m_entry_point_address = context.symbol->GetAddress();
5415 return m_entry_point_address;
5418 lldb_private::Address ObjectFileMachO::GetBaseAddress() {
5419 lldb_private::Address header_addr;
5420 SectionList *section_list = GetSectionList();
5421 if (section_list) {
5422 SectionSP text_segment_sp(
5423 section_list->FindSectionByName(GetSegmentNameTEXT()));
5424 if (text_segment_sp) {
5425 header_addr.SetSection(text_segment_sp);
5426 header_addr.SetOffset(0);
5429 return header_addr;
5432 uint32_t ObjectFileMachO::GetNumThreadContexts() {
5433 ModuleSP module_sp(GetModule());
5434 if (module_sp) {
5435 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5436 if (!m_thread_context_offsets_valid) {
5437 m_thread_context_offsets_valid = true;
5438 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
5439 FileRangeArray::Entry file_range;
5440 llvm::MachO::thread_command thread_cmd;
5441 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5442 const uint32_t cmd_offset = offset;
5443 if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr)
5444 break;
5446 if (thread_cmd.cmd == LC_THREAD) {
5447 file_range.SetRangeBase(offset);
5448 file_range.SetByteSize(thread_cmd.cmdsize - 8);
5449 m_thread_context_offsets.Append(file_range);
5451 offset = cmd_offset + thread_cmd.cmdsize;
5455 return m_thread_context_offsets.GetSize();
5458 std::vector<std::tuple<offset_t, offset_t>>
5459 ObjectFileMachO::FindLC_NOTEByName(std::string name) {
5460 std::vector<std::tuple<offset_t, offset_t>> results;
5461 ModuleSP module_sp(GetModule());
5462 if (module_sp) {
5463 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5465 offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
5466 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5467 const uint32_t cmd_offset = offset;
5468 llvm::MachO::load_command lc = {};
5469 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5470 break;
5471 if (lc.cmd == LC_NOTE) {
5472 char data_owner[17];
5473 m_data.CopyData(offset, 16, data_owner);
5474 data_owner[16] = '\0';
5475 offset += 16;
5477 if (name == data_owner) {
5478 offset_t payload_offset = m_data.GetU64_unchecked(&offset);
5479 offset_t payload_size = m_data.GetU64_unchecked(&offset);
5480 results.push_back({payload_offset, payload_size});
5483 offset = cmd_offset + lc.cmdsize;
5486 return results;
5489 std::string ObjectFileMachO::GetIdentifierString() {
5490 Log *log(
5491 GetLog(LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader));
5492 ModuleSP module_sp(GetModule());
5493 if (module_sp) {
5494 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5496 auto lc_notes = FindLC_NOTEByName("kern ver str");
5497 for (auto lc_note : lc_notes) {
5498 offset_t payload_offset = std::get<0>(lc_note);
5499 offset_t payload_size = std::get<1>(lc_note);
5500 uint32_t version;
5501 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr) {
5502 if (version == 1) {
5503 uint32_t strsize = payload_size - sizeof(uint32_t);
5504 std::string result(strsize, '\0');
5505 m_data.CopyData(payload_offset, strsize, result.data());
5506 LLDB_LOGF(log, "LC_NOTE 'kern ver str' found with text '%s'",
5507 result.c_str());
5508 return result;
5513 // Second, make a pass over the load commands looking for an obsolete
5514 // LC_IDENT load command.
5515 offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
5516 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5517 const uint32_t cmd_offset = offset;
5518 llvm::MachO::ident_command ident_command;
5519 if (m_data.GetU32(&offset, &ident_command, 2) == nullptr)
5520 break;
5521 if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) {
5522 std::string result(ident_command.cmdsize, '\0');
5523 if (m_data.CopyData(offset, ident_command.cmdsize, result.data()) ==
5524 ident_command.cmdsize) {
5525 LLDB_LOGF(log, "LC_IDENT found with text '%s'", result.c_str());
5526 return result;
5529 offset = cmd_offset + ident_command.cmdsize;
5532 return {};
5535 AddressableBits ObjectFileMachO::GetAddressableBits() {
5536 AddressableBits addressable_bits;
5538 Log *log(GetLog(LLDBLog::Process));
5539 ModuleSP module_sp(GetModule());
5540 if (module_sp) {
5541 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5542 auto lc_notes = FindLC_NOTEByName("addrable bits");
5543 for (auto lc_note : lc_notes) {
5544 offset_t payload_offset = std::get<0>(lc_note);
5545 uint32_t version;
5546 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr) {
5547 if (version == 3) {
5548 uint32_t num_addr_bits = m_data.GetU32_unchecked(&payload_offset);
5549 addressable_bits.SetAddressableBits(num_addr_bits);
5550 LLDB_LOGF(log,
5551 "LC_NOTE 'addrable bits' v3 found, value %d "
5552 "bits",
5553 num_addr_bits);
5555 if (version == 4) {
5556 uint32_t lo_addr_bits = m_data.GetU32_unchecked(&payload_offset);
5557 uint32_t hi_addr_bits = m_data.GetU32_unchecked(&payload_offset);
5559 if (lo_addr_bits == hi_addr_bits)
5560 addressable_bits.SetAddressableBits(lo_addr_bits);
5561 else
5562 addressable_bits.SetAddressableBits(lo_addr_bits, hi_addr_bits);
5563 LLDB_LOGF(log, "LC_NOTE 'addrable bits' v4 found, value %d & %d bits",
5564 lo_addr_bits, hi_addr_bits);
5569 return addressable_bits;
5572 bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value,
5573 bool &value_is_offset,
5574 UUID &uuid,
5575 ObjectFile::BinaryType &type) {
5576 Log *log(
5577 GetLog(LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader));
5578 value = LLDB_INVALID_ADDRESS;
5579 value_is_offset = false;
5580 uuid.Clear();
5581 uint32_t log2_pagesize = 0; // not currently passed up to caller
5582 uint32_t platform = 0; // not currently passed up to caller
5583 ModuleSP module_sp(GetModule());
5584 if (module_sp) {
5585 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5587 auto lc_notes = FindLC_NOTEByName("main bin spec");
5588 for (auto lc_note : lc_notes) {
5589 offset_t payload_offset = std::get<0>(lc_note);
5591 // struct main_bin_spec
5592 // {
5593 // uint32_t version; // currently 2
5594 // uint32_t type; // 0 == unspecified, 1 == kernel,
5595 // // 2 == user process,
5596 // // 3 == standalone binary
5597 // uint64_t address; // UINT64_MAX if address not specified
5598 // uint64_t slide; // slide, UINT64_MAX if unspecified
5599 // // 0 if no slide needs to be applied to
5600 // // file address
5601 // uuid_t uuid; // all zero's if uuid not specified
5602 // uint32_t log2_pagesize; // process page size in log base 2,
5603 // // e.g. 4k pages are 12.
5604 // // 0 for unspecified
5605 // uint32_t platform; // The Mach-O platform for this corefile.
5606 // // 0 for unspecified.
5607 // // The values are defined in
5608 // // <mach-o/loader.h>, PLATFORM_*.
5609 // } __attribute((packed));
5611 // "main bin spec" (main binary specification) data payload is
5612 // formatted:
5613 // uint32_t version [currently 1]
5614 // uint32_t type [0 == unspecified, 1 == kernel,
5615 // 2 == user process, 3 == firmware ]
5616 // uint64_t address [ UINT64_MAX if address not specified ]
5617 // uuid_t uuid [ all zero's if uuid not specified ]
5618 // uint32_t log2_pagesize [ process page size in log base
5619 // 2, e.g. 4k pages are 12.
5620 // 0 for unspecified ]
5621 // uint32_t unused [ for alignment ]
5623 uint32_t version;
5624 if (m_data.GetU32(&payload_offset, &version, 1) != nullptr &&
5625 version <= 2) {
5626 uint32_t binspec_type = 0;
5627 uuid_t raw_uuid;
5628 memset(raw_uuid, 0, sizeof(uuid_t));
5630 if (!m_data.GetU32(&payload_offset, &binspec_type, 1))
5631 return false;
5632 if (!m_data.GetU64(&payload_offset, &value, 1))
5633 return false;
5634 uint64_t slide = LLDB_INVALID_ADDRESS;
5635 if (version > 1 && !m_data.GetU64(&payload_offset, &slide, 1))
5636 return false;
5637 if (value == LLDB_INVALID_ADDRESS && slide != LLDB_INVALID_ADDRESS) {
5638 value = slide;
5639 value_is_offset = true;
5642 if (m_data.CopyData(payload_offset, sizeof(uuid_t), raw_uuid) != 0) {
5643 uuid = UUID(raw_uuid, sizeof(uuid_t));
5644 // convert the "main bin spec" type into our
5645 // ObjectFile::BinaryType enum
5646 const char *typestr = "unrecognized type";
5647 switch (binspec_type) {
5648 case 0:
5649 type = eBinaryTypeUnknown;
5650 typestr = "uknown";
5651 break;
5652 case 1:
5653 type = eBinaryTypeKernel;
5654 typestr = "xnu kernel";
5655 break;
5656 case 2:
5657 type = eBinaryTypeUser;
5658 typestr = "userland dyld";
5659 break;
5660 case 3:
5661 type = eBinaryTypeStandalone;
5662 typestr = "standalone";
5663 break;
5665 LLDB_LOGF(log,
5666 "LC_NOTE 'main bin spec' found, version %d type %d "
5667 "(%s), value 0x%" PRIx64 " value-is-slide==%s uuid %s",
5668 version, type, typestr, value,
5669 value_is_offset ? "true" : "false",
5670 uuid.GetAsString().c_str());
5671 if (!m_data.GetU32(&payload_offset, &log2_pagesize, 1))
5672 return false;
5673 if (version > 1 && !m_data.GetU32(&payload_offset, &platform, 1))
5674 return false;
5675 return true;
5680 return false;
5683 bool ObjectFileMachO::GetCorefileThreadExtraInfos(
5684 std::vector<lldb::tid_t> &tids) {
5685 tids.clear();
5686 ModuleSP module_sp(GetModule());
5687 if (module_sp) {
5688 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5690 Log *log(GetLog(LLDBLog::Object | LLDBLog::Process | LLDBLog::Thread));
5691 auto lc_notes = FindLC_NOTEByName("process metadata");
5692 for (auto lc_note : lc_notes) {
5693 offset_t payload_offset = std::get<0>(lc_note);
5694 offset_t strsize = std::get<1>(lc_note);
5695 std::string buf(strsize, '\0');
5696 if (m_data.CopyData(payload_offset, strsize, buf.data()) != strsize) {
5697 LLDB_LOGF(log,
5698 "Unable to read %" PRIu64
5699 " bytes of 'process metadata' LC_NOTE JSON contents",
5700 strsize);
5701 return false;
5703 while (buf.back() == '\0')
5704 buf.resize(buf.size() - 1);
5705 StructuredData::ObjectSP object_sp = StructuredData::ParseJSON(buf);
5706 StructuredData::Dictionary *dict = object_sp->GetAsDictionary();
5707 if (!dict) {
5708 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not "
5709 "get a dictionary.");
5710 return false;
5712 StructuredData::Array *threads;
5713 if (!dict->GetValueForKeyAsArray("threads", threads) || !threads) {
5714 LLDB_LOGF(log,
5715 "'process metadata' LC_NOTE does not have a 'threads' key");
5716 return false;
5718 if (threads->GetSize() != GetNumThreadContexts()) {
5719 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, number of "
5720 "threads does not match number of LC_THREADS.");
5721 return false;
5723 const size_t num_threads = threads->GetSize();
5724 for (size_t i = 0; i < num_threads; i++) {
5725 std::optional<StructuredData::Dictionary *> maybe_thread =
5726 threads->GetItemAtIndexAsDictionary(i);
5727 if (!maybe_thread) {
5728 LLDB_LOGF(log,
5729 "Unable to read 'process metadata' LC_NOTE, threads "
5730 "array does not have a dictionary at index %zu.",
5732 return false;
5734 StructuredData::Dictionary *thread = *maybe_thread;
5735 lldb::tid_t tid = LLDB_INVALID_THREAD_ID;
5736 if (thread->GetValueForKeyAsInteger<lldb::tid_t>("thread_id", tid))
5737 if (tid == 0)
5738 tid = LLDB_INVALID_THREAD_ID;
5739 tids.push_back(tid);
5742 if (log) {
5743 StreamString logmsg;
5744 logmsg.Printf("LC_NOTE 'process metadata' found: ");
5745 dict->Dump(logmsg, /* pretty_print */ false);
5746 LLDB_LOGF(log, "%s", logmsg.GetData());
5748 return true;
5751 return false;
5754 lldb::RegisterContextSP
5755 ObjectFileMachO::GetThreadContextAtIndex(uint32_t idx,
5756 lldb_private::Thread &thread) {
5757 lldb::RegisterContextSP reg_ctx_sp;
5759 ModuleSP module_sp(GetModule());
5760 if (module_sp) {
5761 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5762 if (!m_thread_context_offsets_valid)
5763 GetNumThreadContexts();
5765 const FileRangeArray::Entry *thread_context_file_range =
5766 m_thread_context_offsets.GetEntryAtIndex(idx);
5767 if (thread_context_file_range) {
5769 DataExtractor data(m_data, thread_context_file_range->GetRangeBase(),
5770 thread_context_file_range->GetByteSize());
5772 switch (m_header.cputype) {
5773 case llvm::MachO::CPU_TYPE_ARM64:
5774 case llvm::MachO::CPU_TYPE_ARM64_32:
5775 reg_ctx_sp =
5776 std::make_shared<RegisterContextDarwin_arm64_Mach>(thread, data);
5777 break;
5779 case llvm::MachO::CPU_TYPE_ARM:
5780 reg_ctx_sp =
5781 std::make_shared<RegisterContextDarwin_arm_Mach>(thread, data);
5782 break;
5784 case llvm::MachO::CPU_TYPE_I386:
5785 reg_ctx_sp =
5786 std::make_shared<RegisterContextDarwin_i386_Mach>(thread, data);
5787 break;
5789 case llvm::MachO::CPU_TYPE_X86_64:
5790 reg_ctx_sp =
5791 std::make_shared<RegisterContextDarwin_x86_64_Mach>(thread, data);
5792 break;
5796 return reg_ctx_sp;
5799 ObjectFile::Type ObjectFileMachO::CalculateType() {
5800 switch (m_header.filetype) {
5801 case MH_OBJECT: // 0x1u
5802 if (GetAddressByteSize() == 4) {
5803 // 32 bit kexts are just object files, but they do have a valid
5804 // UUID load command.
5805 if (GetUUID()) {
5806 // this checking for the UUID load command is not enough we could
5807 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as
5808 // this is required of kexts
5809 if (m_strata == eStrataInvalid)
5810 m_strata = eStrataKernel;
5811 return eTypeSharedLibrary;
5814 return eTypeObjectFile;
5816 case MH_EXECUTE:
5817 return eTypeExecutable; // 0x2u
5818 case MH_FVMLIB:
5819 return eTypeSharedLibrary; // 0x3u
5820 case MH_CORE:
5821 return eTypeCoreFile; // 0x4u
5822 case MH_PRELOAD:
5823 return eTypeSharedLibrary; // 0x5u
5824 case MH_DYLIB:
5825 return eTypeSharedLibrary; // 0x6u
5826 case MH_DYLINKER:
5827 return eTypeDynamicLinker; // 0x7u
5828 case MH_BUNDLE:
5829 return eTypeSharedLibrary; // 0x8u
5830 case MH_DYLIB_STUB:
5831 return eTypeStubLibrary; // 0x9u
5832 case MH_DSYM:
5833 return eTypeDebugInfo; // 0xAu
5834 case MH_KEXT_BUNDLE:
5835 return eTypeSharedLibrary; // 0xBu
5836 default:
5837 break;
5839 return eTypeUnknown;
5842 ObjectFile::Strata ObjectFileMachO::CalculateStrata() {
5843 switch (m_header.filetype) {
5844 case MH_OBJECT: // 0x1u
5846 // 32 bit kexts are just object files, but they do have a valid
5847 // UUID load command.
5848 if (GetUUID()) {
5849 // this checking for the UUID load command is not enough we could
5850 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as
5851 // this is required of kexts
5852 if (m_type == eTypeInvalid)
5853 m_type = eTypeSharedLibrary;
5855 return eStrataKernel;
5858 return eStrataUnknown;
5860 case MH_EXECUTE: // 0x2u
5861 // Check for the MH_DYLDLINK bit in the flags
5862 if (m_header.flags & MH_DYLDLINK) {
5863 return eStrataUser;
5864 } else {
5865 SectionList *section_list = GetSectionList();
5866 if (section_list) {
5867 static ConstString g_kld_section_name("__KLD");
5868 if (section_list->FindSectionByName(g_kld_section_name))
5869 return eStrataKernel;
5872 return eStrataRawImage;
5874 case MH_FVMLIB:
5875 return eStrataUser; // 0x3u
5876 case MH_CORE:
5877 return eStrataUnknown; // 0x4u
5878 case MH_PRELOAD:
5879 return eStrataRawImage; // 0x5u
5880 case MH_DYLIB:
5881 return eStrataUser; // 0x6u
5882 case MH_DYLINKER:
5883 return eStrataUser; // 0x7u
5884 case MH_BUNDLE:
5885 return eStrataUser; // 0x8u
5886 case MH_DYLIB_STUB:
5887 return eStrataUser; // 0x9u
5888 case MH_DSYM:
5889 return eStrataUnknown; // 0xAu
5890 case MH_KEXT_BUNDLE:
5891 return eStrataKernel; // 0xBu
5892 default:
5893 break;
5895 return eStrataUnknown;
5898 llvm::VersionTuple ObjectFileMachO::GetVersion() {
5899 ModuleSP module_sp(GetModule());
5900 if (module_sp) {
5901 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5902 llvm::MachO::dylib_command load_cmd;
5903 lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
5904 uint32_t version_cmd = 0;
5905 uint64_t version = 0;
5906 uint32_t i;
5907 for (i = 0; i < m_header.ncmds; ++i) {
5908 const lldb::offset_t cmd_offset = offset;
5909 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5910 break;
5912 if (load_cmd.cmd == LC_ID_DYLIB) {
5913 if (version_cmd == 0) {
5914 version_cmd = load_cmd.cmd;
5915 if (m_data.GetU32(&offset, &load_cmd.dylib, 4) == nullptr)
5916 break;
5917 version = load_cmd.dylib.current_version;
5919 break; // Break for now unless there is another more complete version
5920 // number load command in the future.
5922 offset = cmd_offset + load_cmd.cmdsize;
5925 if (version_cmd == LC_ID_DYLIB) {
5926 unsigned major = (version & 0xFFFF0000ull) >> 16;
5927 unsigned minor = (version & 0x0000FF00ull) >> 8;
5928 unsigned subminor = (version & 0x000000FFull);
5929 return llvm::VersionTuple(major, minor, subminor);
5932 return llvm::VersionTuple();
5935 ArchSpec ObjectFileMachO::GetArchitecture() {
5936 ModuleSP module_sp(GetModule());
5937 ArchSpec arch;
5938 if (module_sp) {
5939 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5941 return GetArchitecture(module_sp, m_header, m_data,
5942 MachHeaderSizeFromMagic(m_header.magic));
5944 return arch;
5947 void ObjectFileMachO::GetProcessSharedCacheUUID(Process *process,
5948 addr_t &base_addr, UUID &uuid) {
5949 uuid.Clear();
5950 base_addr = LLDB_INVALID_ADDRESS;
5951 if (process && process->GetDynamicLoader()) {
5952 DynamicLoader *dl = process->GetDynamicLoader();
5953 LazyBool using_shared_cache;
5954 LazyBool private_shared_cache;
5955 dl->GetSharedCacheInformation(base_addr, uuid, using_shared_cache,
5956 private_shared_cache);
5958 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process));
5959 LLDB_LOGF(
5960 log,
5961 "inferior process shared cache has a UUID of %s, base address 0x%" PRIx64,
5962 uuid.GetAsString().c_str(), base_addr);
5965 // From dyld SPI header dyld_process_info.h
5966 typedef void *dyld_process_info;
5967 struct lldb_copy__dyld_process_cache_info {
5968 uuid_t cacheUUID; // UUID of cache used by process
5969 uint64_t cacheBaseAddress; // load address of dyld shared cache
5970 bool noCache; // process is running without a dyld cache
5971 bool privateCache; // process is using a private copy of its dyld cache
5974 // #including mach/mach.h pulls in machine.h & CPU_TYPE_ARM etc conflicts with
5975 // llvm enum definitions llvm::MachO::CPU_TYPE_ARM turning them into compile
5976 // errors. So we need to use the actual underlying types of task_t and
5977 // kern_return_t below.
5978 extern "C" unsigned int /*task_t*/ mach_task_self();
5980 void ObjectFileMachO::GetLLDBSharedCacheUUID(addr_t &base_addr, UUID &uuid) {
5981 uuid.Clear();
5982 base_addr = LLDB_INVALID_ADDRESS;
5984 #if defined(__APPLE__)
5985 uint8_t *(*dyld_get_all_image_infos)(void);
5986 dyld_get_all_image_infos =
5987 (uint8_t * (*)()) dlsym(RTLD_DEFAULT, "_dyld_get_all_image_infos");
5988 if (dyld_get_all_image_infos) {
5989 uint8_t *dyld_all_image_infos_address = dyld_get_all_image_infos();
5990 if (dyld_all_image_infos_address) {
5991 uint32_t *version = (uint32_t *)
5992 dyld_all_image_infos_address; // version <mach-o/dyld_images.h>
5993 if (*version >= 13) {
5994 uuid_t *sharedCacheUUID_address = 0;
5995 int wordsize = sizeof(uint8_t *);
5996 if (wordsize == 8) {
5997 sharedCacheUUID_address =
5998 (uuid_t *)((uint8_t *)dyld_all_image_infos_address +
5999 160); // sharedCacheUUID <mach-o/dyld_images.h>
6000 if (*version >= 15)
6001 base_addr =
6002 *(uint64_t
6003 *)((uint8_t *)dyld_all_image_infos_address +
6004 176); // sharedCacheBaseAddress <mach-o/dyld_images.h>
6005 } else {
6006 sharedCacheUUID_address =
6007 (uuid_t *)((uint8_t *)dyld_all_image_infos_address +
6008 84); // sharedCacheUUID <mach-o/dyld_images.h>
6009 if (*version >= 15) {
6010 base_addr = 0;
6011 base_addr =
6012 *(uint32_t
6013 *)((uint8_t *)dyld_all_image_infos_address +
6014 100); // sharedCacheBaseAddress <mach-o/dyld_images.h>
6017 uuid = UUID(sharedCacheUUID_address, sizeof(uuid_t));
6020 } else {
6021 // Exists in macOS 10.12 and later, iOS 10.0 and later - dyld SPI
6022 dyld_process_info (*dyld_process_info_create)(
6023 unsigned int /* task_t */ task, uint64_t timestamp,
6024 unsigned int /*kern_return_t*/ *kernelError);
6025 void (*dyld_process_info_get_cache)(void *info, void *cacheInfo);
6026 void (*dyld_process_info_release)(dyld_process_info info);
6028 dyld_process_info_create = (void *(*)(unsigned int /* task_t */, uint64_t,
6029 unsigned int /*kern_return_t*/ *))
6030 dlsym(RTLD_DEFAULT, "_dyld_process_info_create");
6031 dyld_process_info_get_cache = (void (*)(void *, void *))dlsym(
6032 RTLD_DEFAULT, "_dyld_process_info_get_cache");
6033 dyld_process_info_release =
6034 (void (*)(void *))dlsym(RTLD_DEFAULT, "_dyld_process_info_release");
6036 if (dyld_process_info_create && dyld_process_info_get_cache) {
6037 unsigned int /*kern_return_t */ kern_ret;
6038 dyld_process_info process_info =
6039 dyld_process_info_create(::mach_task_self(), 0, &kern_ret);
6040 if (process_info) {
6041 struct lldb_copy__dyld_process_cache_info sc_info;
6042 memset(&sc_info, 0, sizeof(struct lldb_copy__dyld_process_cache_info));
6043 dyld_process_info_get_cache(process_info, &sc_info);
6044 if (sc_info.cacheBaseAddress != 0) {
6045 base_addr = sc_info.cacheBaseAddress;
6046 uuid = UUID(sc_info.cacheUUID, sizeof(uuid_t));
6048 dyld_process_info_release(process_info);
6052 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process));
6053 if (log && uuid.IsValid())
6054 LLDB_LOGF(log,
6055 "lldb's in-memory shared cache has a UUID of %s base address of "
6056 "0x%" PRIx64,
6057 uuid.GetAsString().c_str(), base_addr);
6058 #endif
6061 static llvm::VersionTuple FindMinimumVersionInfo(DataExtractor &data,
6062 lldb::offset_t offset,
6063 size_t ncmds) {
6064 for (size_t i = 0; i < ncmds; i++) {
6065 const lldb::offset_t load_cmd_offset = offset;
6066 llvm::MachO::load_command lc = {};
6067 if (data.GetU32(&offset, &lc.cmd, 2) == nullptr)
6068 break;
6070 uint32_t version = 0;
6071 if (lc.cmd == llvm::MachO::LC_VERSION_MIN_MACOSX ||
6072 lc.cmd == llvm::MachO::LC_VERSION_MIN_IPHONEOS ||
6073 lc.cmd == llvm::MachO::LC_VERSION_MIN_TVOS ||
6074 lc.cmd == llvm::MachO::LC_VERSION_MIN_WATCHOS) {
6075 // struct version_min_command {
6076 // uint32_t cmd; // LC_VERSION_MIN_*
6077 // uint32_t cmdsize;
6078 // uint32_t version; // X.Y.Z encoded in nibbles xxxx.yy.zz
6079 // uint32_t sdk;
6080 // };
6081 // We want to read version.
6082 version = data.GetU32(&offset);
6083 } else if (lc.cmd == llvm::MachO::LC_BUILD_VERSION) {
6084 // struct build_version_command {
6085 // uint32_t cmd; // LC_BUILD_VERSION
6086 // uint32_t cmdsize;
6087 // uint32_t platform;
6088 // uint32_t minos; // X.Y.Z encoded in nibbles xxxx.yy.zz
6089 // uint32_t sdk;
6090 // uint32_t ntools;
6091 // };
6092 // We want to read minos.
6093 offset += sizeof(uint32_t); // Skip over platform
6094 version = data.GetU32(&offset); // Extract minos
6097 if (version) {
6098 const uint32_t xxxx = version >> 16;
6099 const uint32_t yy = (version >> 8) & 0xffu;
6100 const uint32_t zz = version & 0xffu;
6101 if (xxxx)
6102 return llvm::VersionTuple(xxxx, yy, zz);
6104 offset = load_cmd_offset + lc.cmdsize;
6106 return llvm::VersionTuple();
6109 llvm::VersionTuple ObjectFileMachO::GetMinimumOSVersion() {
6110 if (!m_min_os_version)
6111 m_min_os_version = FindMinimumVersionInfo(
6112 m_data, MachHeaderSizeFromMagic(m_header.magic), m_header.ncmds);
6113 return *m_min_os_version;
6116 llvm::VersionTuple ObjectFileMachO::GetSDKVersion() {
6117 if (!m_sdk_versions)
6118 m_sdk_versions = FindMinimumVersionInfo(
6119 m_data, MachHeaderSizeFromMagic(m_header.magic), m_header.ncmds);
6120 return *m_sdk_versions;
6123 bool ObjectFileMachO::GetIsDynamicLinkEditor() {
6124 return m_header.filetype == llvm::MachO::MH_DYLINKER;
6127 bool ObjectFileMachO::CanTrustAddressRanges() {
6128 // Dsymutil guarantees that the .debug_aranges accelerator is complete and can
6129 // be trusted by LLDB.
6130 return m_header.filetype == llvm::MachO::MH_DSYM;
6133 bool ObjectFileMachO::AllowAssemblyEmulationUnwindPlans() {
6134 return m_allow_assembly_emulation_unwind_plans;
6137 Section *ObjectFileMachO::GetMachHeaderSection() {
6138 // Find the first address of the mach header which is the first non-zero file
6139 // sized section whose file offset is zero. This is the base file address of
6140 // the mach-o file which can be subtracted from the vmaddr of the other
6141 // segments found in memory and added to the load address
6142 ModuleSP module_sp = GetModule();
6143 if (!module_sp)
6144 return nullptr;
6145 SectionList *section_list = GetSectionList();
6146 if (!section_list)
6147 return nullptr;
6149 // Some binaries can have a TEXT segment with a non-zero file offset.
6150 // Binaries in the shared cache are one example. Some hand-generated
6151 // binaries may not be laid out in the normal TEXT,DATA,LC_SYMTAB order
6152 // in the file, even though they're laid out correctly in vmaddr terms.
6153 SectionSP text_segment_sp =
6154 section_list->FindSectionByName(GetSegmentNameTEXT());
6155 if (text_segment_sp.get() && SectionIsLoadable(text_segment_sp.get()))
6156 return text_segment_sp.get();
6158 const size_t num_sections = section_list->GetSize();
6159 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
6160 Section *section = section_list->GetSectionAtIndex(sect_idx).get();
6161 if (section->GetFileOffset() == 0 && SectionIsLoadable(section))
6162 return section;
6165 return nullptr;
6168 bool ObjectFileMachO::SectionIsLoadable(const Section *section) {
6169 if (!section)
6170 return false;
6171 if (section->IsThreadSpecific())
6172 return false;
6173 if (GetModule().get() != section->GetModule().get())
6174 return false;
6175 // firmware style binaries with llvm gcov segment do
6176 // not have that segment mapped into memory.
6177 if (section->GetName() == GetSegmentNameLLVM_COV()) {
6178 const Strata strata = GetStrata();
6179 if (strata == eStrataKernel || strata == eStrataRawImage)
6180 return false;
6182 // Be careful with __LINKEDIT and __DWARF segments
6183 if (section->GetName() == GetSegmentNameLINKEDIT() ||
6184 section->GetName() == GetSegmentNameDWARF()) {
6185 // Only map __LINKEDIT and __DWARF if we have an in memory image and
6186 // this isn't a kernel binary like a kext or mach_kernel.
6187 const bool is_memory_image = (bool)m_process_wp.lock();
6188 const Strata strata = GetStrata();
6189 if (is_memory_image == false || strata == eStrataKernel)
6190 return false;
6192 return true;
6195 lldb::addr_t ObjectFileMachO::CalculateSectionLoadAddressForMemoryImage(
6196 lldb::addr_t header_load_address, const Section *header_section,
6197 const Section *section) {
6198 ModuleSP module_sp = GetModule();
6199 if (module_sp && header_section && section &&
6200 header_load_address != LLDB_INVALID_ADDRESS) {
6201 lldb::addr_t file_addr = header_section->GetFileAddress();
6202 if (file_addr != LLDB_INVALID_ADDRESS && SectionIsLoadable(section))
6203 return section->GetFileAddress() - file_addr + header_load_address;
6205 return LLDB_INVALID_ADDRESS;
6208 bool ObjectFileMachO::SetLoadAddress(Target &target, lldb::addr_t value,
6209 bool value_is_offset) {
6210 Log *log(GetLog(LLDBLog::DynamicLoader));
6211 ModuleSP module_sp = GetModule();
6212 if (!module_sp)
6213 return false;
6215 SectionList *section_list = GetSectionList();
6216 if (!section_list)
6217 return false;
6219 size_t num_loaded_sections = 0;
6220 const size_t num_sections = section_list->GetSize();
6222 // Warn if some top-level segments map to the same address. The binary may be
6223 // malformed.
6224 const bool warn_multiple = true;
6226 if (log) {
6227 StreamString logmsg;
6228 logmsg << "ObjectFileMachO::SetLoadAddress ";
6229 if (GetFileSpec())
6230 logmsg << "path='" << GetFileSpec().GetPath() << "' ";
6231 if (GetUUID()) {
6232 logmsg << "uuid=" << GetUUID().GetAsString();
6234 LLDB_LOGF(log, "%s", logmsg.GetData());
6236 if (value_is_offset) {
6237 // "value" is an offset to apply to each top level segment
6238 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
6239 // Iterate through the object file sections to find all of the
6240 // sections that size on disk (to avoid __PAGEZERO) and load them
6241 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
6242 if (SectionIsLoadable(section_sp.get())) {
6243 LLDB_LOGF(log,
6244 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is "
6245 "0x%" PRIx64,
6246 section_sp->GetName().AsCString(),
6247 section_sp->GetFileAddress() + value);
6248 if (target.GetSectionLoadList().SetSectionLoadAddress(
6249 section_sp, section_sp->GetFileAddress() + value,
6250 warn_multiple))
6251 ++num_loaded_sections;
6254 } else {
6255 // "value" is the new base address of the mach_header, adjust each
6256 // section accordingly
6258 Section *mach_header_section = GetMachHeaderSection();
6259 if (mach_header_section) {
6260 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
6261 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
6263 lldb::addr_t section_load_addr =
6264 CalculateSectionLoadAddressForMemoryImage(
6265 value, mach_header_section, section_sp.get());
6266 if (section_load_addr != LLDB_INVALID_ADDRESS) {
6267 LLDB_LOGF(log,
6268 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is "
6269 "0x%" PRIx64,
6270 section_sp->GetName().AsCString(), section_load_addr);
6271 if (target.GetSectionLoadList().SetSectionLoadAddress(
6272 section_sp, section_load_addr, warn_multiple))
6273 ++num_loaded_sections;
6278 return num_loaded_sections > 0;
// Header that starts the payload of the "all image infos" LC_NOTE.
struct all_image_infos_header {
  // Format version of the payload; currently 1.
  uint32_t version;
  // How many binary images are described.
  uint32_t imgcount;
  // Corefile offset at which the array of struct entry's begins.
  uint64_t entries_fileoff;
  // Size in bytes of one 'struct entry'.
  uint32_t entries_size;
  uint32_t unused;
};
6290 struct image_entry {
6291 uint64_t filepath_offset; // offset in corefile to c-string of the file path,
6292 // UINT64_MAX if unavailable.
6293 uuid_t uuid; // uint8_t[16]. should be set to all zeroes if
6294 // uuid is unknown.
6295 uint64_t load_address; // UINT64_MAX if unknown.
6296 uint64_t seg_addrs_offset; // offset to the array of struct segment_vmaddr's.
6297 uint32_t segment_count; // The number of segments for this binary.
6298 uint32_t unused;
6300 image_entry() {
6301 filepath_offset = UINT64_MAX;
6302 memset(&uuid, 0, sizeof(uuid_t));
6303 segment_count = 0;
6304 load_address = UINT64_MAX;
6305 seg_addrs_offset = UINT64_MAX;
6306 unused = 0;
6308 image_entry(const image_entry &rhs) {
6309 filepath_offset = rhs.filepath_offset;
6310 memcpy(&uuid, &rhs.uuid, sizeof(uuid_t));
6311 segment_count = rhs.segment_count;
6312 seg_addrs_offset = rhs.seg_addrs_offset;
6313 load_address = rhs.load_address;
6314 unused = rhs.unused;
// Name and address of one segment of a binary image in the "all image infos"
// payload.
struct segment_vmaddr {
  // Segment name; not NUL terminated when all 16 bytes are used.
  char segname[16];
  // UINT64_MAX until an address is assigned.
  uint64_t vmaddr;
  uint64_t unused;

  // Starts with an all-zero name and no address.
  segment_vmaddr() : vmaddr(UINT64_MAX), unused(0) {
    memset(segname, 0, sizeof(segname));
  }

  // Field-by-field copy; the name is copied as raw bytes.
  segment_vmaddr(const segment_vmaddr &rhs)
      : vmaddr(rhs.vmaddr), unused(rhs.unused) {
    memcpy(segname, rhs.segname, sizeof(segname));
  }
};
6335 // Write the payload for the "all image infos" LC_NOTE into
6336 // the supplied all_image_infos_payload, assuming that this
6337 // will be written into the corefile starting at
6338 // initial_file_offset.
6340 // The placement of this payload is a little tricky. We're
6341 // laying this out as
6343 // 1. header (struct all_image_infos_header)
6344 // 2. Array of fixed-size (struct image_entry)'s, one
6345 // per binary image present in the process.
6346 // 3. Arrays of (struct segment_vmaddr)'s, a varying number
6347 // for each binary image.
6348 // 4. Variable length c-strings of binary image filepaths,
6349 // one per binary.
6351 // To compute where everything will be laid out in the
6352 // payload, we need to iterate over the images and calculate
6353 // how many segment_vmaddr structures each image will need,
6354 // and how long each image's filepath c-string is. There
6355 // are some multiple passes over the image list while calculating
6356 // everything.
6358 static offset_t
6359 CreateAllImageInfosPayload(const lldb::ProcessSP &process_sp,
6360 offset_t initial_file_offset,
6361 StreamString &all_image_infos_payload,
6362 lldb_private::SaveCoreOptions &options) {
6363 Target &target = process_sp->GetTarget();
6364 ModuleList modules = target.GetImages();
6366 // stack-only corefiles have no reason to include binaries that
6367 // are not executing; we're trying to make the smallest corefile
6368 // we can, so leave the rest out.
6369 if (options.GetStyle() == SaveCoreStyle::eSaveCoreStackOnly)
6370 modules.Clear();
6372 std::set<std::string> executing_uuids;
6373 std::vector<ThreadSP> thread_list =
6374 process_sp->CalculateCoreFileThreadList(options);
6375 for (const ThreadSP &thread_sp : thread_list) {
6376 uint32_t stack_frame_count = thread_sp->GetStackFrameCount();
6377 for (uint32_t j = 0; j < stack_frame_count; j++) {
6378 StackFrameSP stack_frame_sp = thread_sp->GetStackFrameAtIndex(j);
6379 Address pc = stack_frame_sp->GetFrameCodeAddress();
6380 ModuleSP module_sp = pc.GetModule();
6381 if (module_sp) {
6382 UUID uuid = module_sp->GetUUID();
6383 if (uuid.IsValid()) {
6384 executing_uuids.insert(uuid.GetAsString());
6385 modules.AppendIfNeeded(module_sp);
6390 size_t modules_count = modules.GetSize();
6392 struct all_image_infos_header infos;
6393 infos.version = 1;
6394 infos.imgcount = modules_count;
6395 infos.entries_size = sizeof(image_entry);
6396 infos.entries_fileoff = initial_file_offset + sizeof(all_image_infos_header);
6397 infos.unused = 0;
6399 all_image_infos_payload.PutHex32(infos.version);
6400 all_image_infos_payload.PutHex32(infos.imgcount);
6401 all_image_infos_payload.PutHex64(infos.entries_fileoff);
6402 all_image_infos_payload.PutHex32(infos.entries_size);
6403 all_image_infos_payload.PutHex32(infos.unused);
6405 // First create the structures for all of the segment name+vmaddr vectors
6406 // for each module, so we will know the size of them as we add the
6407 // module entries.
6408 std::vector<std::vector<segment_vmaddr>> modules_segment_vmaddrs;
6409 for (size_t i = 0; i < modules_count; i++) {
6410 ModuleSP module = modules.GetModuleAtIndex(i);
6412 SectionList *sections = module->GetSectionList();
6413 size_t sections_count = sections->GetSize();
6414 std::vector<segment_vmaddr> segment_vmaddrs;
6415 for (size_t j = 0; j < sections_count; j++) {
6416 SectionSP section = sections->GetSectionAtIndex(j);
6417 if (!section->GetParent().get()) {
6418 addr_t vmaddr = section->GetLoadBaseAddress(&target);
6419 if (vmaddr == LLDB_INVALID_ADDRESS)
6420 continue;
6421 ConstString name = section->GetName();
6422 segment_vmaddr seg_vmaddr;
6423 // This is the uncommon case where strncpy is exactly
6424 // the right one, doesn't need to be nul terminated.
6425 // The segment name in a Mach-O LC_SEGMENT/LC_SEGMENT_64 is char[16] and
6426 // is not guaranteed to be nul-terminated if all 16 characters are
6427 // used.
6428 // coverity[buffer_size_warning]
6429 strncpy(seg_vmaddr.segname, name.AsCString(),
6430 sizeof(seg_vmaddr.segname));
6431 seg_vmaddr.vmaddr = vmaddr;
6432 seg_vmaddr.unused = 0;
6433 segment_vmaddrs.push_back(seg_vmaddr);
6436 modules_segment_vmaddrs.push_back(segment_vmaddrs);
6439 offset_t size_of_vmaddr_structs = 0;
6440 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) {
6441 size_of_vmaddr_structs +=
6442 modules_segment_vmaddrs[i].size() * sizeof(segment_vmaddr);
6445 offset_t size_of_filepath_cstrings = 0;
6446 for (size_t i = 0; i < modules_count; i++) {
6447 ModuleSP module_sp = modules.GetModuleAtIndex(i);
6448 size_of_filepath_cstrings += module_sp->GetFileSpec().GetPath().size() + 1;
6451 // Calculate the file offsets of our "all image infos" payload in the
6452 // corefile. initial_file_offset the original value passed in to this method.
6454 offset_t start_of_entries =
6455 initial_file_offset + sizeof(all_image_infos_header);
6456 offset_t start_of_seg_vmaddrs =
6457 start_of_entries + sizeof(image_entry) * modules_count;
6458 offset_t start_of_filenames = start_of_seg_vmaddrs + size_of_vmaddr_structs;
6460 offset_t final_file_offset = start_of_filenames + size_of_filepath_cstrings;
6462 // Now write the one-per-module 'struct image_entry' into the
6463 // StringStream; keep track of where the struct segment_vmaddr
6464 // entries for each module will end up in the corefile.
6466 offset_t current_string_offset = start_of_filenames;
6467 offset_t current_segaddrs_offset = start_of_seg_vmaddrs;
6468 std::vector<struct image_entry> image_entries;
6469 for (size_t i = 0; i < modules_count; i++) {
6470 ModuleSP module_sp = modules.GetModuleAtIndex(i);
6472 struct image_entry ent;
6473 memcpy(&ent.uuid, module_sp->GetUUID().GetBytes().data(), sizeof(ent.uuid));
6474 if (modules_segment_vmaddrs[i].size() > 0) {
6475 ent.segment_count = modules_segment_vmaddrs[i].size();
6476 ent.seg_addrs_offset = current_segaddrs_offset;
6478 ent.filepath_offset = current_string_offset;
6479 ObjectFile *objfile = module_sp->GetObjectFile();
6480 if (objfile) {
6481 Address base_addr(objfile->GetBaseAddress());
6482 if (base_addr.IsValid()) {
6483 ent.load_address = base_addr.GetLoadAddress(&target);
6487 all_image_infos_payload.PutHex64(ent.filepath_offset);
6488 all_image_infos_payload.PutRawBytes(ent.uuid, sizeof(ent.uuid));
6489 all_image_infos_payload.PutHex64(ent.load_address);
6490 all_image_infos_payload.PutHex64(ent.seg_addrs_offset);
6491 all_image_infos_payload.PutHex32(ent.segment_count);
6493 if (executing_uuids.find(module_sp->GetUUID().GetAsString()) !=
6494 executing_uuids.end())
6495 all_image_infos_payload.PutHex32(1);
6496 else
6497 all_image_infos_payload.PutHex32(0);
6499 current_segaddrs_offset += ent.segment_count * sizeof(segment_vmaddr);
6500 current_string_offset += module_sp->GetFileSpec().GetPath().size() + 1;
6503 // Now write the struct segment_vmaddr entries into the StringStream.
6505 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) {
6506 if (modules_segment_vmaddrs[i].size() == 0)
6507 continue;
6508 for (struct segment_vmaddr segvm : modules_segment_vmaddrs[i]) {
6509 all_image_infos_payload.PutRawBytes(segvm.segname, sizeof(segvm.segname));
6510 all_image_infos_payload.PutHex64(segvm.vmaddr);
6511 all_image_infos_payload.PutHex64(segvm.unused);
6515 for (size_t i = 0; i < modules_count; i++) {
6516 ModuleSP module_sp = modules.GetModuleAtIndex(i);
6517 std::string filepath = module_sp->GetFileSpec().GetPath();
6518 all_image_infos_payload.PutRawBytes(filepath.data(), filepath.size() + 1);
6521 return final_file_offset;
6524 // Temp struct used to combine contiguous memory regions with
6525 // identical permissions.
// NOTE(review): nothing in the SaveCore implementation that follows refers to
// this struct; check for remaining users before relying on it.
6526 struct page_object {
6527 addr_t addr; // presumably the start address of the region -- confirm
6528 addr_t size; // presumably the region size in bytes -- confirm
6529 uint32_t prot; // presumably the region's protection bits -- confirm
// Writes a Mach-O core file (filetype MH_CORE) for process_sp to the path
// stored in options.GetOutputFile().
//
// The output is assembled in this order: the mach header, the LC_NOTE load
// commands, the LC_THREAD load commands, the LC_SEGMENT/LC_SEGMENT_64 load
// commands, the LC_NOTE payloads (starting on a 16-byte boundary) and finally
// the memory contents of every saved range (starting on a 4096-byte
// boundary).
//
// Only Apple-vendor triples whose OS is MacOSX, IOS, WatchOS, TvOS or XROS
// are handled, and only for the aarch64, aarch64_32, arm, thumb, x86 and
// x86_64 machines; any other machine sets `error` to an "unsupported core
// architecture" message.
//
// process_sp: the process to save; asserted non-null.
// options:    save-core options; an unspecified style is changed to
//             eSaveCoreDirtyOnly here, so the caller's object is modified.
// error:      receives the failure, if any.
//
// The return value says whether this plugin took responsibility for the
// request, not whether the save succeeded: as far as the visible control flow
// shows, true is returned once the vendor/OS check matches (inspect `error`
// for the outcome), and false when the triple is not handled or when seeking
// to / writing an LC_NOTE payload fails.
6532 bool ObjectFileMachO::SaveCore(const lldb::ProcessSP &process_sp,
6533 lldb_private::SaveCoreOptions &options,
6534 Status &error) {
6535 // The FileSpec and Process are already checked in PluginManager::SaveCore.
6536 assert(options.GetOutputFile().has_value());
6537 assert(process_sp);
6538 const FileSpec outfile = options.GetOutputFile().value();
6540 // MachO defaults to dirty pages
6541 if (options.GetStyle() == SaveCoreStyle::eSaveCoreUnspecified)
6542 options.SetStyle(eSaveCoreDirtyOnly);
6544 Target &target = process_sp->GetTarget();
6545 const ArchSpec target_arch = target.GetArchitecture();
6546 const llvm::Triple &target_triple = target_arch.GetTriple();
6547 if (target_triple.getVendor() == llvm::Triple::Apple &&
6548 (target_triple.getOS() == llvm::Triple::MacOSX ||
6549 target_triple.getOS() == llvm::Triple::IOS ||
6550 target_triple.getOS() == llvm::Triple::WatchOS ||
6551 target_triple.getOS() == llvm::Triple::TvOS ||
6552 target_triple.getOS() == llvm::Triple::XROS)) {
6553 // NEED_BRIDGEOS_TRIPLE target_triple.getOS() == llvm::Triple::BridgeOS))
6554 // {
6555 bool make_core = false;
6556 switch (target_arch.GetMachine()) {
6557 case llvm::Triple::aarch64:
6558 case llvm::Triple::aarch64_32:
6559 case llvm::Triple::arm:
6560 case llvm::Triple::thumb:
6561 case llvm::Triple::x86:
6562 case llvm::Triple::x86_64:
6563 make_core = true;
6564 break;
6565 default:
6566 error = Status::FromErrorStringWithFormat(
6567 "unsupported core architecture: %s", target_triple.str().c_str());
6568 break;
6571 if (make_core) {
6572 CoreFileMemoryRanges core_ranges;
6573 error = process_sp->CalculateCoreFileSaveRanges(options, core_ranges);
6574 if (error.Success()) {
6575 const uint32_t addr_byte_size = target_arch.GetAddressByteSize();
6576 const ByteOrder byte_order = target_arch.GetByteOrder();
// One load command is recorded per saved range. segment_command_64 is the
// in-memory form for 32-bit targets as well; its 64-bit fields are narrowed
// to 32 bits when the command is emitted further down.
6577 std::vector<llvm::MachO::segment_command_64> segment_load_commands;
6578 for (const auto &core_range_info : core_ranges) {
6579 // TODO: Refactor RangeDataVector to have a data iterator.
6580 const auto &core_range = core_range_info.data;
6581 uint32_t cmd_type = LC_SEGMENT_64;
6582 uint32_t segment_size = sizeof(llvm::MachO::segment_command_64);
6583 if (addr_byte_size == 4) {
6584 cmd_type = LC_SEGMENT;
6585 segment_size = sizeof(llvm::MachO::segment_command);
6587 // Skip any ranges with no read/write/execute permissions and empty
6588 // ranges.
6589 if (core_range.lldb_permissions == 0 || core_range.range.size() == 0)
6590 continue;
6591 uint32_t vm_prot = 0;
6592 if (core_range.lldb_permissions & ePermissionsReadable)
6593 vm_prot |= VM_PROT_READ;
6594 if (core_range.lldb_permissions & ePermissionsWritable)
6595 vm_prot |= VM_PROT_WRITE;
6596 if (core_range.lldb_permissions & ePermissionsExecutable)
6597 vm_prot |= VM_PROT_EXECUTE;
6598 const addr_t vm_addr = core_range.range.start();
6599 const addr_t vm_size = core_range.range.size();
// fileoff is a placeholder (0) here; the real value is assigned once the
// size of everything that precedes the segment data is known.
6600 llvm::MachO::segment_command_64 segment = {
6601 cmd_type, // uint32_t cmd;
6602 segment_size, // uint32_t cmdsize;
6603 {0}, // char segname[16];
6604 vm_addr, // uint64_t vmaddr; // uint32_t for 32-bit Mach-O
6605 vm_size, // uint64_t vmsize; // uint32_t for 32-bit Mach-O
6606 0, // uint64_t fileoff; // uint32_t for 32-bit Mach-O
6607 vm_size, // uint64_t filesize; // uint32_t for 32-bit Mach-O
6608 vm_prot, // uint32_t maxprot;
6609 vm_prot, // uint32_t initprot;
6610 0, // uint32_t nsects;
6611 0}; // uint32_t flags;
6612 segment_load_commands.push_back(segment);
6615 StreamString buffer(Stream::eBinary, addr_byte_size, byte_order);
// `buffer` accumulates the mach header and every load command; it is written
// to the start of the core file in one piece.
6617 llvm::MachO::mach_header_64 mach_header;
6618 mach_header.magic = addr_byte_size == 8 ? MH_MAGIC_64 : MH_MAGIC;
6619 mach_header.cputype = target_arch.GetMachOCPUType();
6620 mach_header.cpusubtype = target_arch.GetMachOCPUSubType();
6621 mach_header.filetype = MH_CORE;
6622 mach_header.ncmds = segment_load_commands.size();
6623 mach_header.flags = 0;
6624 mach_header.reserved = 0;
// NOTE(review): the LC_THREAD commands are built from the process's full
// thread list, while the "process metadata" note below lists the threads from
// CalculateCoreFileThreadList(options). Confirm that the two lists agree in
// size and order for every option combination.
6625 ThreadList &thread_list = process_sp->GetThreadList();
6626 const uint32_t num_threads = thread_list.GetSize();
6628 // Make an array of LC_THREAD data items. Each one contains the
6629 // contents of the LC_THREAD load command. The data doesn't contain
6630 // the load command + load command size, we will add the load command
6631 // and load command size as we emit the data.
6632 std::vector<StreamString> LC_THREAD_datas(num_threads);
6633 for (auto &LC_THREAD_data : LC_THREAD_datas) {
6634 LC_THREAD_data.GetFlags().Set(Stream::eBinary);
6635 LC_THREAD_data.SetAddressByteSize(addr_byte_size);
6636 LC_THREAD_data.SetByteOrder(byte_order);
// A null thread, or a cputype with no case below, leaves that thread's stream
// empty; it is still counted and emitted as an LC_THREAD with no data.
6638 for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
6639 ThreadSP thread_sp(thread_list.GetThreadAtIndex(thread_idx));
6640 if (thread_sp) {
6641 switch (mach_header.cputype) {
6642 case llvm::MachO::CPU_TYPE_ARM64:
6643 case llvm::MachO::CPU_TYPE_ARM64_32:
6644 RegisterContextDarwin_arm64_Mach::Create_LC_THREAD(
6645 thread_sp.get(), LC_THREAD_datas[thread_idx]);
6646 break;
6648 case llvm::MachO::CPU_TYPE_ARM:
6649 RegisterContextDarwin_arm_Mach::Create_LC_THREAD(
6650 thread_sp.get(), LC_THREAD_datas[thread_idx]);
6651 break;
6653 case llvm::MachO::CPU_TYPE_I386:
6654 RegisterContextDarwin_i386_Mach::Create_LC_THREAD(
6655 thread_sp.get(), LC_THREAD_datas[thread_idx]);
6656 break;
6658 case llvm::MachO::CPU_TYPE_X86_64:
6659 RegisterContextDarwin_x86_64_Mach::Create_LC_THREAD(
6660 thread_sp.get(), LC_THREAD_datas[thread_idx]);
6661 break;
6666 // The size of the load command is the size of the segments...
6667 if (addr_byte_size == 8) {
6668 mach_header.sizeofcmds = segment_load_commands.size() *
6669 sizeof(llvm::MachO::segment_command_64);
6670 } else {
6671 mach_header.sizeofcmds = segment_load_commands.size() *
6672 sizeof(llvm::MachO::segment_command);
6675 // and the size of all LC_THREAD load command
// The 8 extra bytes per LC_THREAD are its cmd and cmdsize fields.
6676 for (const auto &LC_THREAD_data : LC_THREAD_datas) {
6677 ++mach_header.ncmds;
6678 mach_header.sizeofcmds += 8 + LC_THREAD_data.GetSize();
6681 // Bits will be set to indicate which bits are NOT used in
6682 // addressing in this process or 0 for unknown.
6683 uint64_t address_mask = process_sp->GetCodeAddressMask();
6684 if (address_mask != LLDB_INVALID_ADDRESS_MASK) {
6685 // LC_NOTE "addrable bits"
6686 mach_header.ncmds++;
6687 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command);
6690 // LC_NOTE "process metadata"
6691 mach_header.ncmds++;
6692 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command);
6694 // LC_NOTE "all image infos"
6695 mach_header.ncmds++;
6696 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command);
6698 // Write the mach header
6699 buffer.PutHex32(mach_header.magic);
6700 buffer.PutHex32(mach_header.cputype);
6701 buffer.PutHex32(mach_header.cpusubtype);
6702 buffer.PutHex32(mach_header.filetype);
6703 buffer.PutHex32(mach_header.ncmds);
6704 buffer.PutHex32(mach_header.sizeofcmds);
6705 buffer.PutHex32(mach_header.flags);
// The `reserved` word is only written for 64-bit targets.
6706 if (addr_byte_size == 8) {
6707 buffer.PutHex32(mach_header.reserved);
6710 // Skip the mach header and all load commands and align to the next
6711 // 16-byte boundary; the LC_NOTE payloads are placed from there on.
6712 addr_t file_offset = buffer.GetSize() + mach_header.sizeofcmds;
6714 file_offset = llvm::alignTo(file_offset, 16);
6715 std::vector<std::unique_ptr<LCNoteEntry>> lc_notes;
6717 // Add "addrable bits" LC_NOTE when an address mask is available
6718 if (address_mask != LLDB_INVALID_ADDRESS_MASK) {
6719 std::unique_ptr<LCNoteEntry> addrable_bits_lcnote_up(
6720 new LCNoteEntry(addr_byte_size, byte_order));
6721 addrable_bits_lcnote_up->name = "addrable bits";
6722 addrable_bits_lcnote_up->payload_file_offset = file_offset;
// The number of addressable bits is the count of bits that are clear in
// address_mask.
6723 int bits = std::bitset<64>(~address_mask).count();
6724 addrable_bits_lcnote_up->payload.PutHex32(4); // version
6725 addrable_bits_lcnote_up->payload.PutHex32(
6726 bits); // # of bits used for low addresses
6727 addrable_bits_lcnote_up->payload.PutHex32(
6728 bits); // # of bits used for high addresses
6729 addrable_bits_lcnote_up->payload.PutHex32(0); // reserved
// NOTE(review): unlike the "process metadata" note below, file_offset is not
// re-aligned after this payload. That keeps the next payload 16-byte aligned
// only if these four 32-bit fields occupy exactly 16 bytes (i.e. the payload
// stream is in binary mode) -- confirm.
6731 file_offset += addrable_bits_lcnote_up->payload.GetSize();
6733 lc_notes.push_back(std::move(addrable_bits_lcnote_up));
6736 // Add "process metadata" LC_NOTE
6737 std::unique_ptr<LCNoteEntry> thread_extrainfo_lcnote_up(
6738 new LCNoteEntry(addr_byte_size, byte_order));
6739 thread_extrainfo_lcnote_up->name = "process metadata";
6740 thread_extrainfo_lcnote_up->payload_file_offset = file_offset;
// The payload is the non-pretty-printed dump of a dictionary holding a
// "threads" array with one {"thread_id": <id>} entry per thread.
6742 StructuredData::DictionarySP dict(
6743 std::make_shared<StructuredData::Dictionary>());
6744 StructuredData::ArraySP threads(
6745 std::make_shared<StructuredData::Array>());
6746 for (const ThreadSP &thread_sp :
6747 process_sp->CalculateCoreFileThreadList(options)) {
6748 StructuredData::DictionarySP thread(
6749 std::make_shared<StructuredData::Dictionary>());
6750 thread->AddIntegerItem("thread_id", thread_sp->GetID());
6751 threads->AddItem(thread);
6753 dict->AddItem("threads", threads);
6754 StreamString strm;
6755 dict->Dump(strm, /* pretty */ false);
6756 thread_extrainfo_lcnote_up->payload.PutRawBytes(strm.GetData(),
6757 strm.GetSize());
6759 file_offset += thread_extrainfo_lcnote_up->payload.GetSize();
6760 file_offset = llvm::alignTo(file_offset, 16);
6761 lc_notes.push_back(std::move(thread_extrainfo_lcnote_up));
6763 // Add "all image infos" LC_NOTE
6764 std::unique_ptr<LCNoteEntry> all_image_infos_lcnote_up(
6765 new LCNoteEntry(addr_byte_size, byte_order));
6766 all_image_infos_lcnote_up->name = "all image infos";
6767 all_image_infos_lcnote_up->payload_file_offset = file_offset;
// CreateAllImageInfosPayload fills the payload and returns the file offset
// just past it, which becomes the new file_offset.
6768 file_offset = CreateAllImageInfosPayload(
6769 process_sp, file_offset, all_image_infos_lcnote_up->payload,
6770 options);
6771 lc_notes.push_back(std::move(all_image_infos_lcnote_up));
6773 // Add LC_NOTE load commands
// Each command is: cmd, cmdsize, a 16-byte name, then the payload's file
// offset and size as 64-bit values.
6774 for (auto &lcnote : lc_notes) {
6775 // Add the LC_NOTE load command to the file.
6776 buffer.PutHex32(LC_NOTE);
6777 buffer.PutHex32(sizeof(llvm::MachO::note_command));
6778 char namebuf[16];
6779 memset(namebuf, 0, sizeof(namebuf));
6780 // This is the uncommon case where strncpy is exactly
6781 // the right one, doesn't need to be nul terminated.
6782 // LC_NOTE name field is char[16] and is not guaranteed to be
6783 // nul-terminated.
6784 // coverity[buffer_size_warning]
6785 strncpy(namebuf, lcnote->name.c_str(), sizeof(namebuf));
6786 buffer.PutRawBytes(namebuf, sizeof(namebuf));
6787 buffer.PutHex64(lcnote->payload_file_offset);
6788 buffer.PutHex64(lcnote->payload.GetSize());
6791 // Align to 4096-byte page boundary for the LC_SEGMENTs.
6792 file_offset = llvm::alignTo(file_offset, 4096);
// Segment contents are laid out back to back from that boundary, in the order
// the load commands were recorded.
6794 for (auto &segment : segment_load_commands) {
6795 segment.fileoff = file_offset;
6796 file_offset += segment.filesize;
6799 // Write out all of the LC_THREAD load commands
6800 for (const auto &LC_THREAD_data : LC_THREAD_datas) {
6801 const size_t LC_THREAD_data_size = LC_THREAD_data.GetSize();
6802 buffer.PutHex32(LC_THREAD);
6803 buffer.PutHex32(8 + LC_THREAD_data_size); // cmd + cmdsize + data
6804 buffer.Write(LC_THREAD_data.GetString().data(), LC_THREAD_data_size);
6807 // Write out all of the segment load commands
6808 for (const auto &segment : segment_load_commands) {
6809 buffer.PutHex32(segment.cmd);
6810 buffer.PutHex32(segment.cmdsize);
6811 buffer.PutRawBytes(segment.segname, sizeof(segment.segname));
6812 if (addr_byte_size == 8) {
6813 buffer.PutHex64(segment.vmaddr);
6814 buffer.PutHex64(segment.vmsize);
6815 buffer.PutHex64(segment.fileoff);
6816 buffer.PutHex64(segment.filesize);
6817 } else {
6818 buffer.PutHex32(static_cast<uint32_t>(segment.vmaddr));
6819 buffer.PutHex32(static_cast<uint32_t>(segment.vmsize));
6820 buffer.PutHex32(static_cast<uint32_t>(segment.fileoff));
6821 buffer.PutHex32(static_cast<uint32_t>(segment.filesize));
6823 buffer.PutHex32(segment.maxprot);
6824 buffer.PutHex32(segment.initprot);
6825 buffer.PutHex32(segment.nsects);
6826 buffer.PutHex32(segment.flags);
6829 std::string core_file_path(outfile.GetPath());
// The output file is opened write-only, created if missing and truncated if
// it already exists.
6830 auto core_file = FileSystem::Instance().Open(
6831 outfile, File::eOpenOptionWriteOnly | File::eOpenOptionTruncate |
6832 File::eOpenOptionCanCreate);
6833 if (!core_file) {
6834 error = Status::FromError(core_file.takeError());
6835 } else {
6836 // Read 1 page at a time
6837 uint8_t bytes[0x1000];
6838 // Write the mach header and load commands out to the core file
6839 size_t bytes_written = buffer.GetString().size();
6840 error =
6841 core_file.get()->Write(buffer.GetString().data(), bytes_written);
6842 if (error.Success()) {
// Write each LC_NOTE payload at the file offset recorded for it above.
// NOTE(review): the failure paths in this loop return false, whereas a failed
// seek in the segment loop below only breaks and appears to reach the
// `return true` at the end -- confirm the asymmetry is intended.
6844 for (auto &lcnote : lc_notes) {
6845 if (core_file.get()->SeekFromStart(lcnote->payload_file_offset) ==
6846 -1) {
6847 error = Status::FromErrorStringWithFormat(
6848 "Unable to seek to corefile pos "
6849 "to write '%s' LC_NOTE payload",
6850 lcnote->name.c_str());
6851 return false;
6853 bytes_written = lcnote->payload.GetSize();
6854 error = core_file.get()->Write(lcnote->payload.GetData(),
6855 bytes_written);
6856 if (!error.Success())
6857 return false;
6860 // Now write the file data for all memory segments in the process
6861 for (const auto &segment : segment_load_commands) {
6862 if (core_file.get()->SeekFromStart(segment.fileoff) == -1) {
6863 error = Status::FromErrorStringWithFormat(
6864 "unable to seek to offset 0x%" PRIx64 " in '%s'",
6865 segment.fileoff, core_file_path.c_str());
6866 break;
6869 target.GetDebugger().GetAsyncOutputStream()->Printf(
6870 "Saving %" PRId64
6871 " bytes of data for memory region at 0x%" PRIx64 "\n",
6872 segment.vmsize, segment.vmaddr);
6873 addr_t bytes_left = segment.vmsize;
6874 addr_t addr = segment.vmaddr;
// memory_read_error is only a sink for ReadMemoryFromInferior; a short read
// is handled by zero-filling below rather than by failing the save.
6875 Status memory_read_error;
6876 while (bytes_left > 0 && error.Success()) {
6877 const size_t bytes_to_read =
6878 bytes_left > sizeof(bytes) ? sizeof(bytes) : bytes_left;
6880 // In a savecore setting, we don't really care about caching,
6881 // as the data is dumped and very likely never read again,
6882 // so we call ReadMemoryFromInferior to bypass it.
6883 const size_t bytes_read = process_sp->ReadMemoryFromInferior(
6884 addr, bytes, bytes_to_read, memory_read_error);
6886 if (bytes_read == bytes_to_read) {
6887 size_t bytes_written = bytes_read;
6888 error = core_file.get()->Write(bytes, bytes_written);
6889 bytes_left -= bytes_read;
6890 addr += bytes_read;
6891 } else {
6892 // Some pages within regions are not readable, those should
6893 // be zero filled
// Any partial read is discarded: the whole chunk is written as zeroes.
6894 memset(bytes, 0, bytes_to_read);
6895 size_t bytes_written = bytes_to_read;
6896 error = core_file.get()->Write(bytes, bytes_written);
6897 bytes_left -= bytes_to_read;
6898 addr += bytes_to_read;
6906 return true; // This is the right plug to handle saving core files for
6907 // this process
6909 return false;
6912 ObjectFileMachO::MachOCorefileAllImageInfos
6913 ObjectFileMachO::GetCorefileAllImageInfos() {
6914 MachOCorefileAllImageInfos image_infos;
6915 Log *log(GetLog(LLDBLog::Object | LLDBLog::Symbols | LLDBLog::Process |
6916 LLDBLog::DynamicLoader));
6918 auto lc_notes = FindLC_NOTEByName("all image infos");
6919 for (auto lc_note : lc_notes) {
6920 offset_t payload_offset = std::get<0>(lc_note);
6921 // Read the struct all_image_infos_header.
6922 uint32_t version = m_data.GetU32(&payload_offset);
6923 if (version != 1) {
6924 return image_infos;
6926 uint32_t imgcount = m_data.GetU32(&payload_offset);
6927 uint64_t entries_fileoff = m_data.GetU64(&payload_offset);
6928 // 'entries_size' is not used, nor is the 'unused' entry.
6929 // offset += 4; // uint32_t entries_size;
6930 // offset += 4; // uint32_t unused;
6932 LLDB_LOGF(log, "LC_NOTE 'all image infos' found version %d with %d images",
6933 version, imgcount);
6934 payload_offset = entries_fileoff;
6935 for (uint32_t i = 0; i < imgcount; i++) {
6936 // Read the struct image_entry.
6937 offset_t filepath_offset = m_data.GetU64(&payload_offset);
6938 uuid_t uuid;
6939 memcpy(&uuid, m_data.GetData(&payload_offset, sizeof(uuid_t)),
6940 sizeof(uuid_t));
6941 uint64_t load_address = m_data.GetU64(&payload_offset);
6942 offset_t seg_addrs_offset = m_data.GetU64(&payload_offset);
6943 uint32_t segment_count = m_data.GetU32(&payload_offset);
6944 uint32_t currently_executing = m_data.GetU32(&payload_offset);
6946 MachOCorefileImageEntry image_entry;
6947 image_entry.filename = (const char *)m_data.GetCStr(&filepath_offset);
6948 image_entry.uuid = UUID(uuid, sizeof(uuid_t));
6949 image_entry.load_address = load_address;
6950 image_entry.currently_executing = currently_executing;
6952 offset_t seg_vmaddrs_offset = seg_addrs_offset;
6953 for (uint32_t j = 0; j < segment_count; j++) {
6954 char segname[17];
6955 m_data.CopyData(seg_vmaddrs_offset, 16, segname);
6956 segname[16] = '\0';
6957 seg_vmaddrs_offset += 16;
6958 uint64_t vmaddr = m_data.GetU64(&seg_vmaddrs_offset);
6959 seg_vmaddrs_offset += 8; /* unused */
6961 std::tuple<ConstString, addr_t> new_seg{ConstString(segname), vmaddr};
6962 image_entry.segment_load_addresses.push_back(new_seg);
6964 LLDB_LOGF(log, " image entry: %s %s 0x%" PRIx64 " %s",
6965 image_entry.filename.c_str(),
6966 image_entry.uuid.GetAsString().c_str(),
6967 image_entry.load_address,
6968 image_entry.currently_executing ? "currently executing"
6969 : "not currently executing");
6970 image_infos.all_image_infos.push_back(image_entry);
6974 lc_notes = FindLC_NOTEByName("load binary");
6975 for (auto lc_note : lc_notes) {
6976 offset_t payload_offset = std::get<0>(lc_note);
6977 uint32_t version = m_data.GetU32(&payload_offset);
6978 if (version == 1) {
6979 uuid_t uuid;
6980 memcpy(&uuid, m_data.GetData(&payload_offset, sizeof(uuid_t)),
6981 sizeof(uuid_t));
6982 uint64_t load_address = m_data.GetU64(&payload_offset);
6983 uint64_t slide = m_data.GetU64(&payload_offset);
6984 std::string filename = m_data.GetCStr(&payload_offset);
6986 MachOCorefileImageEntry image_entry;
6987 image_entry.filename = filename;
6988 image_entry.uuid = UUID(uuid, sizeof(uuid_t));
6989 image_entry.load_address = load_address;
6990 image_entry.slide = slide;
6991 image_entry.currently_executing = true;
6992 image_infos.all_image_infos.push_back(image_entry);
6993 LLDB_LOGF(log,
6994 "LC_NOTE 'load binary' found, filename %s uuid %s load "
6995 "address 0x%" PRIx64 " slide 0x%" PRIx64,
6996 filename.c_str(),
6997 image_entry.uuid.IsValid()
6998 ? image_entry.uuid.GetAsString().c_str()
6999 : "00000000-0000-0000-0000-000000000000",
7000 load_address, slide);
7004 return image_infos;
7007 bool ObjectFileMachO::LoadCoreFileImages(lldb_private::Process &process) {
7008 MachOCorefileAllImageInfos image_infos = GetCorefileAllImageInfos();
7009 Log *log = GetLog(LLDBLog::Object | LLDBLog::DynamicLoader);
7010 Status error;
7012 bool found_platform_binary = false;
7013 ModuleList added_modules;
7014 for (MachOCorefileImageEntry &image : image_infos.all_image_infos) {
7015 ModuleSP module_sp, local_filesystem_module_sp;
7017 // If this is a platform binary, it has been loaded (or registered with
7018 // the DynamicLoader to be loaded), we don't need to do any further
7019 // processing. We're not going to call ModulesDidLoad on this in this
7020 // method, so notify==true.
7021 if (process.GetTarget()
7022 .GetDebugger()
7023 .GetPlatformList()
7024 .LoadPlatformBinaryAndSetup(&process, image.load_address,
7025 true /* notify */)) {
7026 LLDB_LOGF(log,
7027 "ObjectFileMachO::%s binary at 0x%" PRIx64
7028 " is a platform binary, has been handled by a Platform plugin.",
7029 __FUNCTION__, image.load_address);
7030 continue;
7033 bool value_is_offset = image.load_address == LLDB_INVALID_ADDRESS;
7034 uint64_t value = value_is_offset ? image.slide : image.load_address;
7035 if (value_is_offset && value == LLDB_INVALID_ADDRESS) {
7036 // We have neither address nor slide; so we will find the binary
7037 // by UUID and load it at slide/offset 0.
7038 value = 0;
7041 // We have either a UUID, or we have a load address which
7042 // and can try to read load commands and find a UUID.
7043 if (image.uuid.IsValid() ||
7044 (!value_is_offset && value != LLDB_INVALID_ADDRESS)) {
7045 const bool set_load_address = image.segment_load_addresses.size() == 0;
7046 const bool notify = false;
7047 // Userland Darwin binaries will have segment load addresses via
7048 // the `all image infos` LC_NOTE.
7049 const bool allow_memory_image_last_resort =
7050 image.segment_load_addresses.size();
7051 module_sp = DynamicLoader::LoadBinaryWithUUIDAndAddress(
7052 &process, image.filename, image.uuid, value, value_is_offset,
7053 image.currently_executing, notify, set_load_address,
7054 allow_memory_image_last_resort);
7057 // We have a ModuleSP to load in the Target. Load it at the
7058 // correct address/slide and notify/load scripting resources.
7059 if (module_sp) {
7060 added_modules.Append(module_sp, false /* notify */);
7062 // We have a list of segment load address
7063 if (image.segment_load_addresses.size() > 0) {
7064 if (log) {
7065 std::string uuidstr = image.uuid.GetAsString();
7066 log->Printf("ObjectFileMachO::LoadCoreFileImages adding binary '%s' "
7067 "UUID %s with section load addresses",
7068 module_sp->GetFileSpec().GetPath().c_str(),
7069 uuidstr.c_str());
7071 for (auto name_vmaddr_tuple : image.segment_load_addresses) {
7072 SectionList *sectlist = module_sp->GetObjectFile()->GetSectionList();
7073 if (sectlist) {
7074 SectionSP sect_sp =
7075 sectlist->FindSectionByName(std::get<0>(name_vmaddr_tuple));
7076 if (sect_sp) {
7077 process.GetTarget().SetSectionLoadAddress(
7078 sect_sp, std::get<1>(name_vmaddr_tuple));
7082 } else {
7083 if (log) {
7084 std::string uuidstr = image.uuid.GetAsString();
7085 log->Printf("ObjectFileMachO::LoadCoreFileImages adding binary '%s' "
7086 "UUID %s with %s 0x%" PRIx64,
7087 module_sp->GetFileSpec().GetPath().c_str(),
7088 uuidstr.c_str(),
7089 value_is_offset ? "slide" : "load address", value);
7091 bool changed;
7092 module_sp->SetLoadAddress(process.GetTarget(), value, value_is_offset,
7093 changed);
7097 if (added_modules.GetSize() > 0) {
7098 process.GetTarget().ModulesDidLoad(added_modules);
7099 process.Flush();
7100 return true;
7102 // Return true if the only binary we found was the platform binary,
7103 // and it was loaded outside the scope of this method.
7104 if (found_platform_binary)
7105 return true;
7107 // No binaries.
7108 return false;