1 //===-- sanitizer_symbolizer_libcdep.cpp ----------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file is shared between AddressSanitizer and ThreadSanitizer
10 // run-time libraries.
11 //===----------------------------------------------------------------------===//
13 #include "sanitizer_allocator_internal.h"
14 #include "sanitizer_internal_defs.h"
15 #include "sanitizer_platform.h"
16 #include "sanitizer_symbolizer_internal.h"
18 namespace __sanitizer
{
// Constructs a SpinMutexLock on init_mu_ and stores the result of
// PlatformInit() in symbolizer_.
// NOTE(review): whether an existing symbolizer_ is reused, and what is
// returned, is not visible in this excerpt — confirm against the full file.
20 Symbolizer
*Symbolizer::GetOrInit() {
21 SpinMutexLock
l(&init_mu_
);
24 symbolizer_
= PlatformInit();
29 // See sanitizer_symbolizer_markup.cpp.
30 #if !SANITIZER_SYMBOLIZER_MARKUP
// Copies the first prefix_len characters of str into a buffer obtained from
// InternalAlloc, NUL-terminates it and stores it in *result. prefix_len is
// internal_strcspn(str, delims) — presumably the length of the leading run
// containing no character from delims, as with strcspn.
// The buffer is allocated here; other code in this file releases such buffers
// with InternalFree.
32 const char *ExtractToken(const char *str
, const char *delims
, char **result
) {
33 uptr prefix_len
= internal_strcspn(str
, delims
);
// One extra byte for the terminating NUL written below.
34 *result
= (char*)InternalAlloc(prefix_len
+ 1);
35 internal_memcpy(*result
, str
, prefix_len
);
36 (*result
)[prefix_len
] = '\0';
// Step over the delimiter that ended the token, but never past the
// terminating NUL of str.
37 const char *prefix_end
= str
+ prefix_len
;
38 if (*prefix_end
!= '\0') prefix_end
++;
// Extracts one token from str with ExtractToken(str, delims, ...), converts
// it with internal_atoll and stores the value, cast to int, in *result.
// The pointer returned by ExtractToken is kept in ret.
42 const char *ExtractInt(const char *str
, const char *delims
, int *result
) {
44 const char *ret
= ExtractToken(str
, delims
, &buff
);
46 *result
= (int)internal_atoll(buff
);
// Extracts one token from str with ExtractToken(str, delims, ...), converts
// it with internal_atoll and stores the value, cast to uptr, in *result.
// The pointer returned by ExtractToken is kept in ret.
52 const char *ExtractUptr(const char *str
, const char *delims
, uptr
*result
) {
54 const char *ret
= ExtractToken(str
, delims
, &buff
);
56 *result
= (uptr
)internal_atoll(buff
);
// Extracts one token from str with ExtractToken(str, delims, ...), converts
// it with internal_atoll and stores the value, cast to sptr, in *result.
// The pointer returned by ExtractToken is kept in ret.
62 const char *ExtractSptr(const char *str
, const char *delims
, sptr
*result
) {
64 const char *ret
= ExtractToken(str
, delims
, &buff
);
66 *result
= (sptr
)internal_atoll(buff
);
// Variant of ExtractToken where the delimiter is a whole string located with
// internal_strstr rather than a set of characters. The token is everything
// before the first occurrence of delimiter, or all of str when
// internal_strstr returns null. The token is copied into a NUL-terminated
// buffer obtained from InternalAlloc and stored in *result.
72 const char *ExtractTokenUpToDelimiter(const char *str
, const char *delimiter
,
74 const char *found_delimiter
= internal_strstr(str
, delimiter
);
// Token length: distance to the delimiter, or the full string length.
76 found_delimiter
? found_delimiter
- str
: internal_strlen(str
);
77 *result
= (char *)InternalAlloc(prefix_len
+ 1);
78 internal_memcpy(*result
, str
, prefix_len
);
79 (*result
)[prefix_len
] = '\0';
// Skip the whole delimiter string unless the token ran to the end of str.
80 const char *prefix_end
= str
+ prefix_len
;
81 if (*prefix_end
!= '\0') prefix_end
+= internal_strlen(delimiter
);
// Creates a SymbolizedStack for addr, fills in the module information of the
// module found by FindModuleForAddress, then offers the address to each
// entry of tools_ in order. A SymbolizerScope is constructed around every
// tool call.
// NOTE(review): *mod is dereferenced below; the handling of a null result
// from FindModuleForAddress is not visible in this excerpt — confirm.
85 SymbolizedStack
*Symbolizer::SymbolizePC(uptr addr
) {
87 SymbolizedStack
*res
= SymbolizedStack::New(addr
);
88 auto *mod
= FindModuleForAddress(addr
);
91 // Always fill data about module name and offset.
92 res
->info
.FillModuleInfo(*mod
);
93 for (auto &tool
: tools_
) {
94 SymbolizerScope
sym_scope(this);
95 if (tool
.SymbolizePC(addr
, res
)) {
// Resolves addr to a module via FindModuleNameAndOffsetForAddress, records
// the module name (duplicated with internal_strdup), offset and architecture
// in *info, then offers the address to each entry of tools_ in order. A
// SymbolizerScope is constructed around every tool call.
102 bool Symbolizer::SymbolizeData(uptr addr
, DataInfo
*info
) {
104 const char *module_name
= nullptr;
107 if (!FindModuleNameAndOffsetForAddress(addr
, &module_name
, &module_offset
,
// info->module receives its own copy of the name made by internal_strdup.
111 info
->module
= internal_strdup(module_name
);
112 info
->module_offset
= module_offset
;
113 info
->module_arch
= arch
;
114 for (auto &tool
: tools_
) {
115 SymbolizerScope
sym_scope(this);
116 if (tool
.SymbolizeData(addr
, info
)) {
// Resolves addr to a module via FindModuleNameAndOffsetForAddress, which
// writes the offset and architecture directly into info->module_offset and
// info->module_arch. Stores a copy of the module name made by
// internal_strdup in info->module, then offers the address to each entry of
// tools_ in order. A SymbolizerScope is constructed around every tool call.
123 bool Symbolizer::SymbolizeFrame(uptr addr
, FrameInfo
*info
) {
125 const char *module_name
= nullptr;
126 if (!FindModuleNameAndOffsetForAddress(
127 addr
, &module_name
, &info
->module_offset
, &info
->module_arch
))
129 info
->module
= internal_strdup(module_name
);
130 for (auto &tool
: tools_
) {
131 SymbolizerScope
sym_scope(this);
132 if (tool
.SymbolizeFrame(addr
, info
)) {
// Looks up the module containing pc with FindModuleNameAndOffsetForAddress,
// which writes the offset through module_address. The name handed back in
// *module_name is the result of module_names_.GetOwnedCopy() applied to the
// name found by the lookup.
139 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc
, const char **module_name
,
140 uptr
*module_address
) {
142 const char *internal_module_name
= nullptr;
144 if (!FindModuleNameAndOffsetForAddress(pc
, &internal_module_name
,
145 module_address
, &arch
))
149 *module_name
= module_names_
.GetOwnedCopy(internal_module_name
);
// Iterates over every entry of tools_, constructing a SymbolizerScope for
// each iteration.
// NOTE(review): the per-tool call is not visible in this excerpt; presumably
// each tool is flushed — confirm.
153 void Symbolizer::Flush() {
155 for (auto &tool
: tools_
) {
156 SymbolizerScope
sym_scope(this);
// Asks each entry of tools_, in order, to demangle name (with a
// SymbolizerScope constructed around every attempt), then tries
// PlatformDemangle(name). A non-null result from either step is bound to
// `demangled`.
161 const char *Symbolizer::Demangle(const char *name
) {
164 for (auto &tool
: tools_
) {
165 SymbolizerScope
sym_scope(this);
166 if (const char *demangled
= tool
.Demangle(name
))
// Tried after the loop over tools_.
169 if (const char *demangled
= PlatformDemangle(name
))
// Finds the module containing address via FindModuleForAddress and reports,
// through the out-parameters, its full name, the offset of address from the
// module's base address, and its architecture.
// NOTE(review): handling of a null module is not visible in this excerpt —
// confirm.
174 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address
,
175 const char **module_name
,
177 ModuleArch
*module_arch
) {
178 const LoadedModule
*module
= FindModuleForAddress(address
);
181 *module_name
= module
->full_name();
182 *module_offset
= address
- module
->base_address();
183 *module_arch
= module
->arch();
// Calls fallback_modules_.fallbackInit(), checks with RAW_CHECK that
// modules_ is non-empty, and sets modules_fresh_ to true.
// NOTE(review): the step that (re)populates modules_ is not visible in this
// excerpt — confirm.
187 void Symbolizer::RefreshModules() {
189 fallback_modules_
.fallbackInit();
190 RAW_CHECK(modules_
.size() > 0);
191 modules_fresh_
= true;
// Returns a const reference to a ListOfModules.
// NOTE(review): only the signature is visible in this excerpt; presumably the
// list is refreshed when stale (going by the name) — confirm.
194 const ListOfModules
&Symbolizer::GetRefreshedListOfModules() {
// Scans `modules` by index from 0 to modules.size() - 1, testing each entry
// with containsAddress(address).
201 static const LoadedModule
*SearchForModule(const ListOfModules
&modules
,
203 for (uptr i
= 0; i
< modules
.size(); i
++) {
204 if (modules
[i
].containsAddress(address
)) {
// Looks for the module containing address in three stages:
//   1. search modules_ (modules_were_reloaded records that the
//      !modules_fresh_ branch was taken first);
//   2. when SANITIZER_INTERCEPT_DLOPEN_DLCLOSE is off and that branch was not
//      taken in step 1, search modules_ a second time;
//   3. when fallback_modules_ is non-empty, search it as well.
// The first two stages return as soon as a module is found.
211 const LoadedModule
*Symbolizer::FindModuleForAddress(uptr address
) {
212 bool modules_were_reloaded
= false;
213 if (!modules_fresh_
) {
215 modules_were_reloaded
= true;
217 const LoadedModule
*module
= SearchForModule(modules_
, address
);
218 if (module
) return module
;
220 // dlopen/dlclose interceptors invalidate the module list, but when
221 // interception is disabled, we need to retry if the lookup fails in
222 // case the module list changed.
223 #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
224 if (!modules_were_reloaded
) {
226 module
= SearchForModule(modules_
, address
);
227 if (module
) return module
;
// Last resort: the fallback module list, consulted only when non-empty.
231 if (fallback_modules_
.size()) {
232 module
= SearchForModule(fallback_modules_
, address
);
237 // For now we assume the following protocol:
238 // For each request of the form
239 // <module_name> <module_offset>
240 // passed to STDIN, external symbolizer prints to STDOUT response:
//   <function_name>
242 // <file_name>:<line_number>:<column_number>
//   <function_name>
244 // <file_name>:<line_number>:<column_number>
//   ...
//   <empty line>
// SymbolizerProcess specialization for llvm-symbolizer. It supplies the
// end-of-output test and the command-line arguments for the subprocess.
247 class LLVMSymbolizerProcess final
: public SymbolizerProcess
{
// Forwards path to the base class; use_posix_spawn is set to the value of
// SANITIZER_APPLE.
249 explicit LLVMSymbolizerProcess(const char *path
)
250 : SymbolizerProcess(path
, /*use_posix_spawn=*/SANITIZER_APPLE
) {}
// Returns true when the last two bytes of buffer[0..length) are both '\n'.
253 bool ReachedEndOfOutput(const char *buffer
, uptr length
) const override
{
254 // Empty line marks the end of llvm-symbolizer output.
255 return length
>= 2 && buffer
[length
- 1] == '\n' &&
256 buffer
[length
- 2] == '\n';
// Fills argv with, in order: the binary path, the demangle flag, the
// inline-frames flag and the --default-arch flag. The architecture string is
// chosen at compile time from the target macros below; the two boolean flags
// come from common_flags().
259 // When adding a new architecture, don't forget to also update
260 // script/asan_symbolize.py and sanitizer_common.h.
261 void GetArgV(const char *path_to_binary
,
262 const char *(&argv
)[kArgVMax
]) const override
{
263 #if defined(__x86_64h__)
264 const char* const kSymbolizerArch
= "--default-arch=x86_64h";
265 #elif defined(__x86_64__)
266 const char* const kSymbolizerArch
= "--default-arch=x86_64";
267 #elif defined(__i386__)
268 const char* const kSymbolizerArch
= "--default-arch=i386";
269 #elif SANITIZER_LOONGARCH64
270 const char *const kSymbolizerArch
= "--default-arch=loongarch64";
271 #elif SANITIZER_RISCV64
272 const char *const kSymbolizerArch
= "--default-arch=riscv64";
273 #elif defined(__aarch64__)
274 const char* const kSymbolizerArch
= "--default-arch=arm64";
275 #elif defined(__arm__)
276 const char* const kSymbolizerArch
= "--default-arch=arm";
277 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
278 const char* const kSymbolizerArch
= "--default-arch=powerpc64";
279 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
280 const char* const kSymbolizerArch
= "--default-arch=powerpc64le";
281 #elif defined(__s390x__)
282 const char* const kSymbolizerArch
= "--default-arch=s390x";
283 #elif defined(__s390__)
284 const char* const kSymbolizerArch
= "--default-arch=s390";
// Value used when none of the conditions above matches.
286 const char* const kSymbolizerArch
= "--default-arch=unknown";
289 const char *const demangle_flag
=
290 common_flags()->demangle
? "--demangle" : "--no-demangle";
291 const char *const inline_flag
=
292 common_flags()->symbolize_inline_frames
? "--inlines" : "--no-inlines";
294 argv
[i
++] = path_to_binary
;
295 argv
[i
++] = demangle_flag
;
296 argv
[i
++] = inline_flag
;
297 argv
[i
++] = kSymbolizerArch
;
// Guards against writing more than kArgVMax entries into argv.
299 CHECK_LE(i
, kArgVMax
);
// Constructs an LLVMSymbolizerProcess for `path` with placement new on
// *allocator and stores the pointer in symbolizer_process_.
303 LLVMSymbolizer::LLVMSymbolizer(const char *path
, LowLevelAllocator
*allocator
)
304 : symbolizer_process_(new(*allocator
) LLVMSymbolizerProcess(path
)) {}
306 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
307 // Windows, so extract tokens from the right hand side first. The column info is
// optional.
// Consumes one "\n"-terminated line of str and fills info->file, info->line
// and info->column from it. The temporary copy of the line is released with
// InternalFree.
309 static const char *ParseFileLineInfo(AddressInfo
*info
, const char *str
) {
310 char *file_line_info
= nullptr;
311 str
= ExtractToken(str
, "\n", &file_line_info
);
312 CHECK(file_line_info
);
// Only a non-empty line is scanned for trailing numbers.
314 if (uptr size
= internal_strlen(file_line_info
)) {
315 char *back
= file_line_info
+ size
- 1;
// At most two trailing ":<digits>" groups are consumed, right to left. On the
// second pass the number parsed first moves from line to column, and the
// newly parsed number becomes the line.
316 for (int i
= 0; i
< 2; ++i
) {
317 while (back
> file_line_info
&& IsDigit(*back
)) --back
;
// Stop unless the digits are directly preceded by ':'.
318 if (*back
!= ':' || !IsDigit(back
[1])) break;
319 info
->column
= info
->line
;
320 info
->line
= internal_atoll(back
+ 1);
321 // Truncate the string at the colon to keep only filename.
// An empty delimiter set makes ExtractToken copy the whole remaining string.
325 ExtractToken(file_line_info
, "", &info
->file
);
328 InternalFree(file_line_info
);
332 // Parses one or more two-line strings in the following format:
//   <function_name>
334 // <file_name>:<line_number>[:<column_number>]
335 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
336 // them use the same output format.
// For each frame, the function-name buffer returned by ExtractToken is stored
// directly in info->function, and the file/line/column fields are filled by
// ParseFileLineInfo. An empty function-name line ends the input.
337 void ParseSymbolizePCOutput(const char *str
, SymbolizedStack
*res
) {
338 bool top_frame
= true;
339 SymbolizedStack
*last
= res
;
341 char *function_name
= nullptr;
342 str
= ExtractToken(str
, "\n", &function_name
);
343 CHECK(function_name
);
344 if (function_name
[0] == '\0') {
345 // There are no more frames.
346 InternalFree(function_name
);
349 SymbolizedStack
*cur
;
// A newly created frame takes the address and module information of res.
354 cur
= SymbolizedStack::New(res
->info
.address
);
355 cur
->info
.FillModuleInfo(res
->info
.module
, res
->info
.module_offset
,
356 res
->info
.module_arch
);
361 AddressInfo
*info
= &cur
->info
;
362 info
->function
= function_name
;
363 str
= ParseFileLineInfo(info
, str
);
365 // Functions and filenames can be "??", in which case we write 0
366 // to address info to mark that names are unknown.
367 if (0 == internal_strcmp(info
->function
, "??")) {
368 InternalFree(info
->function
);
// info->file may be null, hence the extra check before comparing.
371 if (info
->file
&& 0 == internal_strcmp(info
->file
, "??")) {
372 InternalFree(info
->file
);
378 // Parses a two- or three-line string in the following format:
//   <symbol_name>
380 // <start_address> <size>
381 // <filename>:<line>
382 // Used by LLVMSymbolizer and InternalSymbolizer. LLVMSymbolizer added support
383 // for symbolizing the third line in D123538, but we support the older two-line
384 // information as well.
// The fields are stored in info->name, info->start, info->size, info->file
// and info->line respectively.
385 void ParseSymbolizeDataOutput(const char *str
, DataInfo
*info
) {
386 str
= ExtractToken(str
, "\n", &info
->name
);
387 str
= ExtractUptr(str
, " ", &info
->start
);
388 str
= ExtractUptr(str
, "\n", &info
->size
);
389 // Note: If the third line isn't present, these calls will set info->file to
390 // an empty string and parse info->line from an empty token.
391 str
= ExtractToken(str
, ":", &info
->file
);
392 str
= ExtractUptr(str
, "\n", &info
->line
);
// Parses a description of a local variable from str into a LocalInfo and
// appends it to *locals. The fields are read in this order: function name
// line, variable name line, declaration file/line (via ParseFileLineInfo),
// then frame offset, size and tag offset. A leading "??" in str is tested
// first.
// NOTE(review): the enclosing loop and its termination are not visible in
// this excerpt — confirm.
395 void ParseSymbolizeFrameOutput(const char *str
,
396 InternalMmapVector
<LocalInfo
> *locals
) {
397 if (internal_strncmp(str
, "??", 2) == 0)
402 str
= ExtractToken(str
, "\n", &local
.function_name
);
403 str
= ExtractToken(str
, "\n", &local
.name
);
406 str
= ParseFileLineInfo(&addr
, str
);
407 local
.decl_file
= addr
.file
;
408 local
.decl_line
= addr
.line
;
// For each of the three numeric fields, a "??" prefix marks the value as
// absent; the has_* flag records that before the field is parsed.
410 local
.has_frame_offset
= internal_strncmp(str
, "??", 2) != 0;
411 str
= ExtractSptr(str
, " ", &local
.frame_offset
);
413 local
.has_size
= internal_strncmp(str
, "??", 2) != 0;
414 str
= ExtractUptr(str
, " ", &local
.size
);
416 local
.has_tag_offset
= internal_strncmp(str
, "??", 2) != 0;
417 str
= ExtractUptr(str
, "\n", &local
.tag_offset
);
419 locals
->push_back(local
);
// Sends a "CODE" request for the module, offset and architecture already
// stored in stack->info, and parses the reply into `stack` with
// ParseSymbolizePCOutput.
// NOTE(review): handling of a null reply is not visible in this excerpt —
// confirm.
423 bool LLVMSymbolizer::SymbolizePC(uptr addr
, SymbolizedStack
*stack
) {
424 AddressInfo
*info
= &stack
->info
;
425 const char *buf
= FormatAndSendCommand(
426 "CODE", info
->module
, info
->module_offset
, info
->module_arch
);
429 ParseSymbolizePCOutput(buf
, stack
);
// Sends a "DATA" request for the module, offset and architecture already
// stored in *info, parses the reply with ParseSymbolizeDataOutput, and then
// adds (addr - info->module_offset) to the parsed info->start.
// NOTE(review): handling of a null reply is not visible in this excerpt —
// confirm.
433 bool LLVMSymbolizer::SymbolizeData(uptr addr
, DataInfo
*info
) {
434 const char *buf
= FormatAndSendCommand(
435 "DATA", info
->module
, info
->module_offset
, info
->module_arch
);
438 ParseSymbolizeDataOutput(buf
, info
);
439 info
->start
+= (addr
- info
->module_offset
); // Add the base address.
// Sends a "FRAME" request for the module, offset and architecture already
// stored in *info, and parses the reply into info->locals with
// ParseSymbolizeFrameOutput.
// NOTE(review): handling of a null reply is not visible in this excerpt —
// confirm.
443 bool LLVMSymbolizer::SymbolizeFrame(uptr addr
, FrameInfo
*info
) {
444 const char *buf
= FormatAndSendCommand(
445 "FRAME", info
->module
, info
->module_offset
, info
->module_arch
);
448 ParseSymbolizeFrameOutput(buf
, &info
->locals
);
// Formats one request line into buffer_ and passes it to
// symbolizer_process_->SendCommand(). The line has the form
//   <command_prefix> "<module_name>" 0x<offset>
// when arch is kModuleArchUnknown, and
//   <command_prefix> "<module_name>:<arch>" 0x<offset>
// otherwise, where <arch> is ModuleArchToString(arch).
452 const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix
,
453 const char *module_name
,
458 if (arch
== kModuleArchUnknown
)
459 size_needed
= internal_snprintf(buffer_
, kBufferSize
, "%s \"%s\" 0x%zx\n",
460 command_prefix
, module_name
, module_offset
);
462 size_needed
= internal_snprintf(buffer_
, kBufferSize
,
463 "%s \"%s:%s\" 0x%zx\n", command_prefix
,
464 module_name
, ModuleArchToString(arch
),
// A result of kBufferSize or more means the command did not fit in buffer_.
// NOTE(review): unlike the other Report() calls in this file, this message
// has no trailing "\n".
467 if (size_needed
>= static_cast<int>(kBufferSize
)) {
468 Report("WARNING: Command buffer too small");
472 return symbolizer_process_
->SendCommand(buffer_
);
// Initializes both file descriptors to kInvalidFd, clears the
// failed_to_start_ and reported_invalid_path_ flags, records use_posix_spawn,
// and checks that path_ is not an empty string.
475 SymbolizerProcess::SymbolizerProcess(const char *path
, bool use_posix_spawn
)
477 input_fd_(kInvalidFd
),
478 output_fd_(kInvalidFd
),
480 failed_to_start_(false),
481 reported_invalid_path_(false),
482 use_posix_spawn_(use_posix_spawn
) {
484 CHECK_NE(path_
[0], '\0');
// Compares GetProcessName() with StripModuleName(path) using
// internal_strcmp and returns true when they are equal. The comparison is
// made only when both names are non-null.
487 static bool IsSameModule(const char* path
) {
488 if (const char* ProcessName
= GetProcessName()) {
489 if (const char* SymbolizerName
= StripModuleName(path
)) {
490 return !internal_strcmp(ProcessName
, SymbolizerName
);
// Sends `command` through SendCommandImpl, attempting it while
// times_restarted_ is below kMaxTimesRestarted. failed_to_start_ is tested on
// entry, and set to true (with a warning) when IsSameModule(path_) holds or
// when the attempts run out.
496 const char *SymbolizerProcess::SendCommand(const char *command
) {
497 if (failed_to_start_
)
// Guards against launching the current process's own binary as the
// symbolizer.
499 if (IsSameModule(path_
)) {
500 Report("WARNING: Symbolizer was blocked from starting itself!\n");
501 failed_to_start_
= true;
504 for (; times_restarted_
< kMaxTimesRestarted
; times_restarted_
++) {
505 // Start or restart symbolizer if we failed to send command to it.
506 if (const char *res
= SendCommandImpl(command
))
// Warn only once: the flag prevents the message from being repeated.
510 if (!failed_to_start_
) {
511 Report("WARNING: Failed to use and restart external symbolizer!\n");
512 failed_to_start_
= true;
// Performs a single request: tests that both file descriptors are valid,
// writes `command` with WriteToSymbolizer, reads the reply with
// ReadFromSymbolizer, and returns buffer_.data().
517 const char *SymbolizerProcess::SendCommandImpl(const char *command
) {
518 if (input_fd_
== kInvalidFd
|| output_fd_
== kInvalidFd
)
520 if (!WriteToSymbolizer(command
, internal_strlen(command
)))
522 if (!ReadFromSymbolizer())
524 return buffer_
.data();
// Closes whichever of input_fd_ and output_fd_ is not kInvalidFd, then
// returns the result of StartSymbolizerSubprocess().
527 bool SymbolizerProcess::Restart() {
528 if (input_fd_
!= kInvalidFd
)
529 CloseFile(input_fd_
);
530 if (output_fd_
!= kInvalidFd
)
531 CloseFile(output_fd_
);
532 return StartSymbolizerSubprocess();
// Reads from input_fd_ into buffer_ until ReachedEndOfOutput() reports a
// complete reply, then appends a terminating '\0'.
535 bool SymbolizerProcess::ReadFromSymbolizer() {
537 constexpr uptr max_length
= 1024;
// Grow buffer_ by at least max_length bytes, then up to its full capacity,
// so that each read can use all the space already allocated.
541 uptr size_before
= buffer_
.size();
542 buffer_
.resize(size_before
+ max_length
);
543 buffer_
.resize(buffer_
.capacity());
544 bool ret
= ReadFromFile(input_fd_
, &buffer_
[size_before
],
545 buffer_
.size() - size_before
, &just_read
);
// Shrink back so that buffer_ holds only the bytes received so far.
550 buffer_
.resize(size_before
+ just_read
);
552 // We can't read 0 bytes, as we don't expect external symbolizer to close
// its output.
554 if (just_read
== 0) {
555 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_
);
559 } while (!ReachedEndOfOutput(buffer_
.data(), buffer_
.size()));
560 buffer_
.push_back('\0');
// Writes `length` bytes of `buffer` to output_fd_ with WriteToFile. A failed
// call or a short write (write_len != length) is reported as a warning.
564 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer
, uptr length
) {
568 bool success
= WriteToFile(output_fd_
, buffer
, length
, &write_len
);
569 if (!success
|| write_len
!= length
) {
570 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_
);
576 #endif // !SANITIZER_SYMBOLIZER_MARKUP
578 } // namespace __sanitizer