2 /*--------------------------------------------------------------------*/
3 /*--- Demangling of C++ mangled names. demangle.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2000-2017 Julian Seward
13 Rust demangler components are
14 Copyright (C) 2016-2016 David Tolnay
17 This program is free software; you can redistribute it and/or
18 modify it under the terms of the GNU General Public License as
19 published by the Free Software Foundation; either version 2 of the
20 License, or (at your option) any later version.
22 This program is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 General Public License for more details.
27 You should have received a copy of the GNU General Public License
28 along with this program; if not, see <http://www.gnu.org/licenses/>.
30 The GNU General Public License is contained in the file COPYING.
33 #include "pub_core_basics.h"
34 #include "pub_core_demangle.h"
35 #include "pub_core_libcassert.h"
36 #include "pub_core_libcbase.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_mallocfree.h"
39 #include "pub_core_options.h"
41 #include "vg_libciface.h"
45 /*------------------------------------------------------------*/
47 /*------------------------------------------------------------*/
49 /* The demangler's job is to take a raw symbol name and turn it into
50 something a Human Bean can understand. There are two levels of
53 1. First, C++ names are mangled by the compiler. So we'll have to
56 2. Optionally, in relatively rare cases, the resulting name is then
57 itself encoded using Z-escaping (see pub_core_redir.h) so as to
58 become part of a redirect-specification.
60 Therefore, VG_(demangle) first tries to undo (2). If successful,
61 the soname part is discarded (humans don't want to see that).
62 Then, it tries to undo (1) (using demangling code from GNU/FSF).
64 Finally, it changes the name of all symbols which are known to be
65 functions below main() to "(below main)". This helps reduce
66 variability of stack traces, something which has been a problem for
67 the testsuite for a long time.
70 If do_cxx_demangling == True, it does all the above stages:
71 - undo (2) [Z-encoding]
72 - undo (1) [C++ mangling]
73 - do the below-main hack
75 If do_cxx_demangling == False, the C++ and Rust stages are skipped:
76 - undo (2) [Z-encoding]
77 - do the below-main hack
80 /* Note that the C++ demangler is from GNU libiberty and is almost
81 completely unmodified. We use vg_libciface.h as a way to
82 impedance-match the libiberty code into our own framework.
84 The libiberty code included here was taken from the GCC repository
85 and is released under the LGPL 2.1 license, which AFAICT is compatible
86 with "GPL 2 or later" and so is OK for inclusion in Valgrind.
88 To update to a newer libiberty, use the "update-demangler" script
89 which is included in the valgrind repository. */
91 /* This is the main, standard demangler entry point. */
93 /* Upon return, *RESULT will point to the demangled name.
94 The memory buffer that holds the demangled name is allocated on the
95 heap and will be deallocated in the next invocation. Conceptually,
96 that buffer is owned by VG_(demangle). That means two things:
97 (1) Users of VG_(demangle) must not free that buffer.
98 (2) If the demangled name needs to be stashed away for later use,
99 the contents of the buffer need to be copied. It is not sufficient
100 to just store the pointer as it will point to deallocated memory
101 after the next VG_(demangle) invocation. */
102 void VG_(demangle
) ( Bool do_cxx_demangling
, Bool do_z_demangling
,
103 /* IN */ const HChar
*orig
,
104 /* OUT */ const HChar
**result
)
106 /* Possibly undo (2) */
107 /* Z-Demangling was requested.
108 The fastest way to see if it's a Z-mangled name is just to attempt
109 to Z-demangle it (with NULL for the soname buffer, since we're not
110 interested in that). */
111 if (do_z_demangling
) {
112 const HChar
*z_demangled
;
114 if (VG_(maybe_Z_demangle
)( orig
, NULL
, /*soname*/
115 &z_demangled
, NULL
, NULL
, NULL
)) {
120 /* Possibly undo (1) */
121 // - C++ mangled symbols start with "_Z" (possibly with exceptions?)
122 // - Rust "legacy" mangled symbols start with "_Z".
123 // - Rust "v0" mangled symbols start with "_R".
124 // - D programming language mangled symbols start with "_D".
125 // XXX: the Java/Rust/Ada demangling here probably doesn't work. See
126 // https://bugs.kde.org/show_bug.cgi?id=445235 for details.
127 if (do_cxx_demangling
&& VG_(clo_demangle
)
128 && orig
!= NULL
&& orig
[0] == '_'
129 && (orig
[1] == 'Z' || orig
[1] == 'R' || orig
[1] == 'D')) {
130 /* !!! vvv STATIC vvv !!! */
131 static HChar
* demangled
= NULL
;
132 /* !!! ^^^ STATIC ^^^ !!! */
134 /* Free up previously demangled name */
136 VG_(arena_free
) (VG_AR_DEMANGLE
, demangled
);
139 if (orig
[1] == 'D') {
140 demangled
= dlang_demangle ( orig
, DMGL_ANSI
| DMGL_PARAMS
);
142 demangled
= ML_(cplus_demangle
) ( orig
, DMGL_ANSI
| DMGL_PARAMS
);
145 *result
= (demangled
== NULL
) ? orig
: demangled
;
150 // 13 Mar 2005: We used to check here that the demangler wasn't leaking
151 // by calling the (now-removed) function VG_(is_empty_arena)(). But,
152 // very rarely (i.e. I've heard of it twice in 3 years), the demangler
153 // does leak. But, we can't do much about it, and it's not a disaster,
154 // so we just let it slide without aborting or telling the user.
158 /*------------------------------------------------------------*/
159 /*--- DEMANGLE Z-ENCODED NAMES ---*/
160 /*------------------------------------------------------------*/
162 /* Demangle a Z-encoded name as described in pub_tool_redir.h.
163 Z-encoded names are used by Valgrind for doing function
164 interception/wrapping.
166 Demangle 'sym' into its soname and fnname parts, putting them in
167 the specified buffers. Returns a Bool indicating whether the
168 demangling failed or not. A failure can occur because the prefix
169 isn't recognised, the internal Z-escaping is wrong, or because one
170 or the other (or both) of the output buffers becomes full. Passing
171 'so' as NULL is acceptable if the caller is only interested in the
172 function name part. */
174 Bool
VG_(maybe_Z_demangle
) ( const HChar
* sym
,
175 /*OUT*/const HChar
** so
,
176 /*OUT*/const HChar
** fn
,
178 /*OUT*/Int
* eclassTag
,
179 /*OUT*/Int
* eclassPrio
)
183 static SizeT buf_len
= 0;
185 /* The length of the name after undoing Z-encoding is always smaller
186 than the mangled name. Making the soname and fnname buffers as large
187 as the mangled name is therefore always safe and overflow can never
189 SizeT len
= VG_(strlen
)(sym
) + 1;
192 sobuf
= VG_(arena_realloc
)(VG_AR_DEMANGLE
, "Z-demangle", sobuf
, len
);
193 fnbuf
= VG_(arena_realloc
)(VG_AR_DEMANGLE
, "Z-demangle", fnbuf
, len
);
196 sobuf
[0] = fnbuf
[0] = '\0';
202 # define EMITSO(ch) \
205 sobuf[soi++] = ch; sobuf[soi] = 0; \
208 # define EMITFN(ch) \
210 fnbuf[fni++] = ch; fnbuf[fni] = 0; \
213 Bool error
, valid
, fn_is_encoded
, is_VG_Z_prefixed
;
220 valid
= sym
[0] == '_'
223 && (sym
[3] == 'r' || sym
[3] == 'w')
224 && VG_(isdigit
)(sym
[4])
225 && VG_(isdigit
)(sym
[5])
226 && VG_(isdigit
)(sym
[6])
227 && VG_(isdigit
)(sym
[7])
228 && VG_(isdigit
)(sym
[8])
230 && (sym
[10] == 'Z' || sym
[10] == 'U')
234 && sym
[4] == '0' && sym
[5] == '0' && sym
[6] == '0' && sym
[7] == '0'
236 /* If the eclass tag is 0000 (meaning "no eclass"), the priority
244 fn_is_encoded
= sym
[10] == 'Z';
247 *isWrap
= sym
[3] == 'w';
250 *eclassTag
= 1000 * ((Int
)sym
[4] - '0')
251 + 100 * ((Int
)sym
[5] - '0')
252 + 10 * ((Int
)sym
[6] - '0')
253 + 1 * ((Int
)sym
[7] - '0');
254 vg_assert(*eclassTag
>= 0 && *eclassTag
<= 9999);
258 *eclassPrio
= ((Int
)sym
[8]) - '0';
259 vg_assert(*eclassPrio
>= 0 && *eclassPrio
<= 9);
262 /* Now check the soname prefix isn't "VG_Z_", as described in
270 if (is_VG_Z_prefixed
) {
271 vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n"
272 "see pub_tool_redir.h for an explanation.", sym
);
275 /* Now scan the Z-encoded soname. */
280 /* Found the delimiter. Move on to the fnname loop. */
294 /* We've got a Z-escape. */
297 case 'a': EMITSO('*'); break;
298 case 'c': EMITSO(':'); break;
299 case 'd': EMITSO('.'); break;
300 case 'h': EMITSO('-'); break;
301 case 'p': EMITSO('+'); break;
302 case 's': EMITSO(' '); break;
303 case 'u': EMITSO('_'); break;
304 case 'A': EMITSO('@'); break;
305 case 'D': EMITSO('$'); break;
306 case 'L': EMITSO('('); break;
307 case 'P': EMITSO('%'); break;
308 case 'R': EMITSO(')'); break;
309 case 'S': EMITSO('/'); break;
310 case 'Z': EMITSO('Z'); break;
311 default: error
= True
; goto out
;
316 vg_assert(sym
[i
] == '_');
319 /* Now deal with the function name part. */
320 if (!fn_is_encoded
) {
322 /* simple; just copy. */
333 /* else use a Z-decoding loop like with soname */
345 /* We've got a Z-escape. */
348 case 'a': EMITFN('*'); break;
349 case 'c': EMITFN(':'); break;
350 case 'd': EMITFN('.'); break;
351 case 'h': EMITFN('-'); break;
352 case 'p': EMITFN('+'); break;
353 case 's': EMITFN(' '); break;
354 case 'u': EMITFN('_'); break;
355 case 'A': EMITFN('@'); break;
356 case 'D': EMITFN('$'); break;
357 case 'L': EMITFN('('); break;
358 case 'P': EMITFN('%'); break;
359 case 'R': EMITFN(')'); break;
360 case 'S': EMITFN('/'); break;
361 case 'Z': EMITFN('Z'); break;
362 default: error
= True
; goto out
;
372 /* Something's wrong. Give up. */
373 VG_(message
)(Vg_UserMsg
,
374 "m_demangle: error Z-demangling: %s\n", sym
);
382 /*--------------------------------------------------------------------*/
384 /*--------------------------------------------------------------------*/