Add missing zstd.h to coregrind Makefile.am noinst_HEADERS
[valgrind.git] / coregrind / m_demangle / demangle.c
blobca066a4b079667451013efc440fc3c95ab601c20
2 /*--------------------------------------------------------------------*/
3 /*--- Demangling of decorated names. demangle.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2000-2017 Julian Seward
11 jseward@acm.org
13 Rust demangler components are
14 Copyright (C) 2016-2016 David Tolnay
15 dtolnay@gmail.com
17 This program is free software; you can redistribute it and/or
18 modify it under the terms of the GNU General Public License as
19 published by the Free Software Foundation; either version 2 of the
20 License, or (at your option) any later version.
22 This program is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 General Public License for more details.
27 You should have received a copy of the GNU General Public License
28 along with this program; if not, see <http://www.gnu.org/licenses/>.
30 The GNU General Public License is contained in the file COPYING.
33 #include "pub_core_basics.h"
34 #include "pub_core_demangle.h"
35 #include "pub_core_libcassert.h"
36 #include "pub_core_libcbase.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_mallocfree.h"
39 #include "pub_core_options.h"
41 #include "vg_libciface.h"
42 #include "demangle.h"
44 Bool VG_(lang_is_ada) = False;
46 /*------------------------------------------------------------*/
47 /*--- ---*/
48 /*------------------------------------------------------------*/
50 /* The demangler's job is to take a raw symbol name and turn it into
51 something a Human Bean can understand. There are two levels of
52 mangling.
54 1. First, C++ names are mangled by the compiler. So we'll have to
55 undo that.
57 2. Optionally, in relatively rare cases, the resulting name is then
58 itself encoded using Z-escaping (see pub_core_redir.h) so as to
59 become part of a redirect-specification.
61 Therefore, VG_(demangle) first tries to undo (2). If successful,
62 the soname part is discarded (humans don't want to see that).
63 Then, it tries to undo (1) (using demangling code from GNU/FSF).
65 Finally, it changes the name of all symbols which are known to be
66 functions below main() to "(below main)". This helps reduce
67 variability of stack traces, something which has been a problem for
68 the testsuite for a long time.
70 --------
71 If do_cxx_demangling == True, it does all the above stages:
72 - undo (2) [Z-encoding]
73 - undo (1) [C++ mangling]
74 - do the below-main hack
76 If do_cxx_demangling == False, the C++ and Rust stages are skipped:
77 - undo (2) [Z-encoding]
78 - do the below-main hack
81 /* Note that the C++ demangler is from GNU libiberty and is almost
82 completely unmodified. We use vg_libciface.h as a way to
83 impedance-match the libiberty code into our own framework.
85 The libiberty code included here was taken from the GCC repository
86 and is released under the LGPL 2.1 license, which AFAICT is compatible
87 with "GPL 2 or later" and so is OK for inclusion in Valgrind.
89 To update to a newer libiberty, use the "update-demangler" script
90 which is included in the valgrind repository. */
92 /* This is the main, standard demangler entry point. */
94 /* Upon return, *RESULT will point to the demangled name.
95 The memory buffer that holds the demangled name is allocated on the
96 heap and will be deallocated in the next invocation. Conceptually,
97 that buffer is owned by VG_(demangle). That means two things:
98 (1) Users of VG_(demangle) must not free that buffer.
99 (2) If the demangled name needs to be stashed away for later use,
100 the contents of the buffer need to be copied. It is not sufficient
101 to just store the pointer as it will point to deallocated memory
102 after the next VG_(demangle) invocation. */
103 void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling,
104 /* IN */ const HChar *orig,
105 /* OUT */ const HChar **result )
107 /* Possibly undo (2) */
108 /* Z-Demangling was requested.
109 The fastest way to see if it's a Z-mangled name is just to attempt
110 to Z-demangle it (with NULL for the soname buffer, since we're not
111 interested in that). */
112 if (do_z_demangling) {
113 const HChar *z_demangled;
115 if (VG_(maybe_Z_demangle)( orig, NULL, /*soname*/
116 &z_demangled, NULL, NULL, NULL )) {
117 orig = z_demangled;
121 /* Possibly undo (1) */
122 // - C++ mangled symbols start with "_Z" (possibly with exceptions?)
123 // - Rust "legacy" mangled symbols start with "_Z".
124 // - Rust "v0" mangled symbols start with "_R".
125 // - D programming language mangled symbols start with "_D".
126 // XXX: the Java/Rust/Ada demangling here probably doesn't work. See
127 // https://bugs.kde.org/show_bug.cgi?id=445235 for details.
128 if (do_cxx_demangling && VG_(clo_demangle)
129 && orig != NULL && (VG_(lang_is_ada) ||
130 (orig[0] == '_' && (orig[1] == 'Z' || orig[1] == 'R' || orig[1] == 'D')))) {
131 /* !!! vvv STATIC vvv !!! */
132 static HChar* demangled = NULL;
133 /* !!! ^^^ STATIC ^^^ !!! */
135 /* Free up previously demangled name */
136 if (demangled) {
137 VG_(arena_free) (VG_AR_DEMANGLE, demangled);
138 demangled = NULL;
140 if (orig[1] == 'D') {
141 demangled = dlang_demangle ( orig, DMGL_ANSI | DMGL_PARAMS );
142 } else if (VG_(lang_is_ada)) {
143 demangled = ada_demangle(orig, 0);
144 } else {
145 demangled = ML_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
148 *result = (demangled == NULL) ? orig : demangled;
149 } else {
150 *result = orig;
153 // 13 Mar 2005: We used to check here that the demangler wasn't leaking
154 // by calling the (now-removed) function VG_(is_empty_arena)(). But,
155 // very rarely (ie. I've heard of it twice in 3 years), the demangler
156 // does leak. But, we can't do much about it, and it's not a disaster,
157 // so we just let it slide without aborting or telling the user.
161 /*------------------------------------------------------------*/
162 /*--- DEMANGLE Z-ENCODED NAMES ---*/
163 /*------------------------------------------------------------*/
165 /* Demangle a Z-encoded name as described in pub_tool_redir.h.
166 Z-encoded names are used by Valgrind for doing function
167 interception/wrapping.
169 Demangle 'sym' into its soname and fnname parts, putting them in
170 the specified buffers. Returns a Bool indicating whether the
171 demangled failed or not. A failure can occur because the prefix
172 isn't recognised, the internal Z-escaping is wrong, or because one
173 or the other (or both) of the output buffers becomes full. Passing
174 'so' as NULL is acceptable if the caller is only interested in the
175 function name part. */
177 Bool VG_(maybe_Z_demangle) ( const HChar* sym,
178 /*OUT*/const HChar** so,
179 /*OUT*/const HChar** fn,
180 /*OUT*/Bool* isWrap,
181 /*OUT*/Int* eclassTag,
182 /*OUT*/Int* eclassPrio )
184 static HChar *sobuf;
185 static HChar *fnbuf;
186 static SizeT buf_len = 0;
188 /* The length of the name after undoing Z-encoding is always smaller
189 than the mangled name. Making the soname and fnname buffers as large
190 as the demangled name is therefore always safe and overflow can never
191 occur. */
192 SizeT len = VG_(strlen)(sym) + 1;
194 if (buf_len < len) {
195 sobuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", sobuf, len);
196 fnbuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", fnbuf, len);
197 buf_len = len;
199 sobuf[0] = fnbuf[0] = '\0';
201 if (so)
202 *so = sobuf;
203 *fn = fnbuf;
205 # define EMITSO(ch) \
206 do { \
207 if (so) { \
208 sobuf[soi++] = ch; sobuf[soi] = 0; \
210 } while (0)
211 # define EMITFN(ch) \
212 do { \
213 fnbuf[fni++] = ch; fnbuf[fni] = 0; \
214 } while (0)
216 Bool error, valid, fn_is_encoded, is_VG_Z_prefixed;
217 Int soi, fni, i;
219 error = False;
220 soi = 0;
221 fni = 0;
223 valid = sym[0] == '_'
224 && sym[1] == 'v'
225 && sym[2] == 'g'
226 && (sym[3] == 'r' || sym[3] == 'w')
227 && VG_(isdigit)(sym[4])
228 && VG_(isdigit)(sym[5])
229 && VG_(isdigit)(sym[6])
230 && VG_(isdigit)(sym[7])
231 && VG_(isdigit)(sym[8])
232 && sym[9] == 'Z'
233 && (sym[10] == 'Z' || sym[10] == 'U')
234 && sym[11] == '_';
236 if (valid
237 && sym[4] == '0' && sym[5] == '0' && sym[6] == '0' && sym[7] == '0'
238 && sym[8] != '0') {
239 /* If the eclass tag is 0000 (meaning "no eclass"), the priority
240 must be 0 too. */
241 valid = False;
244 if (!valid)
245 return False;
247 fn_is_encoded = sym[10] == 'Z';
249 if (isWrap)
250 *isWrap = sym[3] == 'w';
252 if (eclassTag) {
253 *eclassTag = 1000 * ((Int)sym[4] - '0')
254 + 100 * ((Int)sym[5] - '0')
255 + 10 * ((Int)sym[6] - '0')
256 + 1 * ((Int)sym[7] - '0');
257 vg_assert(*eclassTag >= 0 && *eclassTag <= 9999);
260 if (eclassPrio) {
261 *eclassPrio = ((Int)sym[8]) - '0';
262 vg_assert(*eclassPrio >= 0 && *eclassPrio <= 9);
265 /* Now check the soname prefix isn't "VG_Z_", as described in
266 pub_tool_redir.h. */
267 is_VG_Z_prefixed =
268 sym[12] == 'V' &&
269 sym[13] == 'G' &&
270 sym[14] == '_' &&
271 sym[15] == 'Z' &&
272 sym[16] == '_';
273 if (is_VG_Z_prefixed) {
274 vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n"
275 "see pub_tool_redir.h for an explanation.", sym);
278 /* Now scan the Z-encoded soname. */
279 i = 12;
280 while (True) {
282 if (sym[i] == '_')
283 /* Found the delimiter. Move on to the fnname loop. */
284 break;
286 if (sym[i] == 0) {
287 error = True;
288 goto out;
291 if (sym[i] != 'Z') {
292 EMITSO(sym[i]);
293 i++;
294 continue;
297 /* We've got a Z-escape. */
298 i++;
299 switch (sym[i]) {
300 case 'a': EMITSO('*'); break;
301 case 'c': EMITSO(':'); break;
302 case 'd': EMITSO('.'); break;
303 case 'h': EMITSO('-'); break;
304 case 'p': EMITSO('+'); break;
305 case 's': EMITSO(' '); break;
306 case 'u': EMITSO('_'); break;
307 case 'A': EMITSO('@'); break;
308 case 'D': EMITSO('$'); break;
309 case 'L': EMITSO('('); break;
310 case 'P': EMITSO('%'); break;
311 case 'R': EMITSO(')'); break;
312 case 'S': EMITSO('/'); break;
313 case 'Z': EMITSO('Z'); break;
314 default: error = True; goto out;
316 i++;
319 vg_assert(sym[i] == '_');
320 i++;
322 /* Now deal with the function name part. */
323 if (!fn_is_encoded) {
325 /* simple; just copy. */
326 while (True) {
327 if (sym[i] == 0)
328 break;
329 EMITFN(sym[i]);
330 i++;
332 goto out;
336 /* else use a Z-decoding loop like with soname */
337 while (True) {
339 if (sym[i] == 0)
340 break;
342 if (sym[i] != 'Z') {
343 EMITFN(sym[i]);
344 i++;
345 continue;
348 /* We've got a Z-escape. */
349 i++;
350 switch (sym[i]) {
351 case 'a': EMITFN('*'); break;
352 case 'c': EMITFN(':'); break;
353 case 'd': EMITFN('.'); break;
354 case 'h': EMITFN('-'); break;
355 case 'p': EMITFN('+'); break;
356 case 's': EMITFN(' '); break;
357 case 'u': EMITFN('_'); break;
358 case 'A': EMITFN('@'); break;
359 case 'D': EMITFN('$'); break;
360 case 'L': EMITFN('('); break;
361 case 'P': EMITFN('%'); break;
362 case 'R': EMITFN(')'); break;
363 case 'S': EMITFN('/'); break;
364 case 'Z': EMITFN('Z'); break;
365 default: error = True; goto out;
367 i++;
370 out:
371 EMITSO(0);
372 EMITFN(0);
374 if (error) {
375 /* Something's wrong. Give up. */
376 VG_(message)(Vg_UserMsg,
377 "m_demangle: error Z-demangling: %s\n", sym);
378 return False;
381 return True;
385 /*--------------------------------------------------------------------*/
386 /*--- end ---*/
387 /*--------------------------------------------------------------------*/