FreeBSD regtest: add fakes for FreeBSD < 13
[valgrind.git] / coregrind / m_demangle / rust-demangle.c
blob4024813c20b1b356e8f1f852d55128305eb83cfc
1 /* Demangler for the Rust programming language
2 Copyright (C) 2016-2024 Free Software Foundation, Inc.
3 Written by David Tolnay (dtolnay@gmail.com).
4 Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
6 This file is part of the libiberty library.
7 Libiberty is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
12 In addition to the permissions in the GNU Library General Public
13 License, the Free Software Foundation gives you unlimited permission
14 to link the compiled version of this file into combinations with other
15 programs, and to distribute those combinations without any restriction
16 coming from the use of this file. (The Library Public License
17 restrictions do apply in other respects; for example, they cover
18 modification of the file, and distribution when not linked into a
19 combined executable.)
21 Libiberty is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 Library General Public License for more details.
26 You should have received a copy of the GNU Library General Public
27 License along with libiberty; see the file COPYING.LIB.
28 If not, see <http://www.gnu.org/licenses/>. */
31 #if 0 /* in valgrind */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35 #endif /* ! in valgrind */
37 #if 0 /* in valgrind */
38 #include "safe-ctype.h"
39 #endif /* ! in valgrind */
41 #if 0 /* in valgrind */
42 #include <inttypes.h>
43 #include <sys/types.h>
44 #include <string.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #endif /* ! in valgrind */
49 #if 0 /* in valgrind */
50 #ifdef HAVE_STRING_H
51 #include <string.h>
52 #else
53 extern size_t strlen(const char *s);
54 extern int strncmp(const char *s1, const char *s2, size_t n);
55 extern void *memset(void *s, int c, size_t n);
56 #endif
57 #endif /* ! in valgrind */
59 #if 0 /* in valgrind */
60 #include <demangle.h>
61 #include "libiberty.h"
62 #endif /* ! in valgrind */
64 #include "vg_libciface.h"
66 #include "ansidecl.h"
67 #include "demangle.h"
68 #include "safe-ctype.h"
70 typedef UChar uint8_t;
71 typedef Char int8_t;
72 typedef UShort uint16_t;
73 typedef Short int16_t;
74 typedef UInt uint32_t;
75 typedef Int int32_t;
76 typedef ULong uint64_t;
77 typedef Long int64_t;
78 #define PRIu64 "llu"
79 #define PRIx64 "llx"
81 struct rust_demangler
83 const char *sym;
84 size_t sym_len;
86 void *callback_opaque;
87 demangle_callbackref callback;
89 /* Position of the next character to read from the symbol. */
90 size_t next;
92 /* Non-zero if any error occurred. */
93 int errored;
95 /* Non-zero if nothing should be printed. */
96 int skipping_printing;
98 /* Non-zero if printing should be verbose (e.g. include hashes). */
99 int verbose;
101 /* Rust mangling version, with legacy mangling being -1. */
102 int version;
104 /* Recursion depth. */
105 unsigned int recursion;
106 /* Maximum number of times demangle_path may be called recursively. */
107 #define RUST_MAX_RECURSION_COUNT 1024
108 #define RUST_NO_RECURSION_LIMIT ((unsigned int) -1)
110 uint64_t bound_lifetime_depth;
113 /* Parsing functions. */
115 static char
116 peek (const struct rust_demangler *rdm)
118 if (rdm->next < rdm->sym_len)
119 return rdm->sym[rdm->next];
120 return 0;
123 static int
124 eat (struct rust_demangler *rdm, char c)
126 if (peek (rdm) == c)
128 rdm->next++;
129 return 1;
131 else
132 return 0;
135 static char
136 next (struct rust_demangler *rdm)
138 char c = peek (rdm);
139 if (!c)
140 rdm->errored = 1;
141 else
142 rdm->next++;
143 return c;
146 static uint64_t
147 parse_integer_62 (struct rust_demangler *rdm)
149 char c;
150 uint64_t x;
152 if (eat (rdm, '_'))
153 return 0;
155 x = 0;
156 while (!eat (rdm, '_') && !rdm->errored)
158 c = next (rdm);
159 x *= 62;
160 if (ISDIGIT (c))
161 x += c - '0';
162 else if (ISLOWER (c))
163 x += 10 + (c - 'a');
164 else if (ISUPPER (c))
165 x += 10 + 26 + (c - 'A');
166 else
168 rdm->errored = 1;
169 return 0;
172 return x + 1;
/* Parse an optional base-62 number introduced by TAG.  Returns 0
   when TAG is not the next character, otherwise one more than the
   value returned by parse_integer_62.  */
static uint64_t
parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
{
  return eat (rdm, tag) ? 1 + parse_integer_62 (rdm) : 0;
}
/* Parse an optional disambiguator, i.e. an optional base-62 number
   introduced by the tag `s`.  */
static uint64_t
parse_disambiguator (struct rust_demangler *rdm)
{
  const char disambiguator_tag = 's';

  return parse_opt_integer_62 (rdm, disambiguator_tag);
}
189 static size_t
190 parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
192 char c;
193 size_t hex_len;
195 hex_len = 0;
196 *value = 0;
198 while (!eat (rdm, '_'))
200 *value <<= 4;
202 c = next (rdm);
203 if (ISDIGIT (c))
204 *value |= c - '0';
205 else if (c >= 'a' && c <= 'f')
206 *value |= 10 + (c - 'a');
207 else
209 rdm->errored = 1;
210 return 0;
212 hex_len++;
215 return hex_len;
/* One identifier as it appears in a mangled symbol.  Both pointers
   refer into the symbol text; nothing here is NUL-terminated.  */
struct rust_mangled_ident
{
  /* The ASCII portion of the identifier and its length in bytes.  */
  const char *ascii;
  size_t ascii_len;

  /* Punycode insertion codes describing the Unicode codepoints, if
     there are any, and their length in bytes.  */
  const char *punycode;
  size_t punycode_len;
};
229 static struct rust_mangled_ident
230 parse_ident (struct rust_demangler *rdm)
232 char c;
233 size_t start, len;
234 int is_punycode = 0;
235 struct rust_mangled_ident ident;
237 ident.ascii = NULL;
238 ident.ascii_len = 0;
239 ident.punycode = NULL;
240 ident.punycode_len = 0;
242 if (rdm->version != -1)
243 is_punycode = eat (rdm, 'u');
245 c = next (rdm);
246 if (!ISDIGIT (c))
248 rdm->errored = 1;
249 return ident;
251 len = c - '0';
253 if (c != '0')
254 while (ISDIGIT (peek (rdm)))
255 len = len * 10 + (next (rdm) - '0');
257 /* Skip past the optional `_` separator (v0). */
258 if (rdm->version != -1)
259 eat (rdm, '_');
261 start = rdm->next;
262 rdm->next += len;
263 /* Check for overflows. */
264 if ((start > rdm->next) || (rdm->next > rdm->sym_len))
266 rdm->errored = 1;
267 return ident;
270 ident.ascii = rdm->sym + start;
271 ident.ascii_len = len;
273 if (is_punycode)
275 ident.punycode_len = 0;
276 while (ident.ascii_len > 0)
278 ident.ascii_len--;
280 /* The last '_' is a separator between ascii & punycode. */
281 if (ident.ascii[ident.ascii_len] == '_')
282 break;
284 ident.punycode_len++;
286 if (!ident.punycode_len)
288 rdm->errored = 1;
289 return ident;
291 ident.punycode = ident.ascii + (len - ident.punycode_len);
294 if (ident.ascii_len == 0)
295 ident.ascii = NULL;
297 return ident;
300 /* Printing functions. */
302 static void
303 print_str (struct rust_demangler *rdm, const char *data, size_t len)
305 if (!rdm->errored && !rdm->skipping_printing)
306 rdm->callback (data, len, rdm->callback_opaque);
309 #define PRINT(s) print_str (rdm, s, strlen (s))
311 static void
312 print_uint64 (struct rust_demangler *rdm, uint64_t x)
314 char s[21];
315 snprintf (s, 21, "%" PRIu64, x);
316 PRINT (s);
319 static void
320 print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
322 char s[17];
323 snprintf (s, 17, "%" PRIx64, x);
324 PRINT (s);
/* Map a lowercase hex digit ('0'-'9' or 'a'-'f') to its value in the
   range 0x0-0xf.  Any other character yields -1.  */
static int
decode_lower_hex_nibble (char nibble)
{
  int value = -1;

  if (nibble >= 'a' && nibble <= 'f')
    value = 0xa + (nibble - 'a');
  else if (nibble >= '0' && nibble <= '9')
    value = nibble - '0';

  return value;
}
/* Decode one legacy "$...$" escape found at the start of E, which has
   LEN readable bytes.

   Recognised escapes are `$C$` (`,`), `$SP$` (`@`), `$BP$` (`*`),
   `$RF$` (`&`), `$LT$` (`<`), `$GT$` (`>`), `$LP$` (`(`), `$RP$` (`)`)
   and `$uXX$`, where XX are two lowercase hex digits naming a
   non-control ASCII character.

   Returns the unescaped character and stores the full length of the
   escape, both `$` included, in *OUT_LEN.  Returns 0 and leaves
   *OUT_LEN untouched if the escape is invalid.  */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  char c = 0;
  size_t escape_len = 0;
  int lo_nibble = -1, hi_nibble = -1;

  if (len < 3 || e[0] != '$')
    return 0;

  /* Step over the opening `$`.  */
  e++;
  len--;

  if (e[0] == 'C')
    {
      escape_len = 1;

      c = ',';
    }
  else if (len > 2)
    {
      escape_len = 2;

      if (e[0] == 'S' && e[1] == 'P')
        c = '@';
      else if (e[0] == 'B' && e[1] == 'P')
        c = '*';
      else if (e[0] == 'R' && e[1] == 'F')
        c = '&';
      else if (e[0] == 'L' && e[1] == 'T')
        c = '<';
      else if (e[0] == 'G' && e[1] == 'T')
        c = '>';
      else if (e[0] == 'L' && e[1] == 'P')
        c = '(';
      else if (e[0] == 'R' && e[1] == 'P')
        c = ')';
      else if (e[0] == 'u' && len > 3)
        {
          escape_len = 3;

          hi_nibble = decode_lower_hex_nibble (e[1]);
          if (hi_nibble < 0)
            return 0;
          lo_nibble = decode_lower_hex_nibble (e[2]);
          if (lo_nibble < 0)
            return 0;

          /* Only allow non-control ASCII characters.  */
          if (hi_nibble > 7)
            return 0;
          c = (hi_nibble << 4) | lo_nibble;
          /* 0x00-0x1f and DEL (0x7f) are the ASCII control characters.  */
          if (c < 0x20 || c == 0x7f)
            return 0;
        }
    }

  /* The escape must be known and must be closed by a `$`.  */
  if (!c || len <= escape_len || e[escape_len] != '$')
    return 0;

  *out_len = 2 + escape_len;
  return c;
}
403 static void
404 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
406 char unescaped;
407 uint8_t *out, *p, d;
408 size_t len, cap, punycode_pos, j;
409 /* Punycode parameters and state. */
410 uint32_t c;
411 size_t base, t_min, t_max, skew, damp, bias, i;
412 size_t delta, w, k, t;
414 if (rdm->errored || rdm->skipping_printing)
415 return;
417 if (rdm->version == -1)
419 /* Ignore leading underscores preceding escape sequences.
420 The mangler inserts an underscore to make sure the
421 identifier begins with a XID_Start character. */
422 if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
423 && ident.ascii[1] == '$')
425 ident.ascii++;
426 ident.ascii_len--;
429 while (ident.ascii_len > 0)
431 /* Handle legacy escape sequences ("$...$", ".." or "."). */
432 if (ident.ascii[0] == '$')
434 unescaped
435 = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
436 if (unescaped)
437 print_str (rdm, &unescaped, 1);
438 else
440 /* Unexpected escape sequence, print the rest verbatim. */
441 print_str (rdm, ident.ascii, ident.ascii_len);
442 return;
445 else if (ident.ascii[0] == '.')
447 if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
449 /* ".." becomes "::" */
450 PRINT ("::");
451 len = 2;
453 else
455 PRINT (".");
456 len = 1;
459 else
461 /* Print everything before the next escape sequence, at once. */
462 for (len = 0; len < ident.ascii_len; len++)
463 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
464 break;
466 print_str (rdm, ident.ascii, len);
469 ident.ascii += len;
470 ident.ascii_len -= len;
473 return;
476 if (!ident.punycode)
478 print_str (rdm, ident.ascii, ident.ascii_len);
479 return;
482 len = 0;
483 cap = 4;
484 while (cap < ident.ascii_len)
486 cap *= 2;
487 /* Check for overflows. */
488 if ((cap * 4) / 4 != cap)
490 rdm->errored = 1;
491 return;
495 /* Store the output codepoints as groups of 4 UTF-8 bytes. */
496 out = (uint8_t *)xmalloc (cap * 4);
497 if (!out)
499 rdm->errored = 1;
500 return;
503 /* Populate initial output from ASCII fragment. */
504 for (len = 0; len < ident.ascii_len; len++)
506 p = out + 4 * len;
507 p[0] = 0;
508 p[1] = 0;
509 p[2] = 0;
510 p[3] = ident.ascii[len];
513 /* Punycode parameters and initial state. */
514 base = 36;
515 t_min = 1;
516 t_max = 26;
517 skew = 38;
518 damp = 700;
519 bias = 72;
520 i = 0;
521 c = 0x80;
523 punycode_pos = 0;
524 while (punycode_pos < ident.punycode_len)
526 /* Read one delta value. */
527 delta = 0;
528 w = 1;
529 k = 0;
532 k += base;
533 t = k < bias ? 0 : (k - bias);
534 if (t < t_min)
535 t = t_min;
536 if (t > t_max)
537 t = t_max;
539 if (punycode_pos >= ident.punycode_len)
540 goto cleanup;
541 d = ident.punycode[punycode_pos++];
543 if (ISLOWER (d))
544 d = d - 'a';
545 else if (ISDIGIT (d))
546 d = 26 + (d - '0');
547 else
549 rdm->errored = 1;
550 goto cleanup;
553 delta += d * w;
554 w *= base - t;
556 while (d >= t);
558 /* Compute the new insert position and character. */
559 len++;
560 i += delta;
561 c += i / len;
562 i %= len;
564 /* Ensure enough space is available. */
565 if (cap < len)
567 cap *= 2;
568 /* Check for overflows. */
569 if ((cap * 4) / 4 != cap || cap < len)
571 rdm->errored = 1;
572 goto cleanup;
575 p = (uint8_t *)xrealloc (out, cap * 4);
576 if (!p)
578 rdm->errored = 1;
579 goto cleanup;
581 out = p;
583 /* Move the characters after the insert position. */
584 p = out + i * 4;
585 memmove (p + 4, p, (len - i - 1) * 4);
587 /* Insert the new character, as UTF-8 bytes. */
588 p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
589 p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
590 p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
591 p[3] = 0x80 | (c & 0x3f);
593 /* If there are no more deltas, decoding is complete. */
594 if (punycode_pos == ident.punycode_len)
595 break;
597 i++;
599 /* Perform bias adaptation. */
600 delta /= damp;
601 damp = 2;
603 delta += delta / len;
604 k = 0;
605 while (delta > ((base - t_min) * t_max) / 2)
607 delta /= base - t_min;
608 k += base;
610 bias = k + ((base - t_min + 1) * delta) / (delta + skew);
613 /* Remove all the 0 bytes to leave behind an UTF-8 string. */
614 for (i = 0, j = 0; i < len * 4; i++)
615 if (out[i] != 0)
616 out[j++] = out[i];
618 print_str (rdm, (const char *)out, j);
620 cleanup:
621 free (out);
624 /* Print the lifetime according to the previously decoded index.
625 An index of `0` always refers to `'_`, but starting with `1`,
626 indices refer to late-bound lifetimes introduced by a binder. */
627 static void
628 print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
630 char c;
631 uint64_t depth;
633 PRINT ("'");
634 if (lt == 0)
636 PRINT ("_");
637 return;
640 depth = rdm->bound_lifetime_depth - lt;
641 /* Try to print lifetimes alphabetically first. */
642 if (depth < 26)
644 c = 'a' + depth;
645 print_str (rdm, &c, 1);
647 else
649 /* Use `'_123` after running out of letters. */
650 PRINT ("_");
651 print_uint64 (rdm, depth);
655 /* Demangling functions. */
657 static void demangle_binder (struct rust_demangler *rdm);
658 static void demangle_path (struct rust_demangler *rdm, int in_value);
659 static void demangle_generic_arg (struct rust_demangler *rdm);
660 static void demangle_type (struct rust_demangler *rdm);
661 static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
662 static void demangle_dyn_trait (struct rust_demangler *rdm);
663 static void demangle_const (struct rust_demangler *rdm);
664 static void demangle_const_uint (struct rust_demangler *rdm);
665 static void demangle_const_int (struct rust_demangler *rdm);
666 static void demangle_const_bool (struct rust_demangler *rdm);
667 static void demangle_const_char (struct rust_demangler *rdm);
669 /* Optionally enter a binder ('G') for late-bound lifetimes,
670 printing e.g. `for<'a, 'b> `, and make those lifetimes visible
671 to the caller (via depth level, which the caller should reset). */
672 static void
673 demangle_binder (struct rust_demangler *rdm)
675 uint64_t i, bound_lifetimes;
677 if (rdm->errored)
678 return;
680 bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
681 if (bound_lifetimes > 0)
683 PRINT ("for<");
684 for (i = 0; i < bound_lifetimes; i++)
686 if (i > 0)
687 PRINT (", ");
688 rdm->bound_lifetime_depth++;
689 print_lifetime_from_index (rdm, 1);
691 PRINT ("> ");
695 static void
696 demangle_path (struct rust_demangler *rdm, int in_value)
698 char tag, ns;
699 int was_skipping_printing;
700 size_t i, backref, old_next;
701 uint64_t dis;
702 struct rust_mangled_ident name;
704 if (rdm->errored)
705 return;
707 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
709 ++ rdm->recursion;
710 if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
711 /* FIXME: There ought to be a way to report
712 that the recursion limit has been reached. */
713 goto fail_return;
716 switch (tag = next (rdm))
718 case 'C':
719 dis = parse_disambiguator (rdm);
720 name = parse_ident (rdm);
722 print_ident (rdm, name);
723 if (rdm->verbose)
725 PRINT ("[");
726 print_uint64_hex (rdm, dis);
727 PRINT ("]");
729 break;
730 case 'N':
731 ns = next (rdm);
732 if (!ISLOWER (ns) && !ISUPPER (ns))
733 goto fail_return;
735 demangle_path (rdm, in_value);
737 dis = parse_disambiguator (rdm);
738 name = parse_ident (rdm);
740 if (ISUPPER (ns))
742 /* Special namespaces, like closures and shims. */
743 PRINT ("::{");
744 switch (ns)
746 case 'C':
747 PRINT ("closure");
748 break;
749 case 'S':
750 PRINT ("shim");
751 break;
752 default:
753 print_str (rdm, &ns, 1);
755 if (name.ascii || name.punycode)
757 PRINT (":");
758 print_ident (rdm, name);
760 PRINT ("#");
761 print_uint64 (rdm, dis);
762 PRINT ("}");
764 else
766 /* Implementation-specific/unspecified namespaces. */
768 if (name.ascii || name.punycode)
770 PRINT ("::");
771 print_ident (rdm, name);
774 break;
775 case 'M':
776 case 'X':
777 /* Ignore the `impl`'s own path.*/
778 parse_disambiguator (rdm);
779 was_skipping_printing = rdm->skipping_printing;
780 rdm->skipping_printing = 1;
781 demangle_path (rdm, in_value);
782 rdm->skipping_printing = was_skipping_printing;
783 /* fallthrough */
784 case 'Y':
785 PRINT ("<");
786 demangle_type (rdm);
787 if (tag != 'M')
789 PRINT (" as ");
790 demangle_path (rdm, 0);
792 PRINT (">");
793 break;
794 case 'I':
795 demangle_path (rdm, in_value);
796 if (in_value)
797 PRINT ("::");
798 PRINT ("<");
799 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
801 if (i > 0)
802 PRINT (", ");
803 demangle_generic_arg (rdm);
805 PRINT (">");
806 break;
807 case 'B':
808 backref = parse_integer_62 (rdm);
809 if (!rdm->skipping_printing)
811 old_next = rdm->next;
812 rdm->next = backref;
813 demangle_path (rdm, in_value);
814 rdm->next = old_next;
816 break;
817 default:
818 goto fail_return;
820 goto pass_return;
822 fail_return:
823 rdm->errored = 1;
824 pass_return:
825 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
826 -- rdm->recursion;
/* Demangle one generic argument: a lifetime (tag `L`), a const (tag
   `K`), or otherwise a type.  */
static void
demangle_generic_arg (struct rust_demangler *rdm)
{
  uint64_t lifetime;

  if (eat (rdm, 'L'))
    {
      lifetime = parse_integer_62 (rdm);
      print_lifetime_from_index (rdm, lifetime);
      return;
    }

  if (eat (rdm, 'K'))
    {
      demangle_const (rdm);
      return;
    }

  demangle_type (rdm);
}
/* Return the Rust spelling of the basic type encoded by TAG, or NULL
   if TAG does not denote a basic type.  */
static const char *
basic_type (char tag)
{
  static const struct
  {
    char tag;
    const char *name;
  } basic_types[] = {
    { 'b', "bool" },
    { 'c', "char" },
    { 'e', "str" },
    { 'u', "()" },
    { 'a', "i8" },
    { 's', "i16" },
    { 'l', "i32" },
    { 'x', "i64" },
    { 'n', "i128" },
    { 'i', "isize" },
    { 'h', "u8" },
    { 't', "u16" },
    { 'm', "u32" },
    { 'y', "u64" },
    { 'o', "u128" },
    { 'j', "usize" },
    { 'f', "f32" },
    { 'd', "f64" },
    { 'z', "!" },
    { 'p', "_" },
    { 'v', "..." }
  };
  size_t idx;

  for (idx = 0; idx < sizeof basic_types / sizeof basic_types[0]; idx++)
    if (basic_types[idx].tag == tag)
      return basic_types[idx].name;

  return NULL;
}
897 static void
898 demangle_type (struct rust_demangler *rdm)
900 char tag;
901 size_t i, old_next, backref;
902 uint64_t lt, old_bound_lifetime_depth;
903 const char *basic;
904 struct rust_mangled_ident abi;
906 if (rdm->errored)
907 return;
909 tag = next (rdm);
911 basic = basic_type (tag);
912 if (basic)
914 PRINT (basic);
915 return;
918 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
920 ++ rdm->recursion;
921 if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
922 /* FIXME: There ought to be a way to report
923 that the recursion limit has been reached. */
925 rdm->errored = 1;
926 -- rdm->recursion;
927 return;
931 switch (tag)
933 case 'R':
934 case 'Q':
935 PRINT ("&");
936 if (eat (rdm, 'L'))
938 lt = parse_integer_62 (rdm);
939 if (lt)
941 print_lifetime_from_index (rdm, lt);
942 PRINT (" ");
945 if (tag != 'R')
946 PRINT ("mut ");
947 demangle_type (rdm);
948 break;
949 case 'P':
950 case 'O':
951 PRINT ("*");
952 if (tag != 'P')
953 PRINT ("mut ");
954 else
955 PRINT ("const ");
956 demangle_type (rdm);
957 break;
958 case 'A':
959 case 'S':
960 PRINT ("[");
961 demangle_type (rdm);
962 if (tag == 'A')
964 PRINT ("; ");
965 demangle_const (rdm);
967 PRINT ("]");
968 break;
969 case 'T':
970 PRINT ("(");
971 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
973 if (i > 0)
974 PRINT (", ");
975 demangle_type (rdm);
977 if (i == 1)
978 PRINT (",");
979 PRINT (")");
980 break;
981 case 'F':
982 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
983 demangle_binder (rdm);
985 if (eat (rdm, 'U'))
986 PRINT ("unsafe ");
988 if (eat (rdm, 'K'))
990 if (eat (rdm, 'C'))
992 abi.ascii = "C";
993 abi.ascii_len = 1;
995 else
997 abi = parse_ident (rdm);
998 if (!abi.ascii || abi.punycode)
1000 rdm->errored = 1;
1001 goto restore;
1005 PRINT ("extern \"");
1007 /* If the ABI had any `-`, they were replaced with `_`,
1008 so the parts between `_` have to be re-joined with `-`. */
1009 for (i = 0; i < abi.ascii_len; i++)
1011 if (abi.ascii[i] == '_')
1013 print_str (rdm, abi.ascii, i);
1014 PRINT ("-");
1015 abi.ascii += i + 1;
1016 abi.ascii_len -= i + 1;
1017 i = 0;
1020 print_str (rdm, abi.ascii, abi.ascii_len);
1022 PRINT ("\" ");
1025 PRINT ("fn(");
1026 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1028 if (i > 0)
1029 PRINT (", ");
1030 demangle_type (rdm);
1032 PRINT (")");
1034 if (eat (rdm, 'u'))
1036 /* Skip printing the return type if it's 'u', i.e. `()`. */
1038 else
1040 PRINT (" -> ");
1041 demangle_type (rdm);
1044 /* Restore `bound_lifetime_depth` to outside the binder. */
1045 restore:
1046 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1047 break;
1048 case 'D':
1049 PRINT ("dyn ");
1051 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
1052 demangle_binder (rdm);
1054 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1056 if (i > 0)
1057 PRINT (" + ");
1058 demangle_dyn_trait (rdm);
1061 /* Restore `bound_lifetime_depth` to outside the binder. */
1062 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1064 if (!eat (rdm, 'L'))
1066 rdm->errored = 1;
1067 return;
1069 lt = parse_integer_62 (rdm);
1070 if (lt)
1072 PRINT (" + ");
1073 print_lifetime_from_index (rdm, lt);
1075 break;
1076 case 'B':
1077 backref = parse_integer_62 (rdm);
1078 if (!rdm->skipping_printing)
1080 old_next = rdm->next;
1081 rdm->next = backref;
1082 demangle_type (rdm);
1083 rdm->next = old_next;
1085 break;
1086 default:
1087 /* Go back to the tag, so `demangle_path` also sees it. */
1088 rdm->next--;
1089 demangle_path (rdm, 0);
1092 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1093 -- rdm->recursion;
1096 /* A trait in a trait object may have some "existential projections"
1097 (i.e. associated type bindings) after it, which should be printed
1098 in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1099 To this end, this method will keep the `<...>` of an 'I' path
1100 open, by omitting the `>`, and return `Ok(true)` in that case. */
1101 static int
1102 demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1104 int open;
1105 size_t i, old_next, backref;
1107 open = 0;
1109 if (rdm->errored)
1110 return open;
1112 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1114 ++ rdm->recursion;
1115 if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
1117 /* FIXME: There ought to be a way to report
1118 that the recursion limit has been reached. */
1119 rdm->errored = 1;
1120 goto end_of_func;
1124 if (eat (rdm, 'B'))
1126 backref = parse_integer_62 (rdm);
1127 if (!rdm->skipping_printing)
1129 old_next = rdm->next;
1130 rdm->next = backref;
1131 open = demangle_path_maybe_open_generics (rdm);
1132 rdm->next = old_next;
1135 else if (eat (rdm, 'I'))
1137 demangle_path (rdm, 0);
1138 PRINT ("<");
1139 open = 1;
1140 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1142 if (i > 0)
1143 PRINT (", ");
1144 demangle_generic_arg (rdm);
1147 else
1148 demangle_path (rdm, 0);
1150 end_of_func:
1151 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1152 -- rdm->recursion;
1154 return open;
1157 static void
1158 demangle_dyn_trait (struct rust_demangler *rdm)
1160 int open;
1161 struct rust_mangled_ident name;
1163 if (rdm->errored)
1164 return;
1166 open = demangle_path_maybe_open_generics (rdm);
1168 while (eat (rdm, 'p'))
1170 if (!open)
1171 PRINT ("<");
1172 else
1173 PRINT (", ");
1174 open = 1;
1176 name = parse_ident (rdm);
1177 print_ident (rdm, name);
1178 PRINT (" = ");
1179 demangle_type (rdm);
1182 if (open)
1183 PRINT (">");
1186 static void
1187 demangle_const (struct rust_demangler *rdm)
1189 char ty_tag;
1190 size_t old_next, backref;
1192 if (rdm->errored)
1193 return;
1195 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1197 ++ rdm->recursion;
1198 if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
1199 /* FIXME: There ought to be a way to report
1200 that the recursion limit has been reached. */
1201 goto fail_return;
1204 if (eat (rdm, 'B'))
1206 backref = parse_integer_62 (rdm);
1207 if (!rdm->skipping_printing)
1209 old_next = rdm->next;
1210 rdm->next = backref;
1211 demangle_const (rdm);
1212 rdm->next = old_next;
1214 goto pass_return;
1217 ty_tag = next (rdm);
1218 switch (ty_tag)
1220 /* Placeholder. */
1221 case 'p':
1222 PRINT ("_");
1223 goto pass_return;
1225 /* Unsigned integer types. */
1226 case 'h':
1227 case 't':
1228 case 'm':
1229 case 'y':
1230 case 'o':
1231 case 'j':
1232 demangle_const_uint (rdm);
1233 break;
1235 /* Signed integer types. */
1236 case 'a':
1237 case 's':
1238 case 'l':
1239 case 'x':
1240 case 'n':
1241 case 'i':
1242 demangle_const_int (rdm);
1243 break;
1245 /* Boolean. */
1246 case 'b':
1247 demangle_const_bool (rdm);
1248 break;
1250 /* Character. */
1251 case 'c':
1252 demangle_const_char (rdm);
1253 break;
1255 default:
1256 goto fail_return;
1259 if (!rdm->errored && rdm->verbose)
1261 PRINT (": ");
1262 PRINT (basic_type (ty_tag));
1264 goto pass_return;
1266 fail_return:
1267 rdm->errored = 1;
1268 pass_return:
1269 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1270 -- rdm->recursion;
1273 static void
1274 demangle_const_uint (struct rust_demangler *rdm)
1276 size_t hex_len;
1277 uint64_t value;
1279 if (rdm->errored)
1280 return;
1282 hex_len = parse_hex_nibbles (rdm, &value);
1284 if (hex_len > 16)
1286 /* Print anything that doesn't fit in `uint64_t` verbatim. */
1287 PRINT ("0x");
1288 print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1290 else if (hex_len > 0)
1291 print_uint64 (rdm, value);
1292 else
1293 rdm->errored = 1;
/* Demangle and print a signed integer const: an optional `n` marks a
   negative value and is printed as `-`, then the magnitude follows in
   the unsigned encoding.  */
static void
demangle_const_int (struct rust_demangler *rdm)
{
  int negative = eat (rdm, 'n');

  if (negative)
    PRINT ("-");

  demangle_const_uint (rdm);
}
1304 static void
1305 demangle_const_bool (struct rust_demangler *rdm)
1307 uint64_t value;
1309 if (parse_hex_nibbles (rdm, &value) != 1)
1311 rdm->errored = 1;
1312 return;
1315 if (value == 0)
1316 PRINT ("false");
1317 else if (value == 1)
1318 PRINT ("true");
1319 else
1320 rdm->errored = 1;
1323 static void
1324 demangle_const_char (struct rust_demangler *rdm)
1326 size_t hex_len;
1327 uint64_t value;
1329 hex_len = parse_hex_nibbles (rdm, &value);
1331 if (hex_len == 0 || hex_len > 8)
1333 rdm->errored = 1;
1334 return;
1337 /* Match Rust's character "debug" output as best as we can. */
1338 PRINT ("'");
1339 if (value == '\t')
1340 PRINT ("\\t");
1341 else if (value == '\r')
1342 PRINT ("\\r");
1343 else if (value == '\n')
1344 PRINT ("\\n");
1345 else if (value > ' ' && value < '~')
1347 /* Rust also considers many non-ASCII codepoints to be printable, but
1348 that logic is not easily ported to C. */
1349 char c = value;
1350 print_str (rdm, &c, 1);
1352 else
1354 PRINT ("\\u{");
1355 print_uint64_hex (rdm, value);
1356 PRINT ("}");
1358 PRINT ("'");
1361 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1362 The hex digits must contain at least 5 distinct digits. */
1363 static int
1364 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
1366 uint16_t seen;
1367 int nibble;
1368 size_t i, count;
1370 if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1371 return 0;
1373 seen = 0;
1374 for (i = 0; i < 16; i++)
1376 nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1377 if (nibble < 0)
1378 return 0;
1379 seen |= (uint16_t)1 << nibble;
1382 /* Count how many distinct digits were seen. */
1383 count = 0;
1384 while (seen)
1386 if (seen & 1)
1387 count++;
1388 seen >>= 1;
1391 return count >= 5;
1395 rust_demangle_callback (const char *mangled, int options,
1396 demangle_callbackref callback, void *opaque)
1398 const char *p;
1399 struct rust_demangler rdm;
1400 struct rust_mangled_ident ident;
1402 rdm.sym = mangled;
1403 rdm.sym_len = 0;
1405 rdm.callback_opaque = opaque;
1406 rdm.callback = callback;
1408 rdm.next = 0;
1409 rdm.errored = 0;
1410 rdm.skipping_printing = 0;
1411 rdm.verbose = (options & DMGL_VERBOSE) != 0;
1412 rdm.version = 0;
1413 rdm.recursion = (options & DMGL_NO_RECURSE_LIMIT) ? RUST_NO_RECURSION_LIMIT : 0;
1414 rdm.bound_lifetime_depth = 0;
1416 /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1417 if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1418 rdm.sym += 2;
1419 else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
1421 rdm.sym += 3;
1422 rdm.version = -1;
1424 else
1425 return 0;
1427 /* Paths (v0) always start with uppercase characters. */
1428 if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1429 return 0;
1431 /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
1432 for (p = rdm.sym; *p; p++)
1434 /* Rust v0 symbols can have '.' suffixes, ignore those. */
1435 if (rdm.version == 0 && *p == '.')
1436 break;
1438 rdm.sym_len++;
1440 if (*p == '_' || ISALNUM (*p))
1441 continue;
1443 /* Legacy Rust symbols can also contain [.:$] characters.
1444 Or @ in the .suffix (which will be skipped, see below). */
1445 if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'
1446 || *p == '@'))
1447 continue;
1449 return 0;
1452 /* Legacy Rust symbols need to be handled separately. */
1453 if (rdm.version == -1)
1455 /* Legacy Rust symbols always end with E. But can be followed by a
1456 .suffix (which we want to ignore). */
1457 int dot_suffix = 1;
1458 while (rdm.sym_len > 0 &&
1459 !(dot_suffix && rdm.sym[rdm.sym_len - 1] == 'E'))
1461 dot_suffix = rdm.sym[rdm.sym_len - 1] == '.';
1462 rdm.sym_len--;
1465 if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1466 return 0;
1467 rdm.sym_len--;
1469 /* Legacy Rust symbols also always end with a path segment
1470 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1471 This early check, before any parse_ident calls, should
1472 quickly filter out most C++ symbols unrelated to Rust. */
1473 if (!(rdm.sym_len > 19
1474 && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1475 return 0;
1479 ident = parse_ident (&rdm);
1480 if (rdm.errored || !ident.ascii)
1481 return 0;
1483 while (rdm.next < rdm.sym_len);
1485 /* The last path segment should be the hash. */
1486 if (!is_legacy_prefixed_hash (ident))
1487 return 0;
1489 /* Reset the state for a second pass, to print the symbol. */
1490 rdm.next = 0;
1491 if (!rdm.verbose && rdm.sym_len > 19)
1493 /* Hide the last segment, containing the hash, if not verbose. */
1494 rdm.sym_len -= 19;
1499 if (rdm.next > 0)
1500 print_str (&rdm, "::", 2);
1502 ident = parse_ident (&rdm);
1503 print_ident (&rdm, ident);
1505 while (rdm.next < rdm.sym_len);
1507 else
1509 demangle_path (&rdm, 1);
1511 /* Skip instantiating crate. */
1512 if (!rdm.errored && rdm.next < rdm.sym_len)
1514 rdm.skipping_printing = 1;
1515 demangle_path (&rdm, 0);
1518 /* It's an error to not reach the end. */
1519 rdm.errored |= rdm.next != rdm.sym_len;
1522 return !rdm.errored;
/* Growable string buffers.  */

/* A heap-allocated byte buffer that grows on demand.  */
struct str_buf
{
  /* Start of the storage, or NULL when nothing is allocated.  */
  char *ptr;
  /* Number of bytes currently in use.  */
  size_t len;
  /* Number of bytes allocated at PTR.  */
  size_t cap;
  /* Non-zero once growing the buffer has failed.  */
  int errored;
};
1534 static void
1535 str_buf_reserve (struct str_buf *buf, size_t extra)
1537 size_t available, min_new_cap, new_cap;
1538 char *new_ptr;
1540 /* Allocation failed before. */
1541 if (buf->errored)
1542 return;
1544 available = buf->cap - buf->len;
1546 if (extra <= available)
1547 return;
1549 min_new_cap = buf->cap + (extra - available);
1551 /* Check for overflows. */
1552 if (min_new_cap < buf->cap)
1554 buf->errored = 1;
1555 return;
1558 new_cap = buf->cap;
1560 if (new_cap == 0)
1561 new_cap = 4;
1563 /* Double capacity until sufficiently large. */
1564 while (new_cap < min_new_cap)
1566 new_cap *= 2;
1568 /* Check for overflows. */
1569 if (new_cap < buf->cap)
1571 buf->errored = 1;
1572 return;
1576 new_ptr = (char *)xrealloc (buf->ptr, new_cap);
1577 if (new_ptr == NULL)
1579 free (buf->ptr);
1580 buf->ptr = NULL;
1581 buf->len = 0;
1582 buf->cap = 0;
1583 buf->errored = 1;
1585 else
1587 buf->ptr = new_ptr;
1588 buf->cap = new_cap;
1592 static void
1593 str_buf_append (struct str_buf *buf, const char *data, size_t len)
1595 str_buf_reserve (buf, len);
1596 if (buf->errored)
1597 return;
1599 memcpy (buf->ptr + buf->len, data, len);
1600 buf->len += len;
/* Output callback that collects the demangled text in the
   `struct str_buf` that OPAQUE points to.  */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  struct str_buf *buf = (struct str_buf *)opaque;

  str_buf_append (buf, data, len);
}
1609 char *
1610 rust_demangle (const char *mangled, int options)
1612 struct str_buf out;
1613 int success;
1615 out.ptr = NULL;
1616 out.len = 0;
1617 out.cap = 0;
1618 out.errored = 0;
1620 success = rust_demangle_callback (mangled, options,
1621 str_buf_demangle_callback, &out);
1623 if (!success)
1625 free (out.ptr);
1626 return NULL;
1629 str_buf_append (&out, "\0", 1);
1630 return out.ptr;