struct / union in initializer, RFE #901.
[sdcc.git] / sdcc / support / sdbinutils / libiberty / rust-demangle.c
blob3b24d63892a9732f335860140e481c7bd696fe2f
1 /* Demangler for the Rust programming language
2 Copyright (C) 2016-2022 Free Software Foundation, Inc.
3 Written by David Tolnay (dtolnay@gmail.com).
4 Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
6 This file is part of the libiberty library.
7 Libiberty is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
12 In addition to the permissions in the GNU Library General Public
13 License, the Free Software Foundation gives you unlimited permission
14 to link the compiled version of this file into combinations with other
15 programs, and to distribute those combinations without any restriction
16 coming from the use of this file. (The Library Public License
17 restrictions do apply in other respects; for example, they cover
18 modification of the file, and distribution when not linked into a
19 combined executable.)
21 Libiberty is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 Library General Public License for more details.
26 You should have received a copy of the GNU Library General Public
27 License along with libiberty; see the file COPYING.LIB.
28 If not, see <http://www.gnu.org/licenses/>. */
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
35 #include "safe-ctype.h"
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <stdio.h>
41 #include <stdlib.h>
43 #ifdef HAVE_STRING_H
44 #include <string.h>
45 #else
46 extern size_t strlen(const char *s);
47 extern int strncmp(const char *s1, const char *s2, size_t n);
48 extern void *memset(void *s, int c, size_t n);
49 #endif
51 #include <demangle.h>
52 #include "libiberty.h"
54 struct rust_demangler
56 const char *sym;
57 size_t sym_len;
59 void *callback_opaque;
60 demangle_callbackref callback;
62 /* Position of the next character to read from the symbol. */
63 size_t next;
65 /* Non-zero if any error occurred. */
66 int errored;
68 /* Non-zero if nothing should be printed. */
69 int skipping_printing;
71 /* Non-zero if printing should be verbose (e.g. include hashes). */
72 int verbose;
74 /* Rust mangling version, with legacy mangling being -1. */
75 int version;
77 /* Recursion depth. */
78 unsigned int recursion;
79 /* Maximum number of times demangle_path may be called recursively. */
80 #define RUST_MAX_RECURSION_COUNT 1024
81 #define RUST_NO_RECURSION_LIMIT ((unsigned int) -1)
83 uint64_t bound_lifetime_depth;
86 /* Parsing functions. */
88 static char
89 peek (const struct rust_demangler *rdm)
91 if (rdm->next < rdm->sym_len)
92 return rdm->sym[rdm->next];
93 return 0;
96 static int
97 eat (struct rust_demangler *rdm, char c)
99 if (peek (rdm) == c)
101 rdm->next++;
102 return 1;
104 else
105 return 0;
108 static char
109 next (struct rust_demangler *rdm)
111 char c = peek (rdm);
112 if (!c)
113 rdm->errored = 1;
114 else
115 rdm->next++;
116 return c;
119 static uint64_t
120 parse_integer_62 (struct rust_demangler *rdm)
122 char c;
123 uint64_t x;
125 if (eat (rdm, '_'))
126 return 0;
128 x = 0;
129 while (!eat (rdm, '_'))
131 c = next (rdm);
132 x *= 62;
133 if (ISDIGIT (c))
134 x += c - '0';
135 else if (ISLOWER (c))
136 x += 10 + (c - 'a');
137 else if (ISUPPER (c))
138 x += 10 + 26 + (c - 'A');
139 else
141 rdm->errored = 1;
142 return 0;
145 return x + 1;
/* Parse an optional base-62 number introduced by TAG.  Returns 0 when
   TAG is absent, otherwise one more than the parsed number.  */
static uint64_t
parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
{
  return eat (rdm, tag) ? 1 + parse_integer_62 (rdm) : 0;
}
/* Parse an optional disambiguator, i.e. an optional base-62 number
   tagged with `s`.  */
static uint64_t
parse_disambiguator (struct rust_demangler *rdm)
{
  const char disambiguator_tag = 's';

  return parse_opt_integer_62 (rdm, disambiguator_tag);
}
162 static size_t
163 parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
165 char c;
166 size_t hex_len;
168 hex_len = 0;
169 *value = 0;
171 while (!eat (rdm, '_'))
173 *value <<= 4;
175 c = next (rdm);
176 if (ISDIGIT (c))
177 *value |= c - '0';
178 else if (c >= 'a' && c <= 'f')
179 *value |= 10 + (c - 'a');
180 else
182 rdm->errored = 1;
183 return 0;
185 hex_len++;
188 return hex_len;
/* An identifier as it appears in the mangled symbol.  Both parts point
   into the symbol text and are not NUL-terminated.  */
struct rust_mangled_ident
{
  /* Plain ASCII portion (NULL when empty) and its length.  */
  const char *ascii;
  size_t ascii_len;

  /* Punycode insertion codes for the Unicode codepoints (NULL when the
     identifier is not Punycode-encoded) and their length.  */
  const char *punycode;
  size_t punycode_len;
};
202 static struct rust_mangled_ident
203 parse_ident (struct rust_demangler *rdm)
205 char c;
206 size_t start, len;
207 int is_punycode = 0;
208 struct rust_mangled_ident ident;
210 ident.ascii = NULL;
211 ident.ascii_len = 0;
212 ident.punycode = NULL;
213 ident.punycode_len = 0;
215 if (rdm->version != -1)
216 is_punycode = eat (rdm, 'u');
218 c = next (rdm);
219 if (!ISDIGIT (c))
221 rdm->errored = 1;
222 return ident;
224 len = c - '0';
226 if (c != '0')
227 while (ISDIGIT (peek (rdm)))
228 len = len * 10 + (next (rdm) - '0');
230 /* Skip past the optional `_` separator (v0). */
231 if (rdm->version != -1)
232 eat (rdm, '_');
234 start = rdm->next;
235 rdm->next += len;
236 /* Check for overflows. */
237 if ((start > rdm->next) || (rdm->next > rdm->sym_len))
239 rdm->errored = 1;
240 return ident;
243 ident.ascii = rdm->sym + start;
244 ident.ascii_len = len;
246 if (is_punycode)
248 ident.punycode_len = 0;
249 while (ident.ascii_len > 0)
251 ident.ascii_len--;
253 /* The last '_' is a separator between ascii & punycode. */
254 if (ident.ascii[ident.ascii_len] == '_')
255 break;
257 ident.punycode_len++;
259 if (!ident.punycode_len)
261 rdm->errored = 1;
262 return ident;
264 ident.punycode = ident.ascii + (len - ident.punycode_len);
267 if (ident.ascii_len == 0)
268 ident.ascii = NULL;
270 return ident;
273 /* Printing functions. */
275 static void
276 print_str (struct rust_demangler *rdm, const char *data, size_t len)
278 if (!rdm->errored && !rdm->skipping_printing)
279 rdm->callback (data, len, rdm->callback_opaque);
282 #define PRINT(s) print_str (rdm, s, strlen (s))
/* Print X in decimal.  */
static void
print_uint64 (struct rust_demangler *rdm, uint64_t x)
{
  /* 20 digits are enough for any 64-bit value, plus the NUL.  */
  char digits[21];

  snprintf (digits, sizeof digits, "%" PRIu64, x);
  print_str (rdm, digits, strlen (digits));
}
/* Print X in lowercase hexadecimal, without any prefix.  */
static void
print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
{
  /* 16 hex digits are enough for any 64-bit value, plus the NUL.  */
  char digits[17];

  snprintf (digits, sizeof digits, "%" PRIx64, x);
  print_str (rdm, digits, strlen (digits));
}
/* Map a lowercase hexadecimal digit (0-9, a-f) to its value 0x0-0xf.
   Any other character, uppercase A-F included, yields -1.  */
static int
decode_lower_hex_nibble (char nibble)
{
  if (nibble >= 'a' && nibble <= 'f')
    return (nibble - 'a') + 0xa;

  return (nibble >= '0' && nibble <= '9') ? nibble - '0' : -1;
}
/* Decode the legacy "$...$" escape found at the start of E (LEN bytes
   available).  Recognised names are `C`, the two-letter codes listed
   below, and `uXX` with two lowercase hex digits denoting an ASCII
   character of value 0x20 or above.

   Returns the unescaped character and stores the total length of the
   escape (both `$` included) in *OUT_LEN.  Returns 0 for anything
   invalid, in which case *OUT_LEN is left untouched.  */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  static const struct
  {
    char code[2];
    char replacement;
  } two_letter[] = {
    { { 'S', 'P' }, '@' },
    { { 'B', 'P' }, '*' },
    { { 'R', 'F' }, '&' },
    { { 'L', 'T' }, '<' },
    { { 'G', 'T' }, '>' },
    { { 'L', 'P' }, '(' },
    { { 'R', 'P' }, ')' },
  };
  const char *body;
  size_t body_len, name_len = 0, n;
  int hi, lo;
  char decoded = 0;

  if (len < 3 || e[0] != '$')
    return 0;

  /* Everything after the opening `$`.  */
  body = e + 1;
  body_len = len - 1;

  if (body[0] == 'C')
    {
      name_len = 1;
      decoded = ',';
    }
  else if (body_len > 2)
    {
      name_len = 2;

      for (n = 0; n < sizeof two_letter / sizeof two_letter[0]; n++)
        if (body[0] == two_letter[n].code[0]
            && body[1] == two_letter[n].code[1])
          {
            decoded = two_letter[n].replacement;
            break;
          }

      if (!decoded && body[0] == 'u' && body_len > 3)
        {
          name_len = 3;

          hi = decode_lower_hex_nibble (body[1]);
          if (hi < 0)
            return 0;
          lo = decode_lower_hex_nibble (body[2]);
          if (lo < 0)
            return 0;

          /* Only ASCII (high nibble at most 7), and nothing below
             0x20.  */
          if (hi > 7)
            return 0;
          decoded = (hi << 4) | lo;
          if (decoded < 0x20)
            return 0;
        }
    }

  /* The name must be known and followed by the closing `$`.  */
  if (!decoded || body_len <= name_len || body[name_len] != '$')
    return 0;

  *out_len = 2 + name_len;
  return decoded;
}
376 static void
377 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
379 char unescaped;
380 uint8_t *out, *p, d;
381 size_t len, cap, punycode_pos, j;
382 /* Punycode parameters and state. */
383 uint32_t c;
384 size_t base, t_min, t_max, skew, damp, bias, i;
385 size_t delta, w, k, t;
387 if (rdm->errored || rdm->skipping_printing)
388 return;
390 if (rdm->version == -1)
392 /* Ignore leading underscores preceding escape sequences.
393 The mangler inserts an underscore to make sure the
394 identifier begins with a XID_Start character. */
395 if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
396 && ident.ascii[1] == '$')
398 ident.ascii++;
399 ident.ascii_len--;
402 while (ident.ascii_len > 0)
404 /* Handle legacy escape sequences ("$...$", ".." or "."). */
405 if (ident.ascii[0] == '$')
407 unescaped
408 = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
409 if (unescaped)
410 print_str (rdm, &unescaped, 1);
411 else
413 /* Unexpected escape sequence, print the rest verbatim. */
414 print_str (rdm, ident.ascii, ident.ascii_len);
415 return;
418 else if (ident.ascii[0] == '.')
420 if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
422 /* ".." becomes "::" */
423 PRINT ("::");
424 len = 2;
426 else
428 PRINT (".");
429 len = 1;
432 else
434 /* Print everything before the next escape sequence, at once. */
435 for (len = 0; len < ident.ascii_len; len++)
436 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
437 break;
439 print_str (rdm, ident.ascii, len);
442 ident.ascii += len;
443 ident.ascii_len -= len;
446 return;
449 if (!ident.punycode)
451 print_str (rdm, ident.ascii, ident.ascii_len);
452 return;
455 len = 0;
456 cap = 4;
457 while (cap < ident.ascii_len)
459 cap *= 2;
460 /* Check for overflows. */
461 if ((cap * 4) / 4 != cap)
463 rdm->errored = 1;
464 return;
468 /* Store the output codepoints as groups of 4 UTF-8 bytes. */
469 out = (uint8_t *)malloc (cap * 4);
470 if (!out)
472 rdm->errored = 1;
473 return;
476 /* Populate initial output from ASCII fragment. */
477 for (len = 0; len < ident.ascii_len; len++)
479 p = out + 4 * len;
480 p[0] = 0;
481 p[1] = 0;
482 p[2] = 0;
483 p[3] = ident.ascii[len];
486 /* Punycode parameters and initial state. */
487 base = 36;
488 t_min = 1;
489 t_max = 26;
490 skew = 38;
491 damp = 700;
492 bias = 72;
493 i = 0;
494 c = 0x80;
496 punycode_pos = 0;
497 while (punycode_pos < ident.punycode_len)
499 /* Read one delta value. */
500 delta = 0;
501 w = 1;
502 k = 0;
505 k += base;
506 t = k < bias ? 0 : (k - bias);
507 if (t < t_min)
508 t = t_min;
509 if (t > t_max)
510 t = t_max;
512 if (punycode_pos >= ident.punycode_len)
513 goto cleanup;
514 d = ident.punycode[punycode_pos++];
516 if (ISLOWER (d))
517 d = d - 'a';
518 else if (ISDIGIT (d))
519 d = 26 + (d - '0');
520 else
522 rdm->errored = 1;
523 goto cleanup;
526 delta += d * w;
527 w *= base - t;
529 while (d >= t);
531 /* Compute the new insert position and character. */
532 len++;
533 i += delta;
534 c += i / len;
535 i %= len;
537 /* Ensure enough space is available. */
538 if (cap < len)
540 cap *= 2;
541 /* Check for overflows. */
542 if ((cap * 4) / 4 != cap || cap < len)
544 rdm->errored = 1;
545 goto cleanup;
548 p = (uint8_t *)realloc (out, cap * 4);
549 if (!p)
551 rdm->errored = 1;
552 goto cleanup;
554 out = p;
556 /* Move the characters after the insert position. */
557 p = out + i * 4;
558 memmove (p + 4, p, (len - i - 1) * 4);
560 /* Insert the new character, as UTF-8 bytes. */
561 p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
562 p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
563 p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
564 p[3] = 0x80 | (c & 0x3f);
566 /* If there are no more deltas, decoding is complete. */
567 if (punycode_pos == ident.punycode_len)
568 break;
570 i++;
572 /* Perform bias adaptation. */
573 delta /= damp;
574 damp = 2;
576 delta += delta / len;
577 k = 0;
578 while (delta > ((base - t_min) * t_max) / 2)
580 delta /= base - t_min;
581 k += base;
583 bias = k + ((base - t_min + 1) * delta) / (delta + skew);
586 /* Remove all the 0 bytes to leave behind an UTF-8 string. */
587 for (i = 0, j = 0; i < len * 4; i++)
588 if (out[i] != 0)
589 out[j++] = out[i];
591 print_str (rdm, (const char *)out, j);
593 cleanup:
594 free (out);
597 /* Print the lifetime according to the previously decoded index.
598 An index of `0` always refers to `'_`, but starting with `1`,
599 indices refer to late-bound lifetimes introduced by a binder. */
600 static void
601 print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
603 char c;
604 uint64_t depth;
606 PRINT ("'");
607 if (lt == 0)
609 PRINT ("_");
610 return;
613 depth = rdm->bound_lifetime_depth - lt;
614 /* Try to print lifetimes alphabetically first. */
615 if (depth < 26)
617 c = 'a' + depth;
618 print_str (rdm, &c, 1);
620 else
622 /* Use `'_123` after running out of letters. */
623 PRINT ("_");
624 print_uint64 (rdm, depth);
628 /* Demangling functions. */
/* Forward declarations: the demangling routines call each other
   recursively, so all of them are declared up front.  */

/* Binders, paths, generic arguments and types.  */
static void demangle_binder (struct rust_demangler *rdm);
static void demangle_path (struct rust_demangler *rdm, int in_value);
static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
static void demangle_generic_arg (struct rust_demangler *rdm);
static void demangle_type (struct rust_demangler *rdm);
static void demangle_dyn_trait (struct rust_demangler *rdm);

/* Constant values.  */
static void demangle_const (struct rust_demangler *rdm);
static void demangle_const_uint (struct rust_demangler *rdm);
static void demangle_const_int (struct rust_demangler *rdm);
static void demangle_const_bool (struct rust_demangler *rdm);
static void demangle_const_char (struct rust_demangler *rdm);
642 /* Optionally enter a binder ('G') for late-bound lifetimes,
643 printing e.g. `for<'a, 'b> `, and make those lifetimes visible
644 to the caller (via depth level, which the caller should reset). */
645 static void
646 demangle_binder (struct rust_demangler *rdm)
648 uint64_t i, bound_lifetimes;
650 if (rdm->errored)
651 return;
653 bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
654 if (bound_lifetimes > 0)
656 PRINT ("for<");
657 for (i = 0; i < bound_lifetimes; i++)
659 if (i > 0)
660 PRINT (", ");
661 rdm->bound_lifetime_depth++;
662 print_lifetime_from_index (rdm, 1);
664 PRINT ("> ");
668 static void
669 demangle_path (struct rust_demangler *rdm, int in_value)
671 char tag, ns;
672 int was_skipping_printing;
673 size_t i, backref, old_next;
674 uint64_t dis;
675 struct rust_mangled_ident name;
677 if (rdm->errored)
678 return;
680 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
682 ++ rdm->recursion;
683 if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
684 /* FIXME: There ought to be a way to report
685 that the recursion limit has been reached. */
686 goto fail_return;
689 switch (tag = next (rdm))
691 case 'C':
692 dis = parse_disambiguator (rdm);
693 name = parse_ident (rdm);
695 print_ident (rdm, name);
696 if (rdm->verbose)
698 PRINT ("[");
699 print_uint64_hex (rdm, dis);
700 PRINT ("]");
702 break;
703 case 'N':
704 ns = next (rdm);
705 if (!ISLOWER (ns) && !ISUPPER (ns))
706 goto fail_return;
708 demangle_path (rdm, in_value);
710 dis = parse_disambiguator (rdm);
711 name = parse_ident (rdm);
713 if (ISUPPER (ns))
715 /* Special namespaces, like closures and shims. */
716 PRINT ("::{");
717 switch (ns)
719 case 'C':
720 PRINT ("closure");
721 break;
722 case 'S':
723 PRINT ("shim");
724 break;
725 default:
726 print_str (rdm, &ns, 1);
728 if (name.ascii || name.punycode)
730 PRINT (":");
731 print_ident (rdm, name);
733 PRINT ("#");
734 print_uint64 (rdm, dis);
735 PRINT ("}");
737 else
739 /* Implementation-specific/unspecified namespaces. */
741 if (name.ascii || name.punycode)
743 PRINT ("::");
744 print_ident (rdm, name);
747 break;
748 case 'M':
749 case 'X':
750 /* Ignore the `impl`'s own path.*/
751 parse_disambiguator (rdm);
752 was_skipping_printing = rdm->skipping_printing;
753 rdm->skipping_printing = 1;
754 demangle_path (rdm, in_value);
755 rdm->skipping_printing = was_skipping_printing;
756 /* fallthrough */
757 case 'Y':
758 PRINT ("<");
759 demangle_type (rdm);
760 if (tag != 'M')
762 PRINT (" as ");
763 demangle_path (rdm, 0);
765 PRINT (">");
766 break;
767 case 'I':
768 demangle_path (rdm, in_value);
769 if (in_value)
770 PRINT ("::");
771 PRINT ("<");
772 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
774 if (i > 0)
775 PRINT (", ");
776 demangle_generic_arg (rdm);
778 PRINT (">");
779 break;
780 case 'B':
781 backref = parse_integer_62 (rdm);
782 if (!rdm->skipping_printing)
784 old_next = rdm->next;
785 rdm->next = backref;
786 demangle_path (rdm, in_value);
787 rdm->next = old_next;
789 break;
790 default:
791 goto fail_return;
793 goto pass_return;
795 fail_return:
796 rdm->errored = 1;
797 pass_return:
798 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
799 -- rdm->recursion;
/* Demangle one generic argument: a lifetime (`L` + index), a constant
   (`K` + const) or, failing both, a type.  */
static void
demangle_generic_arg (struct rust_demangler *rdm)
{
  uint64_t lifetime_index;

  if (eat (rdm, 'L'))
    {
      lifetime_index = parse_integer_62 (rdm);
      print_lifetime_from_index (rdm, lifetime_index);
      return;
    }

  if (eat (rdm, 'K'))
    {
      demangle_const (rdm);
      return;
    }

  demangle_type (rdm);
}
/* Return the printed name of the basic type encoded by TAG, or NULL
   when TAG does not denote a basic type.  */
static const char *
basic_type (char tag)
{
  static const struct
  {
    char tag;
    const char *name;
  } basic_types[] = {
    { 'b', "bool" },
    { 'c', "char" },
    { 'e', "str" },
    { 'u', "()" },
    { 'a', "i8" },
    { 's', "i16" },
    { 'l', "i32" },
    { 'x', "i64" },
    { 'n', "i128" },
    { 'i', "isize" },
    { 'h', "u8" },
    { 't', "u16" },
    { 'm', "u32" },
    { 'y', "u64" },
    { 'o', "u128" },
    { 'j', "usize" },
    { 'f', "f32" },
    { 'd', "f64" },
    { 'z', "!" },
    { 'p', "_" },
    { 'v', "..." },
  };
  size_t n;

  for (n = 0; n < sizeof basic_types / sizeof basic_types[0]; n++)
    if (basic_types[n].tag == tag)
      return basic_types[n].name;

  return NULL;
}
870 static void
871 demangle_type (struct rust_demangler *rdm)
873 char tag;
874 size_t i, old_next, backref;
875 uint64_t lt, old_bound_lifetime_depth;
876 const char *basic;
877 struct rust_mangled_ident abi;
879 if (rdm->errored)
880 return;
882 tag = next (rdm);
884 basic = basic_type (tag);
885 if (basic)
887 PRINT (basic);
888 return;
891 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
893 ++ rdm->recursion;
894 if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
895 /* FIXME: There ought to be a way to report
896 that the recursion limit has been reached. */
898 rdm->errored = 1;
899 -- rdm->recursion;
900 return;
904 switch (tag)
906 case 'R':
907 case 'Q':
908 PRINT ("&");
909 if (eat (rdm, 'L'))
911 lt = parse_integer_62 (rdm);
912 if (lt)
914 print_lifetime_from_index (rdm, lt);
915 PRINT (" ");
918 if (tag != 'R')
919 PRINT ("mut ");
920 demangle_type (rdm);
921 break;
922 case 'P':
923 case 'O':
924 PRINT ("*");
925 if (tag != 'P')
926 PRINT ("mut ");
927 else
928 PRINT ("const ");
929 demangle_type (rdm);
930 break;
931 case 'A':
932 case 'S':
933 PRINT ("[");
934 demangle_type (rdm);
935 if (tag == 'A')
937 PRINT ("; ");
938 demangle_const (rdm);
940 PRINT ("]");
941 break;
942 case 'T':
943 PRINT ("(");
944 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
946 if (i > 0)
947 PRINT (", ");
948 demangle_type (rdm);
950 if (i == 1)
951 PRINT (",");
952 PRINT (")");
953 break;
954 case 'F':
955 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
956 demangle_binder (rdm);
958 if (eat (rdm, 'U'))
959 PRINT ("unsafe ");
961 if (eat (rdm, 'K'))
963 if (eat (rdm, 'C'))
965 abi.ascii = "C";
966 abi.ascii_len = 1;
968 else
970 abi = parse_ident (rdm);
971 if (!abi.ascii || abi.punycode)
973 rdm->errored = 1;
974 goto restore;
978 PRINT ("extern \"");
980 /* If the ABI had any `-`, they were replaced with `_`,
981 so the parts between `_` have to be re-joined with `-`. */
982 for (i = 0; i < abi.ascii_len; i++)
984 if (abi.ascii[i] == '_')
986 print_str (rdm, abi.ascii, i);
987 PRINT ("-");
988 abi.ascii += i + 1;
989 abi.ascii_len -= i + 1;
990 i = 0;
993 print_str (rdm, abi.ascii, abi.ascii_len);
995 PRINT ("\" ");
998 PRINT ("fn(");
999 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1001 if (i > 0)
1002 PRINT (", ");
1003 demangle_type (rdm);
1005 PRINT (")");
1007 if (eat (rdm, 'u'))
1009 /* Skip printing the return type if it's 'u', i.e. `()`. */
1011 else
1013 PRINT (" -> ");
1014 demangle_type (rdm);
1017 /* Restore `bound_lifetime_depth` to outside the binder. */
1018 restore:
1019 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1020 break;
1021 case 'D':
1022 PRINT ("dyn ");
1024 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
1025 demangle_binder (rdm);
1027 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1029 if (i > 0)
1030 PRINT (" + ");
1031 demangle_dyn_trait (rdm);
1034 /* Restore `bound_lifetime_depth` to outside the binder. */
1035 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1037 if (!eat (rdm, 'L'))
1039 rdm->errored = 1;
1040 return;
1042 lt = parse_integer_62 (rdm);
1043 if (lt)
1045 PRINT (" + ");
1046 print_lifetime_from_index (rdm, lt);
1048 break;
1049 case 'B':
1050 backref = parse_integer_62 (rdm);
1051 if (!rdm->skipping_printing)
1053 old_next = rdm->next;
1054 rdm->next = backref;
1055 demangle_type (rdm);
1056 rdm->next = old_next;
1058 break;
1059 default:
1060 /* Go back to the tag, so `demangle_path` also sees it. */
1061 rdm->next--;
1062 demangle_path (rdm, 0);
1065 if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1066 -- rdm->recursion;
1069 /* A trait in a trait object may have some "existential projections"
1070 (i.e. associated type bindings) after it, which should be printed
1071 in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1072 To this end, this method will keep the `<...>` of an 'I' path
1073 open, by omitting the `>`, and return `Ok(true)` in that case. */
1074 static int
1075 demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1077 int open;
1078 size_t i, old_next, backref;
1080 open = 0;
1082 if (rdm->errored)
1083 return open;
1085 if (eat (rdm, 'B'))
1087 backref = parse_integer_62 (rdm);
1088 if (!rdm->skipping_printing)
1090 old_next = rdm->next;
1091 rdm->next = backref;
1092 open = demangle_path_maybe_open_generics (rdm);
1093 rdm->next = old_next;
1096 else if (eat (rdm, 'I'))
1098 demangle_path (rdm, 0);
1099 PRINT ("<");
1100 open = 1;
1101 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1103 if (i > 0)
1104 PRINT (", ");
1105 demangle_generic_arg (rdm);
1108 else
1109 demangle_path (rdm, 0);
1110 return open;
1113 static void
1114 demangle_dyn_trait (struct rust_demangler *rdm)
1116 int open;
1117 struct rust_mangled_ident name;
1119 if (rdm->errored)
1120 return;
1122 open = demangle_path_maybe_open_generics (rdm);
1124 while (eat (rdm, 'p'))
1126 if (!open)
1127 PRINT ("<");
1128 else
1129 PRINT (", ");
1130 open = 1;
1132 name = parse_ident (rdm);
1133 print_ident (rdm, name);
1134 PRINT (" = ");
1135 demangle_type (rdm);
1138 if (open)
1139 PRINT (">");
1142 static void
1143 demangle_const (struct rust_demangler *rdm)
1145 char ty_tag;
1146 size_t old_next, backref;
1148 if (rdm->errored)
1149 return;
1151 if (eat (rdm, 'B'))
1153 backref = parse_integer_62 (rdm);
1154 if (!rdm->skipping_printing)
1156 old_next = rdm->next;
1157 rdm->next = backref;
1158 demangle_const (rdm);
1159 rdm->next = old_next;
1161 return;
1164 ty_tag = next (rdm);
1165 switch (ty_tag)
1167 /* Placeholder. */
1168 case 'p':
1169 PRINT ("_");
1170 return;
1172 /* Unsigned integer types. */
1173 case 'h':
1174 case 't':
1175 case 'm':
1176 case 'y':
1177 case 'o':
1178 case 'j':
1179 demangle_const_uint (rdm);
1180 break;
1182 /* Signed integer types. */
1183 case 'a':
1184 case 's':
1185 case 'l':
1186 case 'x':
1187 case 'n':
1188 case 'i':
1189 demangle_const_int (rdm);
1190 break;
1192 /* Boolean. */
1193 case 'b':
1194 demangle_const_bool (rdm);
1195 break;
1197 /* Character. */
1198 case 'c':
1199 demangle_const_char (rdm);
1200 break;
1202 default:
1203 rdm->errored = 1;
1204 return;
1207 if (rdm->errored)
1208 return;
1210 if (rdm->verbose)
1212 PRINT (": ");
1213 PRINT (basic_type (ty_tag));
1217 static void
1218 demangle_const_uint (struct rust_demangler *rdm)
1220 size_t hex_len;
1221 uint64_t value;
1223 if (rdm->errored)
1224 return;
1226 hex_len = parse_hex_nibbles (rdm, &value);
1228 if (hex_len > 16)
1230 /* Print anything that doesn't fit in `uint64_t` verbatim. */
1231 PRINT ("0x");
1232 print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1234 else if (hex_len > 0)
1235 print_uint64 (rdm, value);
1236 else
1237 rdm->errored = 1;
/* Demangle and print a signed integer constant: an optional `n`
   (printed as `-`) followed by the magnitude, which is encoded like an
   unsigned integer constant.  */
static void
demangle_const_int (struct rust_demangler *rdm)
{
  const int negative = eat (rdm, 'n');

  if (negative)
    PRINT ("-");

  demangle_const_uint (rdm);
}
1248 static void
1249 demangle_const_bool (struct rust_demangler *rdm)
1251 uint64_t value;
1253 if (parse_hex_nibbles (rdm, &value) != 1)
1255 rdm->errored = 1;
1256 return;
1259 if (value == 0)
1260 PRINT ("false");
1261 else if (value == 1)
1262 PRINT ("true");
1263 else
1264 rdm->errored = 1;
1267 static void
1268 demangle_const_char (struct rust_demangler *rdm)
1270 size_t hex_len;
1271 uint64_t value;
1273 hex_len = parse_hex_nibbles (rdm, &value);
1275 if (hex_len == 0 || hex_len > 8)
1277 rdm->errored = 1;
1278 return;
1281 /* Match Rust's character "debug" output as best as we can. */
1282 PRINT ("'");
1283 if (value == '\t')
1284 PRINT ("\\t");
1285 else if (value == '\r')
1286 PRINT ("\\r");
1287 else if (value == '\n')
1288 PRINT ("\\n");
1289 else if (value > ' ' && value < '~')
1291 /* Rust also considers many non-ASCII codepoints to be printable, but
1292 that logic is not easily ported to C. */
1293 char c = value;
1294 print_str (rdm, &c, 1);
1296 else
1298 PRINT ("\\u{");
1299 print_uint64_hex (rdm, value);
1300 PRINT ("}");
1302 PRINT ("'");
1305 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1306 The hex digits must contain at least 5 distinct digits. */
1307 static int
1308 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
1310 uint16_t seen;
1311 int nibble;
1312 size_t i, count;
1314 if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1315 return 0;
1317 seen = 0;
1318 for (i = 0; i < 16; i++)
1320 nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1321 if (nibble < 0)
1322 return 0;
1323 seen |= (uint16_t)1 << nibble;
1326 /* Count how many distinct digits were seen. */
1327 count = 0;
1328 while (seen)
1330 if (seen & 1)
1331 count++;
1332 seen >>= 1;
1335 return count >= 5;
1339 rust_demangle_callback (const char *mangled, int options,
1340 demangle_callbackref callback, void *opaque)
1342 const char *p;
1343 struct rust_demangler rdm;
1344 struct rust_mangled_ident ident;
1346 rdm.sym = mangled;
1347 rdm.sym_len = 0;
1349 rdm.callback_opaque = opaque;
1350 rdm.callback = callback;
1352 rdm.next = 0;
1353 rdm.errored = 0;
1354 rdm.skipping_printing = 0;
1355 rdm.verbose = (options & DMGL_VERBOSE) != 0;
1356 rdm.version = 0;
1357 rdm.recursion = (options & DMGL_NO_RECURSE_LIMIT) ? RUST_NO_RECURSION_LIMIT : 0;
1358 rdm.bound_lifetime_depth = 0;
1360 /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1361 if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1362 rdm.sym += 2;
1363 else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
1365 rdm.sym += 3;
1366 rdm.version = -1;
1368 else
1369 return 0;
1371 /* Paths (v0) always start with uppercase characters. */
1372 if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1373 return 0;
1375 /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
1376 for (p = rdm.sym; *p; p++)
1378 rdm.sym_len++;
1380 if (*p == '_' || ISALNUM (*p))
1381 continue;
1383 /* Legacy Rust symbols can also contain [.:$] characters. */
1384 if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
1385 continue;
1387 return 0;
1390 /* Legacy Rust symbols need to be handled separately. */
1391 if (rdm.version == -1)
1393 /* Legacy Rust symbols always end with E. */
1394 if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1395 return 0;
1396 rdm.sym_len--;
1398 /* Legacy Rust symbols also always end with a path segment
1399 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1400 This early check, before any parse_ident calls, should
1401 quickly filter out most C++ symbols unrelated to Rust. */
1402 if (!(rdm.sym_len > 19
1403 && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1404 return 0;
1408 ident = parse_ident (&rdm);
1409 if (rdm.errored || !ident.ascii)
1410 return 0;
1412 while (rdm.next < rdm.sym_len);
1414 /* The last path segment should be the hash. */
1415 if (!is_legacy_prefixed_hash (ident))
1416 return 0;
1418 /* Reset the state for a second pass, to print the symbol. */
1419 rdm.next = 0;
1420 if (!rdm.verbose && rdm.sym_len > 19)
1422 /* Hide the last segment, containing the hash, if not verbose. */
1423 rdm.sym_len -= 19;
1428 if (rdm.next > 0)
1429 print_str (&rdm, "::", 2);
1431 ident = parse_ident (&rdm);
1432 print_ident (&rdm, ident);
1434 while (rdm.next < rdm.sym_len);
1436 else
1438 demangle_path (&rdm, 1);
1440 /* Skip instantiating crate. */
1441 if (!rdm.errored && rdm.next < rdm.sym_len)
1443 rdm.skipping_printing = 1;
1444 demangle_path (&rdm, 0);
1447 /* It's an error to not reach the end. */
1448 rdm.errored |= rdm.next != rdm.sym_len;
1451 return !rdm.errored;
1454 /* Growable string buffers. */
/* A heap-allocated byte buffer that grows on demand.  */
struct str_buf
{
  /* Start of the allocation, or NULL when nothing is allocated.  */
  char *ptr;
  /* Number of bytes in use.  */
  size_t len;
  /* Number of bytes allocated.  */
  size_t cap;
  /* Non-zero once growing the buffer has failed.  */
  int errored;
};
1463 static void
1464 str_buf_reserve (struct str_buf *buf, size_t extra)
1466 size_t available, min_new_cap, new_cap;
1467 char *new_ptr;
1469 /* Allocation failed before. */
1470 if (buf->errored)
1471 return;
1473 available = buf->cap - buf->len;
1475 if (extra <= available)
1476 return;
1478 min_new_cap = buf->cap + (extra - available);
1480 /* Check for overflows. */
1481 if (min_new_cap < buf->cap)
1483 buf->errored = 1;
1484 return;
1487 new_cap = buf->cap;
1489 if (new_cap == 0)
1490 new_cap = 4;
1492 /* Double capacity until sufficiently large. */
1493 while (new_cap < min_new_cap)
1495 new_cap *= 2;
1497 /* Check for overflows. */
1498 if (new_cap < buf->cap)
1500 buf->errored = 1;
1501 return;
1505 new_ptr = (char *)realloc (buf->ptr, new_cap);
1506 if (new_ptr == NULL)
1508 free (buf->ptr);
1509 buf->ptr = NULL;
1510 buf->len = 0;
1511 buf->cap = 0;
1512 buf->errored = 1;
1514 else
1516 buf->ptr = new_ptr;
1517 buf->cap = new_cap;
1521 static void
1522 str_buf_append (struct str_buf *buf, const char *data, size_t len)
1524 str_buf_reserve (buf, len);
1525 if (buf->errored)
1526 return;
1528 memcpy (buf->ptr + buf->len, data, len);
1529 buf->len += len;
/* Demangler output callback that appends each fragment to the
   `struct str_buf` passed as OPAQUE.  */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  struct str_buf *buf = (struct str_buf *) opaque;

  str_buf_append (buf, data, len);
}
1538 char *
1539 rust_demangle (const char *mangled, int options)
1541 struct str_buf out;
1542 int success;
1544 out.ptr = NULL;
1545 out.len = 0;
1546 out.cap = 0;
1547 out.errored = 0;
1549 success = rust_demangle_callback (mangled, options,
1550 str_buf_demangle_callback, &out);
1552 if (!success)
1554 free (out.ptr);
1555 return NULL;
1558 str_buf_append (&out, "\0", 1);
1559 return out.ptr;