1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringViewExtras.h"
16 #include "llvm/Demangle/Utility.h"
23 #include <string_view>
27 using llvm::itanium_demangle::OutputBuffer
;
28 using llvm::itanium_demangle::ScopedOverride
;
29 using llvm::itanium_demangle::starts_with
;
34 std::string_view Name
;
37 bool empty() const { return Name
.empty(); }
40 enum class BasicType
{
69 enum class LeaveGenericsOpen
{
75 // Maximum recursion level. Used to avoid stack overflow.
76 size_t MaxRecursionLevel
;
77 // Current recursion level.
78 size_t RecursionLevel
;
79 size_t BoundLifetimes
;
80 // Input string that is being demangled with "_R" prefix removed.
81 std::string_view Input
;
82 // Position in the input string.
84 // When true, print methods append the output to the stream.
85 // When false, the output is suppressed.
87 // True if an error occurred.
94 Demangler(size_t MaxRecursionLevel
= 500);
96 bool demangle(std::string_view MangledName
);
99 bool demanglePath(IsInType Type
,
100 LeaveGenericsOpen LeaveOpen
= LeaveGenericsOpen::No
);
101 void demangleImplPath(IsInType InType
);
102 void demangleGenericArg();
104 void demangleFnSig();
105 void demangleDynBounds();
106 void demangleDynTrait();
107 void demangleOptionalBinder();
108 void demangleConst();
109 void demangleConstInt();
110 void demangleConstBool();
111 void demangleConstChar();
113 template <typename Callable
> void demangleBackref(Callable Demangler
) {
114 uint64_t Backref
= parseBase62Number();
115 if (Error
|| Backref
>= Position
) {
123 ScopedOverride
<size_t> SavePosition(Position
, Position
);
128 Identifier
parseIdentifier();
129 uint64_t parseOptionalBase62Number(char Tag
);
130 uint64_t parseBase62Number();
131 uint64_t parseDecimalNumber();
132 uint64_t parseHexNumber(std::string_view
&HexDigits
);
135 void print(std::string_view S
);
136 void printDecimalNumber(uint64_t N
);
137 void printBasicType(BasicType
);
138 void printLifetime(uint64_t Index
);
139 void printIdentifier(Identifier Ident
);
143 bool consumeIf(char Prefix
);
145 bool addAssign(uint64_t &A
, uint64_t B
);
146 bool mulAssign(uint64_t &A
, uint64_t B
);
151 char *llvm::rustDemangle(std::string_view MangledName
) {
152 // Return early if mangled name doesn't look like a Rust symbol.
153 if (MangledName
.empty() || !starts_with(MangledName
, "_R"))
157 if (!D
.demangle(MangledName
)) {
158 std::free(D
.Output
.getBuffer());
164 return D
.Output
.getBuffer();
167 Demangler::Demangler(size_t MaxRecursionLevel
)
168 : MaxRecursionLevel(MaxRecursionLevel
) {}
170 static inline bool isDigit(const char C
) { return '0' <= C
&& C
<= '9'; }
172 static inline bool isHexDigit(const char C
) {
173 return ('0' <= C
&& C
<= '9') || ('a' <= C
&& C
<= 'f');
176 static inline bool isLower(const char C
) { return 'a' <= C
&& C
<= 'z'; }
178 static inline bool isUpper(const char C
) { return 'A' <= C
&& C
<= 'Z'; }
180 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
181 static inline bool isValid(const char C
) {
182 return isDigit(C
) || isLower(C
) || isUpper(C
) || C
== '_';
185 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
186 // otherwise. The demangled symbol is stored in Output field. It is
187 // responsibility of the caller to free the memory behind the output stream.
189 // <symbol-name> = "_R" <path> [<instantiating-crate>]
190 bool Demangler::demangle(std::string_view Mangled
) {
197 if (!starts_with(Mangled
, "_R")) {
201 Mangled
.remove_prefix(2);
202 size_t Dot
= Mangled
.find('.');
203 Input
= Dot
== std::string_view::npos
? Mangled
: Mangled
.substr(0, Dot
);
205 demanglePath(IsInType::No
);
207 if (Position
!= Input
.size()) {
208 ScopedOverride
<bool> SavePrint(Print
, false);
209 demanglePath(IsInType::No
);
212 if (Position
!= Input
.size())
215 if (Dot
!= std::string_view::npos
) {
217 print(Mangled
.substr(Dot
));
224 // Demangles a path. InType indicates whether a path is inside a type. When
225 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
226 // output. Return value indicates whether generics arguments have been left
229 // <path> = "C" <identifier> // crate root
230 // | "M" <impl-path> <type> // <T> (inherent impl)
231 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
232 // | "Y" <type> <path> // <T as Trait> (trait definition)
233 // | "N" <ns> <path> <identifier> // ...::ident (nested path)
234 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
236 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
237 // <ns> = "C" // closure
239 // | <A-Z> // other special namespaces
240 // | <a-z> // internal namespaces
241 bool Demangler::demanglePath(IsInType InType
, LeaveGenericsOpen LeaveOpen
) {
242 if (Error
|| RecursionLevel
>= MaxRecursionLevel
) {
246 ScopedOverride
<size_t> SaveRecursionLevel(RecursionLevel
, RecursionLevel
+ 1);
250 parseOptionalBase62Number('s');
251 printIdentifier(parseIdentifier());
255 demangleImplPath(InType
);
262 demangleImplPath(InType
);
266 demanglePath(IsInType::Yes
);
274 demanglePath(IsInType::Yes
);
280 if (!isLower(NS
) && !isUpper(NS
)) {
284 demanglePath(InType
);
286 uint64_t Disambiguator
= parseOptionalBase62Number('s');
287 Identifier Ident
= parseIdentifier();
290 // Special namespaces
298 if (!Ident
.empty()) {
300 printIdentifier(Ident
);
303 printDecimalNumber(Disambiguator
);
306 // Implementation internal namespaces.
307 if (!Ident
.empty()) {
309 printIdentifier(Ident
);
315 demanglePath(InType
);
316 // Omit "::" when in a type, where it is optional.
317 if (InType
== IsInType::No
)
320 for (size_t I
= 0; !Error
&& !consumeIf('E'); ++I
) {
323 demangleGenericArg();
325 if (LeaveOpen
== LeaveGenericsOpen::Yes
)
333 demangleBackref([&] { IsOpen
= demanglePath(InType
, LeaveOpen
); });
344 // <impl-path> = [<disambiguator>] <path>
345 // <disambiguator> = "s" <base-62-number>
346 void Demangler::demangleImplPath(IsInType InType
) {
347 ScopedOverride
<bool> SavePrint(Print
, false);
348 parseOptionalBase62Number('s');
349 demanglePath(InType
);
352 // <generic-arg> = <lifetime>
355 // <lifetime> = "L" <base-62-number>
356 void Demangler::demangleGenericArg() {
358 printLifetime(parseBase62Number());
359 else if (consumeIf('K'))
365 // <basic-type> = "a" // i8
385 // | "p" // placeholder (e.g. for generic params), shown as _
386 static bool parseBasicType(char C
, BasicType
&Type
) {
389 Type
= BasicType::I8
;
392 Type
= BasicType::Bool
;
395 Type
= BasicType::Char
;
398 Type
= BasicType::F64
;
401 Type
= BasicType::Str
;
404 Type
= BasicType::F32
;
407 Type
= BasicType::U8
;
410 Type
= BasicType::ISize
;
413 Type
= BasicType::USize
;
416 Type
= BasicType::I32
;
419 Type
= BasicType::U32
;
422 Type
= BasicType::I128
;
425 Type
= BasicType::U128
;
428 Type
= BasicType::Placeholder
;
431 Type
= BasicType::I16
;
434 Type
= BasicType::U16
;
437 Type
= BasicType::Unit
;
440 Type
= BasicType::Variadic
;
443 Type
= BasicType::I64
;
446 Type
= BasicType::U64
;
449 Type
= BasicType::Never
;
456 void Demangler::printBasicType(BasicType Type
) {
458 case BasicType::Bool
:
461 case BasicType::Char
:
476 case BasicType::I128
:
479 case BasicType::ISize
:
494 case BasicType::U128
:
497 case BasicType::USize
:
509 case BasicType::Placeholder
:
512 case BasicType::Unit
:
515 case BasicType::Variadic
:
518 case BasicType::Never
:
524 // <type> = | <basic-type>
525 // | <path> // named type
526 // | "A" <type> <const> // [T; N]
527 // | "S" <type> // [T]
528 // | "T" {<type>} "E" // (T1, T2, T3, ...)
529 // | "R" [<lifetime>] <type> // &T
530 // | "Q" [<lifetime>] <type> // &mut T
531 // | "P" <type> // *const T
532 // | "O" <type> // *mut T
533 // | "F" <fn-sig> // fn(...) -> ...
534 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
535 // | <backref> // backref
536 void Demangler::demangleType() {
537 if (Error
|| RecursionLevel
>= MaxRecursionLevel
) {
541 ScopedOverride
<size_t> SaveRecursionLevel(RecursionLevel
, RecursionLevel
+ 1);
543 size_t Start
= Position
;
546 if (parseBasicType(C
, Type
))
547 return printBasicType(Type
);
565 for (; !Error
&& !consumeIf('E'); ++I
) {
578 if (consumeIf('L')) {
579 if (auto Lifetime
= parseBase62Number()) {
580 printLifetime(Lifetime
);
601 if (consumeIf('L')) {
602 if (auto Lifetime
= parseBase62Number()) {
604 printLifetime(Lifetime
);
611 demangleBackref([&] { demangleType(); });
615 demanglePath(IsInType::Yes
);
620 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
622 // | <undisambiguated-identifier>
623 void Demangler::demangleFnSig() {
624 ScopedOverride
<size_t> SaveBoundLifetimes(BoundLifetimes
, BoundLifetimes
);
625 demangleOptionalBinder();
630 if (consumeIf('K')) {
632 if (consumeIf('C')) {
635 Identifier Ident
= parseIdentifier();
638 for (char C
: Ident
.Name
) {
639 // When mangling ABI string, the "-" is replaced with "_".
649 for (size_t I
= 0; !Error
&& !consumeIf('E'); ++I
) {
656 if (consumeIf('u')) {
657 // Skip the unit type from the output.
664 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
665 void Demangler::demangleDynBounds() {
666 ScopedOverride
<size_t> SaveBoundLifetimes(BoundLifetimes
, BoundLifetimes
);
668 demangleOptionalBinder();
669 for (size_t I
= 0; !Error
&& !consumeIf('E'); ++I
) {
676 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
677 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
678 void Demangler::demangleDynTrait() {
679 bool IsOpen
= demanglePath(IsInType::Yes
, LeaveGenericsOpen::Yes
);
680 while (!Error
&& consumeIf('p')) {
687 print(parseIdentifier().Name
);
695 // Demangles optional binder and updates the number of bound lifetimes.
697 // <binder> = "G" <base-62-number>
698 void Demangler::demangleOptionalBinder() {
699 uint64_t Binder
= parseOptionalBase62Number('G');
700 if (Error
|| Binder
== 0)
703 // In valid inputs each bound lifetime is referenced later. Referencing a
704 // lifetime requires at least one byte of input. Reject inputs that are too
705 // short to reference all bound lifetimes. Otherwise demangling of invalid
706 // binders could generate excessive amounts of output.
707 if (Binder
>= Input
.size() - BoundLifetimes
) {
713 for (size_t I
= 0; I
!= Binder
; ++I
) {
722 // <const> = <basic-type> <const-data>
723 // | "p" // placeholder
725 void Demangler::demangleConst() {
726 if (Error
|| RecursionLevel
>= MaxRecursionLevel
) {
730 ScopedOverride
<size_t> SaveRecursionLevel(RecursionLevel
, RecursionLevel
+ 1);
734 if (parseBasicType(C
, Type
)) {
740 case BasicType::I128
:
741 case BasicType::ISize
:
746 case BasicType::U128
:
747 case BasicType::USize
:
750 case BasicType::Bool
:
753 case BasicType::Char
:
756 case BasicType::Placeholder
:
763 } else if (C
== 'B') {
764 demangleBackref([&] { demangleConst(); });
770 // <const-data> = ["n"] <hex-number>
771 void Demangler::demangleConstInt() {
775 std::string_view HexDigits
;
776 uint64_t Value
= parseHexNumber(HexDigits
);
777 if (HexDigits
.size() <= 16) {
778 printDecimalNumber(Value
);
785 // <const-data> = "0_" // false
787 void Demangler::demangleConstBool() {
788 std::string_view HexDigits
;
789 parseHexNumber(HexDigits
);
790 if (HexDigits
== "0")
792 else if (HexDigits
== "1")
798 /// Returns true if CodePoint represents a printable ASCII character.
799 static bool isAsciiPrintable(uint64_t CodePoint
) {
800 return 0x20 <= CodePoint
&& CodePoint
<= 0x7e;
803 // <const-data> = <hex-number>
804 void Demangler::demangleConstChar() {
805 std::string_view HexDigits
;
806 uint64_t CodePoint
= parseHexNumber(HexDigits
);
807 if (Error
|| HexDigits
.size() > 6) {
833 if (isAsciiPrintable(CodePoint)) {
846 // <undisambiguated-identifier> = ["u
"] <decimal-number> ["_
"] <bytes>
847 Identifier Demangler::parseIdentifier() {
848 bool Punycode = consumeIf('u');
849 uint64_t Bytes = parseDecimalNumber();
851 // Underscore resolves the ambiguity when identifier starts with a decimal
852 // digit or another underscore.
855 if (Error || Bytes > Input.size() - Position) {
859 std::string_view S = Input.substr(Position, Bytes);
862 if (!std::all_of(S.begin(), S.end(), isValid)) {
867 return {S, Punycode};
870 // Parses optional base 62 number. The presence of a number is determined using
871 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
873 // This function is indended for parsing disambiguators and binders which when
874 // not present have their value interpreted as 0, and otherwise as decoded
875 // value + 1. For example for binders, value for "G_
" is 1, for "G0_
" value is
876 // 2. When "G
" is absent value is 0.
877 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
881 uint64_t N = parseBase62Number();
882 if (Error || !addAssign(N, 1))
888 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
889 // "_
". All values are offset by 1, so that "_
" encodes 0, "0_
" encodes 1,
890 // "1_
" encodes 2, etc.
892 // <base-62-number> = {<0-9a-zA-Z>} "_
"
893 uint64_t Demangler::parseBase62Number() {
905 } else if (isDigit(C)) {
907 } else if (isLower(C)) {
908 Digit = 10 + (C - 'a');
909 } else if (isUpper(C)) {
910 Digit = 10 + 26 + (C - 'A');
916 if (!mulAssign(Value, 62))
919 if (!addAssign(Value, Digit))
923 if (!addAssign(Value, 1))
929 // Parses a decimal number that had been encoded without any leading zeros.
931 // <decimal-number> = "0"
933 uint64_t Demangler::parseDecimalNumber() {
947 while (isDigit(look())) {
948 if (!mulAssign(Value, 10)) {
953 uint64_t D = consume() - '0';
954 if (!addAssign(Value, D))
961 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
962 // value and stores hex digits in HexDigits. The return value is unspecified if
963 // HexDigits.size() > 16.
965 // <hex-number> = "0_
"
966 // | <1-9a-f> {<0-9a-f>} "_
"
967 uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
968 size_t Start = Position;
971 if (!isHexDigit(look()))
974 if (consumeIf('0')) {
978 while (!Error && !consumeIf('_')) {
983 else if ('a' <= C && C <= 'f')
984 Value += 10 + (C - 'a');
991 HexDigits = std::string_view();
995 size_t End = Position - 1;
997 HexDigits = Input.substr(Start, End - Start);
1001 void Demangler::print(char C) {
1002 if (Error || !Print)
1008 void Demangler::print(std::string_view S) {
1009 if (Error || !Print)
1015 void Demangler::printDecimalNumber(uint64_t N) {
1016 if (Error || !Print)
1022 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1023 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1024 // bound by one of the enclosing binders.
1025 void Demangler::printLifetime(uint64_t Index) {
1031 if (Index - 1 >= BoundLifetimes) {
1036 uint64_t Depth = BoundLifetimes - Index;
1039 char C = 'a
' + Depth;
1043 printDecimalNumber(Depth - 26 + 1);
1047 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1054 Value = 26 + (C - '0');
1061 static void removeNullBytes(OutputBuffer
&Output
, size_t StartIdx
) {
1062 char *Buffer
= Output
.getBuffer();
1063 char *Start
= Buffer
+ StartIdx
;
1064 char *End
= Buffer
+ Output
.getCurrentPosition();
1065 Output
.setCurrentPosition(std::remove(Start
, End
, '\0') - Buffer
);
1068 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1069 // CodePoint is not a valid unicode scalar value.
1070 static inline bool encodeUTF8(size_t CodePoint
, char *Output
) {
1071 if (0xD800 <= CodePoint
&& CodePoint
<= 0xDFFF)
1074 if (CodePoint
<= 0x7F) {
1075 Output
[0] = CodePoint
;
1079 if (CodePoint
<= 0x7FF) {
1080 Output
[0] = 0xC0 | ((CodePoint
>> 6) & 0x3F);
1081 Output
[1] = 0x80 | (CodePoint
& 0x3F);
1085 if (CodePoint
<= 0xFFFF) {
1086 Output
[0] = 0xE0 | (CodePoint
>> 12);
1087 Output
[1] = 0x80 | ((CodePoint
>> 6) & 0x3F);
1088 Output
[2] = 0x80 | (CodePoint
& 0x3F);
1092 if (CodePoint
<= 0x10FFFF) {
1093 Output
[0] = 0xF0 | (CodePoint
>> 18);
1094 Output
[1] = 0x80 | ((CodePoint
>> 12) & 0x3F);
1095 Output
[2] = 0x80 | ((CodePoint
>> 6) & 0x3F);
1096 Output
[3] = 0x80 | (CodePoint
& 0x3F);
1103 // Decodes string encoded using punycode and appends results to Output.
1104 // Returns true if decoding was successful.
1105 static bool decodePunycode(std::string_view Input
, OutputBuffer
&Output
) {
1106 size_t OutputSize
= Output
.getCurrentPosition();
1107 size_t InputIdx
= 0;
1109 // Rust uses an underscore as a delimiter.
1110 size_t DelimiterPos
= std::string_view::npos
;
1111 for (size_t I
= 0; I
!= Input
.size(); ++I
)
1112 if (Input
[I
] == '_')
1115 if (DelimiterPos
!= std::string_view::npos
) {
1116 // Copy basic code points before the last delimiter to the output.
1117 for (; InputIdx
!= DelimiterPos
; ++InputIdx
) {
1118 char C
= Input
[InputIdx
];
1121 // Code points are padded with zeros while decoding is in progress.
1123 Output
+= std::string_view(UTF8
, 4);
1125 // Skip over the delimiter.
1137 auto Adapt
= [&](size_t Delta
, size_t NumPoints
) {
1139 Delta
+= Delta
/ NumPoints
;
1143 while (Delta
> (Base
- TMin
) * TMax
/ 2) {
1144 Delta
/= Base
- TMin
;
1147 return K
+ (((Base
- TMin
+ 1) * Delta
) / (Delta
+ Skew
));
1150 // Main decoding loop.
1151 for (size_t I
= 0; InputIdx
!= Input
.size(); I
+= 1) {
1154 size_t Max
= std::numeric_limits
<size_t>::max();
1155 for (size_t K
= Base
; true; K
+= Base
) {
1156 if (InputIdx
== Input
.size())
1158 char C
= Input
[InputIdx
++];
1160 if (!decodePunycodeDigit(C
, Digit
))
1163 if (Digit
> (Max
- I
) / W
)
1170 else if (K
>= Bias
+ TMax
)
1178 if (W
> Max
/ (Base
- T
))
1182 size_t NumPoints
= (Output
.getCurrentPosition() - OutputSize
) / 4 + 1;
1183 Bias
= Adapt(I
- OldI
, NumPoints
);
1185 if (I
/ NumPoints
> Max
- N
)
1190 // Insert N at position I in the output.
1192 if (!encodeUTF8(N
, UTF8
))
1194 Output
.insert(OutputSize
+ I
* 4, UTF8
, 4);
1197 removeNullBytes(Output
, OutputSize
);
1201 void Demangler::printIdentifier(Identifier Ident
) {
1202 if (Error
|| !Print
)
1205 if (Ident
.Punycode
) {
1206 if (!decodePunycode(Ident
.Name
, Output
))
1213 char Demangler::look() const {
1214 if (Error
|| Position
>= Input
.size())
1217 return Input
[Position
];
1220 char Demangler::consume() {
1221 if (Error
|| Position
>= Input
.size()) {
1226 return Input
[Position
++];
1229 bool Demangler::consumeIf(char Prefix
) {
1230 if (Error
|| Position
>= Input
.size() || Input
[Position
] != Prefix
)
1237 /// Computes A + B. When computation wraps around sets the error and returns
1238 /// false. Otherwise assigns the result to A and returns true.
1239 bool Demangler::addAssign(uint64_t &A
, uint64_t B
) {
1240 if (A
> std::numeric_limits
<uint64_t>::max() - B
) {
1249 /// Computes A * B. When computation wraps around sets the error and returns
1250 /// false. Otherwise assigns the result to A and returns true.
1251 bool Demangler::mulAssign(uint64_t &A
, uint64_t B
) {
1252 if (B
!= 0 && A
> std::numeric_limits
<uint64_t>::max() / B
) {