1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringView.h"
16 #include "llvm/Demangle/Utility.h"
26 using llvm::itanium_demangle::OutputStream
;
27 using llvm::itanium_demangle::StringView
;
28 using llvm::itanium_demangle::SwapAndRestore
;
36 bool empty() const { return Name
.empty(); }
39 enum class BasicType
{
68 enum class LeaveGenericsOpen
{
74 // Maximum recursion level. Used to avoid stack overflow.
75 size_t MaxRecursionLevel
;
76 // Current recursion level.
77 size_t RecursionLevel
;
78 size_t BoundLifetimes
;
79 // Input string that is being demangled with "_R" prefix removed.
81 // Position in the input string.
83 // When true, print methods append the output to the stream.
84 // When false, the output is suppressed.
86 // True if an error occurred.
93 Demangler(size_t MaxRecursionLevel
= 500);
95 bool demangle(StringView MangledName
);
98 bool demanglePath(IsInType Type
,
99 LeaveGenericsOpen LeaveOpen
= LeaveGenericsOpen::No
);
100 void demangleImplPath(IsInType InType
);
101 void demangleGenericArg();
103 void demangleFnSig();
104 void demangleDynBounds();
105 void demangleDynTrait();
106 void demangleOptionalBinder();
107 void demangleConst();
108 void demangleConstInt();
109 void demangleConstBool();
110 void demangleConstChar();
112 template <typename Callable
> void demangleBackref(Callable Demangler
) {
113 uint64_t Backref
= parseBase62Number();
114 if (Error
|| Backref
>= Position
) {
122 SwapAndRestore
<size_t> SavePosition(Position
, Position
);
127 Identifier
parseIdentifier();
128 uint64_t parseOptionalBase62Number(char Tag
);
129 uint64_t parseBase62Number();
130 uint64_t parseDecimalNumber();
131 uint64_t parseHexNumber(StringView
&HexDigits
);
134 void print(StringView S
);
135 void printDecimalNumber(uint64_t N
);
136 void printBasicType(BasicType
);
137 void printLifetime(uint64_t Index
);
141 bool consumeIf(char Prefix
);
143 bool addAssign(uint64_t &A
, uint64_t B
);
144 bool mulAssign(uint64_t &A
, uint64_t B
);
149 char *llvm::rustDemangle(const char *MangledName
, char *Buf
, size_t *N
,
151 if (MangledName
== nullptr || (Buf
!= nullptr && N
== nullptr)) {
152 if (Status
!= nullptr)
153 *Status
= demangle_invalid_args
;
157 // Return early if mangled name doesn't look like a Rust symbol.
158 StringView
Mangled(MangledName
);
159 if (!Mangled
.startsWith("_R")) {
160 if (Status
!= nullptr)
161 *Status
= demangle_invalid_mangled_name
;
166 if (!initializeOutputStream(nullptr, nullptr, D
.Output
, 1024)) {
167 if (Status
!= nullptr)
168 *Status
= demangle_memory_alloc_failure
;
172 if (!D
.demangle(Mangled
)) {
173 if (Status
!= nullptr)
174 *Status
= demangle_invalid_mangled_name
;
175 std::free(D
.Output
.getBuffer());
180 char *Demangled
= D
.Output
.getBuffer();
181 size_t DemangledLen
= D
.Output
.getCurrentPosition();
183 if (Buf
!= nullptr) {
184 if (DemangledLen
<= *N
) {
185 std::memcpy(Buf
, Demangled
, DemangledLen
);
186 std::free(Demangled
);
196 if (Status
!= nullptr)
197 *Status
= demangle_success
;
202 Demangler::Demangler(size_t MaxRecursionLevel
)
203 : MaxRecursionLevel(MaxRecursionLevel
) {}
205 static inline bool isDigit(const char C
) { return '0' <= C
&& C
<= '9'; }
207 static inline bool isHexDigit(const char C
) {
208 return ('0' <= C
&& C
<= '9') || ('a' <= C
&& C
<= 'f');
211 static inline bool isLower(const char C
) { return 'a' <= C
&& C
<= 'z'; }
213 static inline bool isUpper(const char C
) { return 'A' <= C
&& C
<= 'Z'; }
215 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
216 static inline bool isValid(const char C
) {
217 return isDigit(C
) || isLower(C
) || isUpper(C
) || C
== '_';
220 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
221 // otherwise. The demangled symbol is stored in Output field. It is
222 // responsibility of the caller to free the memory behind the output stream.
224 // <symbol-name> = "_R" <path> [<instantiating-crate>]
225 bool Demangler::demangle(StringView Mangled
) {
232 if (!Mangled
.consumeFront("_R")) {
236 size_t Dot
= Mangled
.find('.');
237 Input
= Mangled
.substr(0, Dot
);
238 StringView Suffix
= Mangled
.dropFront(Dot
);
240 demanglePath(IsInType::No
);
242 if (Position
!= Input
.size()) {
243 SwapAndRestore
<bool> SavePrint(Print
, false);
244 demanglePath(IsInType::No
);
247 if (Position
!= Input
.size())
250 if (!Suffix
.empty()) {
259 // Demangles a path. InType indicates whether a path is inside a type. When
260 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
261 // output. Return value indicates whether generics arguments have been left
264 // <path> = "C" <identifier> // crate root
265 // | "M" <impl-path> <type> // <T> (inherent impl)
266 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
267 // | "Y" <type> <path> // <T as Trait> (trait definition)
268 // | "N" <ns> <path> <identifier> // ...::ident (nested path)
269 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
271 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
272 // <ns> = "C" // closure
274 // | <A-Z> // other special namespaces
275 // | <a-z> // internal namespaces
276 bool Demangler::demanglePath(IsInType InType
, LeaveGenericsOpen LeaveOpen
) {
277 if (Error
|| RecursionLevel
>= MaxRecursionLevel
) {
281 SwapAndRestore
<size_t> SaveRecursionLevel(RecursionLevel
, RecursionLevel
+ 1);
285 parseOptionalBase62Number('s');
286 Identifier Ident
= parseIdentifier();
291 demangleImplPath(InType
);
298 demangleImplPath(InType
);
302 demanglePath(IsInType::Yes
);
310 demanglePath(IsInType::Yes
);
316 if (!isLower(NS
) && !isUpper(NS
)) {
320 demanglePath(InType
);
322 uint64_t Disambiguator
= parseOptionalBase62Number('s');
323 Identifier Ident
= parseIdentifier();
326 // Special namespaces
334 if (!Ident
.empty()) {
339 printDecimalNumber(Disambiguator
);
342 // Implementation internal namespaces.
343 if (!Ident
.empty()) {
351 demanglePath(InType
);
352 // Omit "::" when in a type, where it is optional.
353 if (InType
== IsInType::No
)
356 for (size_t I
= 0; !Error
&& !consumeIf('E'); ++I
) {
359 demangleGenericArg();
361 if (LeaveOpen
== LeaveGenericsOpen::Yes
)
369 demangleBackref([&] { IsOpen
= demanglePath(InType
, LeaveOpen
); });
380 // <impl-path> = [<disambiguator>] <path>
381 // <disambiguator> = "s" <base-62-number>
382 void Demangler::demangleImplPath(IsInType InType
) {
383 SwapAndRestore
<bool> SavePrint(Print
, false);
384 parseOptionalBase62Number('s');
385 demanglePath(InType
);
388 // <generic-arg> = <lifetime>
391 // <lifetime> = "L" <base-62-number>
392 void Demangler::demangleGenericArg() {
394 printLifetime(parseBase62Number());
395 else if (consumeIf('K'))
401 // <basic-type> = "a" // i8
421 // | "p" // placeholder (e.g. for generic params), shown as _
422 static bool parseBasicType(char C
, BasicType
&Type
) {
425 Type
= BasicType::I8
;
428 Type
= BasicType::Bool
;
431 Type
= BasicType::Char
;
434 Type
= BasicType::F64
;
437 Type
= BasicType::Str
;
440 Type
= BasicType::F32
;
443 Type
= BasicType::U8
;
446 Type
= BasicType::ISize
;
449 Type
= BasicType::USize
;
452 Type
= BasicType::I32
;
455 Type
= BasicType::U32
;
458 Type
= BasicType::I128
;
461 Type
= BasicType::U128
;
464 Type
= BasicType::Placeholder
;
467 Type
= BasicType::I16
;
470 Type
= BasicType::U16
;
473 Type
= BasicType::Unit
;
476 Type
= BasicType::Variadic
;
479 Type
= BasicType::I64
;
482 Type
= BasicType::U64
;
485 Type
= BasicType::Never
;
492 void Demangler::printBasicType(BasicType Type
) {
494 case BasicType::Bool
:
497 case BasicType::Char
:
512 case BasicType::I128
:
515 case BasicType::ISize
:
530 case BasicType::U128
:
533 case BasicType::USize
:
545 case BasicType::Placeholder
:
548 case BasicType::Unit
:
551 case BasicType::Variadic
:
554 case BasicType::Never
:
560 // <type> = | <basic-type>
561 // | <path> // named type
562 // | "A" <type> <const> // [T; N]
563 // | "S" <type> // [T]
564 // | "T" {<type>} "E" // (T1, T2, T3, ...)
565 // | "R" [<lifetime>] <type> // &T
566 // | "Q" [<lifetime>] <type> // &mut T
567 // | "P" <type> // *const T
568 // | "O" <type> // *mut T
569 // | "F" <fn-sig> // fn(...) -> ...
570 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
571 // | <backref> // backref
572 void Demangler::demangleType() {
573 if (Error
|| RecursionLevel
>= MaxRecursionLevel
) {
577 SwapAndRestore
<size_t> SaveRecursionLevel(RecursionLevel
, RecursionLevel
+ 1);
579 size_t Start
= Position
;
582 if (parseBasicType(C
, Type
))
583 return printBasicType(Type
);
601 for (; !Error
&& !consumeIf('E'); ++I
) {
614 if (consumeIf('L')) {
615 if (auto Lifetime
= parseBase62Number()) {
616 printLifetime(Lifetime
);
637 if (consumeIf('L')) {
638 if (auto Lifetime
= parseBase62Number()) {
640 printLifetime(Lifetime
);
647 demangleBackref([&] { demangleType(); });
651 demanglePath(IsInType::Yes
);
656 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
658 // | <undisambiguated-identifier>
659 void Demangler::demangleFnSig() {
660 SwapAndRestore
<size_t> SaveBoundLifetimes(BoundLifetimes
, BoundLifetimes
);
661 demangleOptionalBinder();
666 if (consumeIf('K')) {
668 if (consumeIf('C')) {
671 Identifier Ident
= parseIdentifier();
672 for (char C
: Ident
.Name
) {
673 // When mangling ABI string, the "-" is replaced with "_".
683 for (size_t I
= 0; !Error
&& !consumeIf('E'); ++I
) {
690 if (consumeIf('u')) {
691 // Skip the unit type from the output.
698 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
699 void Demangler::demangleDynBounds() {
700 SwapAndRestore
<size_t> SaveBoundLifetimes(BoundLifetimes
, BoundLifetimes
);
702 demangleOptionalBinder();
703 for (size_t I
= 0; !Error
&& !consumeIf('E'); ++I
) {
710 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
711 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
712 void Demangler::demangleDynTrait() {
713 bool IsOpen
= demanglePath(IsInType::Yes
, LeaveGenericsOpen::Yes
);
714 while (!Error
&& consumeIf('p')) {
721 print(parseIdentifier().Name
);
729 // Demangles optional binder and updates the number of bound lifetimes.
731 // <binder> = "G" <base-62-number>
732 void Demangler::demangleOptionalBinder() {
733 uint64_t Binder
= parseOptionalBase62Number('G');
734 if (Error
|| Binder
== 0)
737 // In valid inputs each bound lifetime is referenced later. Referencing a
738 // lifetime requires at least one byte of input. Reject inputs that are too
739 // short to reference all bound lifetimes. Otherwise demangling of invalid
740 // binders could generate excessive amounts of output.
741 if (Binder
>= Input
.size() - BoundLifetimes
) {
747 for (size_t I
= 0; I
!= Binder
; ++I
) {
756 // <const> = <basic-type> <const-data>
757 // | "p" // placeholder
759 void Demangler::demangleConst() {
760 if (Error
|| RecursionLevel
>= MaxRecursionLevel
) {
764 SwapAndRestore
<size_t> SaveRecursionLevel(RecursionLevel
, RecursionLevel
+ 1);
768 if (parseBasicType(C
, Type
)) {
774 case BasicType::I128
:
775 case BasicType::ISize
:
780 case BasicType::U128
:
781 case BasicType::USize
:
784 case BasicType::Bool
:
787 case BasicType::Char
:
790 case BasicType::Placeholder
:
797 } else if (C
== 'B') {
798 demangleBackref([&] { demangleConst(); });
804 // <const-data> = ["n"] <hex-number>
805 void Demangler::demangleConstInt() {
809 StringView HexDigits
;
810 uint64_t Value
= parseHexNumber(HexDigits
);
811 if (HexDigits
.size() <= 16) {
812 printDecimalNumber(Value
);
819 // <const-data> = "0_" // false
821 void Demangler::demangleConstBool() {
822 StringView HexDigits
;
823 parseHexNumber(HexDigits
);
824 if (HexDigits
== "0")
826 else if (HexDigits
== "1")
832 /// Returns true if CodePoint represents a printable ASCII character.
833 static bool isAsciiPrintable(uint64_t CodePoint
) {
834 return 0x20 <= CodePoint
&& CodePoint
<= 0x7e;
837 // <const-data> = <hex-number>
838 void Demangler::demangleConstChar() {
839 StringView HexDigits
;
840 uint64_t CodePoint
= parseHexNumber(HexDigits
);
841 if (Error
|| HexDigits
.size() > 6) {
867 if (isAsciiPrintable(CodePoint)) {
880 // <undisambiguated-identifier> = ["u
"] <decimal-number> ["_
"] <bytes>
881 Identifier Demangler::parseIdentifier() {
882 bool Punycode = consumeIf('u');
883 uint64_t Bytes = parseDecimalNumber();
885 // Underscore resolves the ambiguity when identifier starts with a decimal
886 // digit or another underscore.
889 if (Error || Bytes > Input.size() - Position) {
893 StringView S = Input.substr(Position, Bytes);
896 if (!std::all_of(S.begin(), S.end(), isValid)) {
901 return {S, Punycode};
904 // Parses optional base 62 number. The presence of a number is determined using
905 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
907 // This function is indended for parsing disambiguators and binders which when
908 // not present have their value interpreted as 0, and otherwise as decoded
909 // value + 1. For example for binders, value for "G_
" is 1, for "G0_
" value is
910 // 2. When "G
" is absent value is 0.
911 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
915 uint64_t N = parseBase62Number();
916 if (Error || !addAssign(N, 1))
922 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
923 // "_
". All values are offset by 1, so that "_
" encodes 0, "0_
" encodes 1,
924 // "1_
" encodes 2, etc.
926 // <base-62-number> = {<0-9a-zA-Z>} "_
"
927 uint64_t Demangler::parseBase62Number() {
939 } else if (isDigit(C)) {
941 } else if (isLower(C)) {
942 Digit = 10 + (C - 'a');
943 } else if (isUpper(C)) {
944 Digit = 10 + 26 + (C - 'A');
950 if (!mulAssign(Value, 62))
953 if (!addAssign(Value, Digit))
957 if (!addAssign(Value, 1))
963 // Parses a decimal number that had been encoded without any leading zeros.
965 // <decimal-number> = "0"
967 uint64_t Demangler::parseDecimalNumber() {
981 while (isDigit(look())) {
982 if (!mulAssign(Value, 10)) {
987 uint64_t D = consume() - '0';
988 if (!addAssign(Value, D))
995 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
996 // value and stores hex digits in HexDigits. The return value is unspecified if
997 // HexDigits.size() > 16.
999 // <hex-number> = "0_
"
1000 // | <1-9a-f> {<0-9a-f>} "_
"
1001 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
1002 size_t Start = Position;
1005 if (!isHexDigit(look()))
1008 if (consumeIf('0')) {
1009 if (!consumeIf('_'))
1012 while (!Error && !consumeIf('_')) {
1017 else if ('a' <= C && C <= 'f')
1018 Value += 10 + (C - 'a');
1025 HexDigits = StringView();
1029 size_t End = Position - 1;
1030 assert(Start < End);
1031 HexDigits = Input.substr(Start, End - Start);
1035 void Demangler::print(char C) {
1036 if (Error || !Print)
1042 void Demangler::print(StringView S) {
1043 if (Error || !Print)
1049 void Demangler::printDecimalNumber(uint64_t N) {
1050 if (Error || !Print)
1056 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1057 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1058 // bound by one of the enclosing binders.
1059 void Demangler::printLifetime(uint64_t Index) {
1065 if (Index - 1 >= BoundLifetimes) {
1070 uint64_t Depth = BoundLifetimes - Index;
1073 char C = 'a
' + Depth;
1077 printDecimalNumber(Depth - 26 + 1);
1081 char Demangler::look() const {
1082 if (Error || Position >= Input.size())
1085 return Input[Position];
1088 char Demangler::consume() {
1089 if (Error || Position >= Input.size()) {
1094 return Input[Position++];
1097 bool Demangler::consumeIf(char Prefix) {
1098 if (Error || Position >= Input.size() || Input[Position] != Prefix)
1105 /// Computes A + B. When computation wraps around sets the error and returns
1106 /// false. Otherwise assigns the result to A and returns true.
1107 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1108 if (A > std::numeric_limits<uint64_t>::max() - B) {
1117 /// Computes A * B. When computation wraps around sets the error and returns
1118 /// false. Otherwise assigns the result to A and returns true.
1119 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1120 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {