1 // SPDX-License-Identifier: GPL-2.0
6 #include "demangle-rust.h"
9 * Mangled Rust symbols look like this:
11 * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
13 * The original symbol is:
15 * <std::sys::fd::FileDesc as core::ops::Drop>::drop
17 * The last component of the path is a 64-bit hash in lowercase hex, prefixed
18 * with "h". Rust does not have a global namespace between crates, an illusion
19 * which Rust maintains by using the hash to distinguish things that would
20 * otherwise have the same symbol.
22 * Any path component not starting with a XID_Start character is prefixed with "_".
25 * The following escape sequences are used:
41 * A double ".." means "::" and a single "." means "-".
43 * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
/*
 * Layout of the hash suffix that ends a mangled symbol: the literal "::h"
 * (hash_prefix, whose length is hash_prefix_len) followed by hash_len
 * lowercase hex digits.
 */
46 static const char *hash_prefix
= "::h";
47 static const size_t hash_prefix_len
= 3;
48 static const size_t hash_len
= 16;
/*
 * Forward declarations of the file-local helpers defined further down.
 *
 * is_prefixed_hash: tests whether a string starts with "::h" followed by a
 *                   plausible 16-digit hex hash.
 * looks_like_rust:  scans the first len characters of a symbol for the
 *                   recognized "$...$" escape sequences and rejects runs of
 *                   three or more dots.
 * unescape:         compares the text at *in against the escape sequence seq;
 *                   presumably emits value through *out and advances both
 *                   cursors on a match -- confirm against the definition.
 */
50 static bool is_prefixed_hash(const char *start
);
51 static bool looks_like_rust(const char *sym
, size_t len
);
52 static bool unescape(const char **in
, char **out
, const char *seq
, char value
);
56 * sym: symbol that has been through BFD-demangling
58 * This function looks for the following indicators:
60 * 1. The hash must consist of "h" followed by 16 lowercase hex digits.
62 * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
63 * hex digits. This is true of 99.9998% of hashes so once in your life you
64 * may see a false negative. The point is to notice path components that
65 * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
66 * this case a false positive (non-Rust symbol has an important path
67 * component removed because it looks like a Rust hash) is worse than a
68 * false negative (the rare Rust symbol is not demangled) so this sets the
69 * balance in favor of false negatives.
71 * 3. There must be no characters other than a-zA-Z0-9 and _.:$
73 * 4. There must be no unrecognized $-sign sequences.
75 * 5. There must be no sequence of three or more dots in a row ("...").
78 rust_is_mangled(const char *sym
)
80 size_t len
, len_without_hash
;
86 if (len
<= hash_prefix_len
+ hash_len
)
87 /* Not long enough to contain "::h" + hash + something else */
90 len_without_hash
= len
- (hash_prefix_len
+ hash_len
);
91 if (!is_prefixed_hash(sym
+ len_without_hash
))
94 return looks_like_rust(sym
, len_without_hash
);
98 * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
99 * digits must comprise between 5 and 15 (inclusive) distinct digits.
101 static bool is_prefixed_hash(const char *str
)
108 if (strncmp(str
, hash_prefix
, hash_prefix_len
))
110 str
+= hash_prefix_len
;
112 memset(seen
, false, sizeof(seen
));
113 for (end
= str
+ hash_len
; str
< end
; str
++)
114 if (*str
>= '0' && *str
<= '9')
115 seen
[*str
- '0'] = true;
116 else if (*str
>= 'a' && *str
<= 'f')
117 seen
[*str
- 'a' + 10] = true;
121 /* Count how many distinct digits were seen */
123 for (i
= 0; i
< 16; i
++)
127 return count
>= 5 && count
<= 15;
130 static bool looks_like_rust(const char *str
, size_t len
)
132 const char *end
= str
+ len
;
137 if (!strncmp(str
, "$C$", 3))
139 else if (!strncmp(str
, "$SP$", 4)
140 || !strncmp(str
, "$BP$", 4)
141 || !strncmp(str
, "$RF$", 4)
142 || !strncmp(str
, "$LT$", 4)
143 || !strncmp(str
, "$GT$", 4)
144 || !strncmp(str
, "$LP$", 4)
145 || !strncmp(str
, "$RP$", 4))
147 else if (!strncmp(str
, "$u20$", 5)
148 || !strncmp(str
, "$u27$", 5)
149 || !strncmp(str
, "$u5b$", 5)
150 || !strncmp(str
, "$u5d$", 5)
151 || !strncmp(str
, "$u7e$", 5))
157 /* Do not allow three or more consecutive dots */
158 if (!strncmp(str
, "...", 3))
177 * sym: symbol for which rust_is_mangled(sym) returns true
179 * The input is demangled in-place because the mangled name is always longer
180 * than the demangled one.
183 rust_demangle_sym(char *sym
)
194 end
= sym
+ strlen(sym
) - (hash_prefix_len
+ hash_len
);
199 if (!(unescape(&in
, &out
, "$C$", ',')
200 || unescape(&in
, &out
, "$SP$", '@')
201 || unescape(&in
, &out
, "$BP$", '*')
202 || unescape(&in
, &out
, "$RF$", '&')
203 || unescape(&in
, &out
, "$LT$", '<')
204 || unescape(&in
, &out
, "$GT$", '>')
205 || unescape(&in
, &out
, "$LP$", '(')
206 || unescape(&in
, &out
, "$RP$", ')')
207 || unescape(&in
, &out
, "$u20$", ' ')
208 || unescape(&in
, &out
, "$u27$", '\'')
209 || unescape(&in
, &out
, "$u5b$", '[')
210 || unescape(&in
, &out
, "$u5d$", ']')
211 || unescape(&in
, &out
, "$u7e$", '~'))) {
212 pr_err("demangle-rust: unexpected escape sequence");
218 * If this is the start of a path component and the next
219 * character is an escape sequence, ignore the
220 * underscore. The mangler inserts an underscore to make
221 * sure the path component begins with a XID_Start character.
224 if ((in
== sym
|| in
[-1] == ':') && in
[1] == '$')
231 /* ".." becomes "::" */
236 /* "." becomes "-" */
248 pr_err("demangle-rust: unexpected character '%c' in symbol\n",
257 static bool unescape(const char **in
, char **out
, const char *seq
, char value
)
259 size_t len
= strlen(seq
);
261 if (strncmp(*in
, seq
, len
))