1 //===--- DLangDemangle.cpp ------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file defines a demangler for the D programming language as specified
11 /// in the ABI specification, available at:
12 /// https://dlang.org/spec/abi.html#name_mangling
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Demangle/Demangle.h"
17 #include "llvm/Demangle/StringViewExtras.h"
18 #include "llvm/Demangle/Utility.h"
23 #include <string_view>
26 using llvm::itanium_demangle::OutputBuffer
;
27 using llvm::itanium_demangle::starts_with
;
31 /// Demangle information structure.
33 /// Initialize the information structure we use to pass around information.
35 /// \param Mangled String to demangle.
36 Demangler(std::string_view Mangled
);
38 /// Extract and demangle the mangled symbol and append it to the output
41 /// \param Demangled Output buffer to write the demangled name.
43 /// \return The remaining string on success or nullptr on failure.
45 /// \see https://dlang.org/spec/abi.html#name_mangling .
46 /// \see https://dlang.org/spec/abi.html#MangledName .
47 const char *parseMangle(OutputBuffer
*Demangled
);
50 /// Extract and demangle a given mangled symbol and append it to the output
53 /// \param Demangled output buffer to write the demangled name.
54 /// \param Mangled mangled symbol to be demangled.
56 /// \see https://dlang.org/spec/abi.html#name_mangling .
57 /// \see https://dlang.org/spec/abi.html#MangledName .
58 void parseMangle(OutputBuffer
*Demangled
, std::string_view
&Mangled
);
60 /// Extract the number from a given string.
62 /// \param Mangled string to extract the number.
63 /// \param Ret assigned result value.
65 /// \note Ret larger than UINT_MAX is considered a failure.
67 /// \see https://dlang.org/spec/abi.html#Number .
68 void decodeNumber(std::string_view
&Mangled
, unsigned long &Ret
);
70 /// Extract the back reference position from a given string.
72 /// \param Mangled string to extract the back reference position.
73 /// \param Ret assigned result value.
75 /// \return true on success, false on error.
77 /// \note Ret is always >= 0 on success, and unspecified on failure
79 /// \see https://dlang.org/spec/abi.html#back_ref .
80 /// \see https://dlang.org/spec/abi.html#NumberBackRef .
81 bool decodeBackrefPos(std::string_view
&Mangled
, long &Ret
);
83 /// Extract the symbol pointed to by the back reference from a given string.
85 /// \param Mangled string to extract the back reference position.
86 /// \param Ret assigned result value.
88 /// \return true on success, false on error.
90 /// \see https://dlang.org/spec/abi.html#back_ref .
91 bool decodeBackref(std::string_view
&Mangled
, std::string_view
&Ret
);
93 /// Extract and demangle backreferenced symbol from a given mangled symbol
94 /// and append it to the output string.
96 /// \param Demangled output buffer to write the demangled name.
97 /// \param Mangled mangled symbol to be demangled.
99 /// \see https://dlang.org/spec/abi.html#back_ref .
100 /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
101 void parseSymbolBackref(OutputBuffer
*Demangled
, std::string_view
&Mangled
);
103 /// Extract and demangle backreferenced type from a given mangled symbol
104 /// and append it to the output string.
106 /// \param Mangled mangled symbol to be demangled.
108 /// \see https://dlang.org/spec/abi.html#back_ref .
109 /// \see https://dlang.org/spec/abi.html#TypeBackRef .
110 void parseTypeBackref(std::string_view
&Mangled
);
112 /// Check whether it is the beginning of a symbol name.
114 /// \param Mangled string to extract the symbol name.
116 /// \return true on success, false otherwise.
118 /// \see https://dlang.org/spec/abi.html#SymbolName .
119 bool isSymbolName(std::string_view Mangled
);
121 /// Extract and demangle an identifier from a given mangled symbol and append
122 /// it to the output string.
124 /// \param Demangled Output buffer to write the demangled name.
125 /// \param Mangled Mangled symbol to be demangled.
127 /// \see https://dlang.org/spec/abi.html#SymbolName .
128 void parseIdentifier(OutputBuffer
*Demangled
, std::string_view
&Mangled
);
130 /// Extract and demangle the plain identifier from a given mangled symbol and
131 /// prepend/append it to the output string, with a special treatment for some
132 /// magic compiler generated symbols.
134 /// \param Demangled Output buffer to write the demangled name.
135 /// \param Mangled Mangled symbol to be demangled.
136 /// \param Len Length of the mangled symbol name.
138 /// \see https://dlang.org/spec/abi.html#LName .
139 void parseLName(OutputBuffer
*Demangled
, std::string_view
&Mangled
,
142 /// Extract and demangle the qualified symbol from a given mangled symbol
143 /// and append it to the output string.
145 /// \param Demangled Output buffer to write the demangled name.
146 /// \param Mangled Mangled symbol to be demangled.
148 /// \see https://dlang.org/spec/abi.html#QualifiedName .
149 void parseQualified(OutputBuffer
*Demangled
, std::string_view
&Mangled
);
151 /// Extract and demangle a type from a given mangled symbol and append it to
152 /// the output string.
154 /// \param Mangled mangled symbol to be demangled.
156 /// \return true on success, false on error.
158 /// \see https://dlang.org/spec/abi.html#Type .
159 bool parseType(std::string_view
&Mangled
);
161 /// An immutable view of the string we are demangling.
162 const std::string_view Str
;
163 /// The index of the last back reference.
// Reads the leading run of decimal digits from Mangled, accumulating their
// value in Val and consuming each digit from the view as it goes.
// NOTE(review): the embedded numbering of this listing skips lines, so the
// bodies of the guards below and the final store to Ret are not visible here.
// NOTE(review): std::isdigit is called on a plain char; its behaviour is
// undefined for negative values, so bytes >= 0x80 are a hazard wherever char
// is signed -- consider casting to unsigned char first.
169 void Demangler::decodeNumber(std::string_view
&Mangled
, unsigned long &Ret
) {
170 // Clear Mangled if trying to extract something that isn't a digit.
171 if (Mangled
.empty()) {
176 if (!std::isdigit(Mangled
.front())) {
181 unsigned long Val
= 0;
// Numeric value of the leading character, measured from '0'.
184 unsigned long Digit
= Mangled
[0] - '0';
186 // Check for overflow.
// The bound is unsigned int's maximum even though Val is unsigned long; this
// matches the declaration's note that values above UINT_MAX are a failure.
187 if (Val
> (std::numeric_limits
<unsigned int>::max() - Digit
) / 10) {
// Shift the accumulated value one decimal place and add the new digit.
192 Val
= Val
* 10 + Digit
;
// Consume the digit that was just accumulated.
193 Mangled
.remove_prefix(1);
// Keep going for as long as the next character is still a digit.
194 } while (!Mangled
.empty() && std::isdigit(Mangled
.front()));
// Tested after the digit run: nothing is left in the view after the number.
196 if (Mangled
.empty()) {
// Decodes the letter-encoded position that follows a back reference marker,
// consuming the letters from Mangled while accumulating into Val.
// NOTE(review): the embedded numbering of this listing skips lines, so the
// early exits, the store to Ret and the return statements are not visible.
204 bool Demangler::decodeBackrefPos(std::string_view
&Mangled
, long &Ret
) {
205 // Nothing can be decoded from an empty view (failure is reported through
// the bool result, not through a pointer).
206 if (Mangled
.empty()) {
210 // Any identifier or non-basic type that has been emitted to the mangled
211 // symbol before will not be emitted again, but is referenced by a special
212 // sequence encoding the relative position of the original occurrence in the
213 // mangled symbol name.
214 // Numbers in back references are encoded with base 26 by upper case letters
215 // A-Z for higher digits but lower case letters a-z for the last digit.
218 // [A-Z] NumberBackRef
220 unsigned long Val
= 0;
// Only alphabetic characters take part in the encoding; stop at anything else.
222 while (!Mangled
.empty() && std::isalpha(Mangled
.front())) {
223 // Check for overflow
// 25 is the largest single digit and 26 the base; presumably this guards a
// "Val * 26 + digit" step whose multiplication is not shown in this listing.
224 if (Val
> (std::numeric_limits
<unsigned long>::max() - 25) / 26)
// A lower case letter is, per the encoding described above, the last digit.
229 if (Mangled
[0] >= 'a' && Mangled
[0] <= 'z') {
230 Val
+= Mangled
[0] - 'a';
// Consume the terminating lower case digit.
234 Mangled
.remove_prefix(1);
// Digit value measured from 'A' (the higher digits of the encoding).
238 Val
+= Mangled
[0] - 'A';
239 Mangled
.remove_prefix(1);
// Decodes a back reference that starts at the 'Q' marker at the front of
// Mangled, using decodeBackrefPos for the encoded position.
// NOTE(review): the embedded numbering of this listing skips lines; RefPos's
// declaration, the failure branches and the assignment to Ret are not visible.
246 bool Demangler::decodeBackref(std::string_view
&Mangled
,
247 std::string_view
&Ret
) {
// Precondition: the caller has already seen the 'Q' marker.
248 assert(!Mangled
.empty() && Mangled
.front() == 'Q' &&
249 "Invalid back reference!");
// Remember where the marker sits; the decoded position is compared against it.
253 const char *Qpos
= Mangled
.data();
// Step over the 'Q' marker itself.
255 Mangled
.remove_prefix(1);
257 if (!decodeBackrefPos(Mangled
, RefPos
)) {
// Qpos - Str.data() is the marker's offset from the start of the whole symbol.
// Presumably RefPos is a distance back from Qpos (isSymbolName indexes
// Qref[-Ret] in the same way), so a larger value would fall before Str.
262 if (RefPos
> Qpos
- Str
.data()) {
267 // Set the position of the back reference.
// Resolves an identifier back reference: locates the referenced text with
// decodeBackref, reads its length prefix with decodeNumber and hands the
// identifier to parseLName for output.
// NOTE(review): the embedded numbering of this listing skips lines; Len's
// declaration and the bodies of the failure branches are not visible here.
273 void Demangler::parseSymbolBackref(OutputBuffer
*Demangled
,
274 std::string_view
&Mangled
) {
275 // An identifier back reference always points to a digit 0 to 9.
276 // IdentifierBackRef:
281 // Get position of the back reference
282 std::string_view Backref
;
283 if (!decodeBackref(Mangled
, Backref
)) {
288 // Must point to a simple identifier
// The digits at the referenced position are the identifier's length prefix.
289 decodeNumber(Backref
, Len
);
// Guard: after the length prefix, the referenced text must still hold at
// least Len characters.
290 if (Backref
.empty() || Backref
.length() < Len
) {
// Emit the identifier from the referenced text, not from Mangled.
295 parseLName(Demangled
, Backref
, Len
);
// Resolves a type back reference by decoding the referenced position and
// running parseType on the text found there. LastBackref is updated for the
// duration of the call and restored afterwards.
// NOTE(review): the embedded numbering of this listing skips lines, so the
// bodies of the failure branches are not visible here.
300 void Demangler::parseTypeBackref(std::string_view
&Mangled
) {
301 // A type back reference always points to a letter.
306 // If we appear to be moving backwards through the mangle string, then
307 // bail as this may be a recursive back reference.
// Compares the current offset into Str with the offset recorded in LastBackref.
308 if (Mangled
.data() - Str
.data() >= LastBackref
) {
// Keep the previous marker so it can be put back once this reference is done.
313 int SaveRefPos
= LastBackref
;
// Record the offset of this back reference within the whole symbol.
314 LastBackref
= Mangled
.data() - Str
.data();
316 // Get position of the back reference.
317 std::string_view Backref
;
318 if (!decodeBackref(Mangled
, Backref
)) {
323 // Can't decode back reference.
324 if (Backref
.empty()) {
329 // TODO: Add support for function type back references.
// The referenced text is parsed as a type; Mangled itself is not passed on.
330 if (!parseType(Backref
))
// Restore the marker that was saved before descending into the reference.
333 LastBackref
= SaveRefPos
;
// Tests whether Mangled begins a symbol name: either a digit, or a 'Q' back
// reference whose target character is a digit.
// Mangled is taken by value, so the characters consumed below do not affect
// the caller's view.
// NOTE(review): Mangled.front() is read without an emptiness check here; the
// visible caller (parseQualified) tests !Mangled.empty() before calling.
// NOTE(review): the embedded numbering of this listing skips lines; Ret's
// declaration and the early return statements are not visible here.
339 bool Demangler::isSymbolName(std::string_view Mangled
) {
// Position of the first character, used as the origin for the back reference.
341 const char *Qref
= Mangled
.data();
343 if (std::isdigit(Mangled
.front()))
346 // TODO: Handle template instances.
348 if (Mangled
.front() != 'Q')
// Step over the 'Q' marker before decoding the encoded position.
351 Mangled
.remove_prefix(1);
352 bool Valid
= decodeBackrefPos(Mangled
, Ret
);
// Qref - Str.data() is the marker's offset into the whole symbol; a larger Ret
// would index before the start of Str in the expression below.
353 if (!Valid
|| Ret
> Qref
- Str
.data())
// The answer is whether the character Ret positions before the marker is a
// digit.
356 return std::isdigit(Qref
[-Ret
]);
// Parses one mangled symbol: skips the two-character prefix, parses the
// qualified name into Demangled, then handles either a trailing 'Z' or a type.
// NOTE(review): the embedded numbering of this listing skips lines, so the
// bodies of the failure branches are not visible here.
359 void Demangler::parseMangle(OutputBuffer
*Demangled
,
360 std::string_view
&Mangled
) {
361 // A D mangled symbol is comprised of both scope and type information.
363 // _D QualifiedName Type
364 // _D QualifiedName Z
366 // The caller should have guaranteed that the start pointer is at the
368 // Note that type is never a function type, but only the return type of
369 // a function or the type of a variable.
// Drop the two leading characters (the "_D" of the grammar above).
370 Mangled
.remove_prefix(2);
372 parseQualified(Demangled
, Mangled
);
// Tested after the qualified name: nothing is left to act as 'Z' or a type.
374 if (Mangled
.empty()) {
379 // Artificial symbols end with 'Z' and have no type.
380 if (Mangled
.front() == 'Z') {
381 Mangled
.remove_prefix(1);
// Otherwise whatever follows the name is parsed as a type.
382 } else if (!parseType(Mangled
))
// Parses a qualified name by calling parseIdentifier repeatedly for as long
// as the remaining input starts another symbol name (see isSymbolName).
// NOTE(review): the embedded numbering of this listing skips lines; the
// opening of both loops and the code that uses NotFirst are not visible here.
386 void Demangler::parseQualified(OutputBuffer
*Demangled
,
387 std::string_view
&Mangled
) {
388 // Qualified names are identifiers separated by their encoded length.
389 // Nested functions also encode their argument types without specifying
392 // SymbolFunctionName
393 // SymbolFunctionName QualifiedName
395 // SymbolFunctionName:
397 // SymbolName TypeFunctionNoReturn
398 // SymbolName M TypeFunctionNoReturn
399 // SymbolName M TypeModifiers TypeFunctionNoReturn
400 // The start pointer should be at the above location.
402 // Whether it has more than one symbol
// NOTE(review): this is described as a flag and initialised from a bool
// literal, yet it is declared size_t; bool would state the intent directly.
403 size_t NotFirst
= false;
405 // Skip over anonymous symbols.
406 if (!Mangled
.empty() && Mangled
.front() == '0') {
// Leading '0' characters are dropped one at a time.
408 Mangled
.remove_prefix(1);
409 while (!Mangled
.empty() && Mangled
.front() == '0');
418 parseIdentifier(Demangled
, Mangled
);
// Continue only while input remains and it begins another symbol name.
419 } while (!Mangled
.empty() && isSymbolName(Mangled
));
// Parses one identifier: a 'Q' back reference is delegated to
// parseSymbolBackref; otherwise a length prefix is read, a fake "__S<digits>"
// parent is skipped, and anything else goes to parseLName.
// NOTE(review): the embedded numbering of this listing skips lines; Len's
// declaration, the failure branches and the digit loop's body are not visible.
422 void Demangler::parseIdentifier(OutputBuffer
*Demangled
,
423 std::string_view
&Mangled
) {
424 if (Mangled
.empty()) {
// A leading 'Q' marks a back reference to an earlier identifier.
429 if (Mangled
.front() == 'Q')
430 return parseSymbolBackref(Demangled
, Mangled
);
432 // TODO: Parse lengthless template instances.
// Read the decimal length prefix of the identifier.
435 decodeNumber(Mangled
, Len
);
437 if (Mangled
.empty()) {
// Guard: the length must be non-zero and fit within what is left.
441 if (!Len
|| Mangled
.length() < Len
) {
446 // TODO: Parse template instances with a length prefix.
448 // There can be multiple different declarations in the same function that
449 // have the same mangled name. To make the mangled names unique, a fake
450 // parent in the form `__Sddd' is added to the symbol.
451 if (Len
>= 4 && starts_with(Mangled
, "__S")) {
// Number of characters that follow this Len-character identifier in Mangled.
452 const size_t SuffixLen
= Mangled
.length() - Len
;
// P starts just past the "__S" prefix.
453 std::string_view P
= Mangled
.substr(3);
// Presumably the loop body (not shown in this listing) advances P by one
// character, so the loop walks the digits that follow "__S".
454 while (P
.length() > SuffixLen
&& std::isdigit(P
.front()))
// P shrinking to exactly the suffix means the scan above stopped at the end of
// the identifier rather than on a non-digit character.
456 if (P
.length() == SuffixLen
) {
457 // Skip over the fake parent.
458 Mangled
.remove_prefix(Len
);
459 return parseIdentifier(Demangled
, Mangled
);
462 // Else demangle it as a plain identifier.
465 parseLName(Demangled
, Mangled
, Len
);
// Parses a type at the front of Mangled by dispatching on its first
// character; per the declaration's documentation it returns true on success
// and false on error.
// NOTE(review): the embedded numbering of this listing skips lines; the case
// labels and every return statement of the switch are not visible here.
468 bool Demangler::parseType(std::string_view
&Mangled
) {
469 if (Mangled
.empty()) {
// Dispatch on the leading character of the remaining input.
474 switch (Mangled
.front()) {
475 // TODO: Parse type qualifiers.
476 // TODO: Parse function types.
477 // TODO: Parse compound types.
478 // TODO: Parse delegate types.
479 // TODO: Parse tuple types.
// Consume the single character that identified the type.
483 Mangled
.remove_prefix(1);
484 // TODO: Add type name dumping
487 // TODO: Add support for the rest of the basic types.
489 // Back referenced type.
491 parseTypeBackref(Mangled
);
495 default: // unhandled.
// Emits one identifier of length Len from the front of Mangled. A handful of
// compiler generated names are turned into a descriptive phrase that is
// prepended to the output; any other name is appended as written.
// NOTE(review): the embedded numbering of this listing skips lines; the end of
// the signature, whatever selects between the checks below and the statement
// that leaves each branch are not visible here.
// NOTE(review): every matched literal ends in 'Z' while only Len characters
// are consumed afterwards; presumably Len covers the name without that 'Z' --
// confirm against the missing lines.
501 void Demangler::parseLName(OutputBuffer
*Demangled
, std::string_view
&Mangled
,
505 if (starts_with(Mangled
, "__initZ")) {
506 // The static initializer for a given symbol.
507 Demangled
->prepend("initializer for ");
// Back the write position up by one character -- presumably to drop a
// separator already written by the caller; TODO confirm.
508 Demangled
->setCurrentPosition(Demangled
->getCurrentPosition() - 1);
509 Mangled
.remove_prefix(Len
);
512 if (starts_with(Mangled
, "__vtblZ")) {
513 // The vtable symbol for a given class.
514 Demangled
->prepend("vtable for ");
515 Demangled
->setCurrentPosition(Demangled
->getCurrentPosition() - 1);
516 Mangled
.remove_prefix(Len
);
522 if (starts_with(Mangled
, "__ClassZ")) {
523 // The classinfo symbol for a given class.
524 Demangled
->prepend("ClassInfo for ");
525 Demangled
->setCurrentPosition(Demangled
->getCurrentPosition() - 1);
526 Mangled
.remove_prefix(Len
);
532 if (starts_with(Mangled
, "__InterfaceZ")) {
533 // The interface symbol for a given class.
534 Demangled
->prepend("Interface for ");
535 Demangled
->setCurrentPosition(Demangled
->getCurrentPosition() - 1);
536 Mangled
.remove_prefix(Len
);
542 if (starts_with(Mangled
, "__ModuleInfoZ")) {
543 // The ModuleInfo symbol for a given module.
544 Demangled
->prepend("ModuleInfo for ");
545 Demangled
->setCurrentPosition(Demangled
->getCurrentPosition() - 1);
546 Mangled
.remove_prefix(Len
);
// Plain identifier: copy its Len characters to the output, then consume them.
552 *Demangled
<< Mangled
.substr(0, Len
);
553 Mangled
.remove_prefix(Len
);
// Stores the symbol to demangle in Str and starts LastBackref at the symbol's
// length, i.e. at an offset no position inside the symbol can reach.
// NOTE(review): LastBackref's declaration is not visible in this listing;
// parseTypeBackref saves it into an int, so a narrowing conversion from
// Mangled.length() looks likely -- confirm.
556 Demangler::Demangler(std::string_view Mangled
)
557 : Str(Mangled
), LastBackref(Mangled
.length()) {}
// Entry point that demangles the symbol stored in Str into Demangled.
// NOTE(review): the embedded numbering of this listing skips lines, so the
// statements that produce the returned pointer are not visible here; the
// declaration documents it as the remaining string, or nullptr on failure.
559 const char *Demangler::parseMangle(OutputBuffer
*Demangled
) {
// Work on a local copy of the view; Str itself is const and is left untouched.
560 std::string_view
M(this->Str
);
561 parseMangle(Demangled
, M
);
// Public entry point: demangles a D symbol into a buffer and returns it as a
// C string.
// NOTE(review): the embedded numbering of this listing skips lines; the
// return statements of the rejecting paths and the statement that writes the
// terminator are not visible here.
// NOTE(review): the returned buffer comes from OutputBuffer::getBuffer();
// the failure paths here release it with std::free, so the caller presumably
// owns the result and frees it the same way -- TODO confirm.
565 char *llvm::dlangDemangle(std::string_view MangledName
) {
// Only symbols that begin with "_D" are considered.
566 if (MangledName
.empty() || !starts_with(MangledName
, "_D"))
569 OutputBuffer Demangled
;
// Special case: this exact symbol is rendered as the fixed text "D main".
570 if (MangledName
== "_Dmain") {
571 Demangled
<< "D main";
574 Demangler
D(MangledName
);
575 const char *M
= D
.parseMangle(&Demangled
);
577 // Check that the entire symbol was successfully demangled.
// NOTE(review): *M reads the byte at the position where parsing stopped; that
// assumes a readable terminator there, which a std::string_view does not
// guarantee.
578 if (M
== nullptr || *M
!= '\0') {
// Discard the partial output on failure.
579 std::free(Demangled
.getBuffer());
584 // OutputBuffer's internal buffer is not null terminated and therefore we need
585 // to add it to comply with C null terminated strings.
586 if (Demangled
.getCurrentPosition() > 0) {
// Move the write position back by one before handing the buffer out.
588 Demangled
.setCurrentPosition(Demangled
.getCurrentPosition() - 1);
589 return Demangled
.getBuffer();
// Reached when the branch above did not return; the buffer is released here.
592 std::free(Demangled
.getBuffer());