Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / lib / Demangle / MicrosoftDemangle.cpp
blobcd7ff40d63a492807bc59bade52ea4c7488570ec
1 //===- MicrosoftDemangle.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for MSVC-style mangled symbols.
11 // This file has no dependencies on the rest of LLVM so that it can be
12 // easily reused in other programs such as libcxxabi.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Demangle/MicrosoftDemangle.h"
18 #include "llvm/Demangle/Demangle.h"
19 #include "llvm/Demangle/DemangleConfig.h"
20 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
21 #include "llvm/Demangle/StringViewExtras.h"
22 #include "llvm/Demangle/Utility.h"
24 #include <array>
25 #include <cctype>
26 #include <cstdio>
27 #include <string_view>
28 #include <tuple>
30 using namespace llvm;
31 using namespace ms_demangle;
// Returns true when S is non-empty and its first character is one of the
// ASCII decimal digits '0'..'9'.
//
// A plain range comparison is used instead of std::isdigit(): passing a
// negative char value (any byte >= 0x80 on platforms where char is signed) to
// std::isdigit() is undefined behaviour, and its result depends on the C
// locale. The C++ standard guarantees that '0'..'9' are contiguous.
static bool startsWithDigit(std::string_view S) {
  if (S.empty())
    return false;
  const char First = S.front();
  return First >= '0' && First <= '9';
}
37 struct NodeList {
38 Node *N = nullptr;
39 NodeList *Next = nullptr;
42 static bool consumeFront(std::string_view &S, char C) {
43 if (!llvm::itanium_demangle::starts_with(S, C))
44 return false;
45 S.remove_prefix(1);
46 return true;
49 static bool consumeFront(std::string_view &S, std::string_view C) {
50 if (!llvm::itanium_demangle::starts_with(S, C))
51 return false;
52 S.remove_prefix(C.size());
53 return true;
56 static bool isMemberPointer(std::string_view MangledName, bool &Error) {
57 Error = false;
58 const char F = MangledName.front();
59 MangledName.remove_prefix(1);
60 switch (F) {
61 case '$':
62 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
63 // rvalue reference to a member.
64 return false;
65 case 'A':
66 // 'A' indicates a reference, and you cannot have a reference to a member
67 // function or member.
68 return false;
69 case 'P':
70 case 'Q':
71 case 'R':
72 case 'S':
73 // These 4 values indicate some kind of pointer, but we still don't know
74 // what.
75 break;
76 default:
77 // isMemberPointer() is called only if isPointerType() returns true,
78 // and it rejects other prefixes.
79 DEMANGLE_UNREACHABLE;
82 // If it starts with a number, then 6 indicates a non-member function
83 // pointer, and 8 indicates a member function pointer.
84 if (startsWithDigit(MangledName)) {
85 if (MangledName[0] != '6' && MangledName[0] != '8') {
86 Error = true;
87 return false;
89 return (MangledName[0] == '8');
92 // Remove ext qualifiers since those can appear on either type and are
93 // therefore not indicative.
94 consumeFront(MangledName, 'E'); // 64-bit
95 consumeFront(MangledName, 'I'); // restrict
96 consumeFront(MangledName, 'F'); // unaligned
98 if (MangledName.empty()) {
99 Error = true;
100 return false;
103 // The next value should be either ABCD (non-member) or QRST (member).
104 switch (MangledName.front()) {
105 case 'A':
106 case 'B':
107 case 'C':
108 case 'D':
109 return false;
110 case 'Q':
111 case 'R':
112 case 'S':
113 case 'T':
114 return true;
115 default:
116 Error = true;
117 return false;
121 static SpecialIntrinsicKind
122 consumeSpecialIntrinsicKind(std::string_view &MangledName) {
123 if (consumeFront(MangledName, "?_7"))
124 return SpecialIntrinsicKind::Vftable;
125 if (consumeFront(MangledName, "?_8"))
126 return SpecialIntrinsicKind::Vbtable;
127 if (consumeFront(MangledName, "?_9"))
128 return SpecialIntrinsicKind::VcallThunk;
129 if (consumeFront(MangledName, "?_A"))
130 return SpecialIntrinsicKind::Typeof;
131 if (consumeFront(MangledName, "?_B"))
132 return SpecialIntrinsicKind::LocalStaticGuard;
133 if (consumeFront(MangledName, "?_C"))
134 return SpecialIntrinsicKind::StringLiteralSymbol;
135 if (consumeFront(MangledName, "?_P"))
136 return SpecialIntrinsicKind::UdtReturning;
137 if (consumeFront(MangledName, "?_R0"))
138 return SpecialIntrinsicKind::RttiTypeDescriptor;
139 if (consumeFront(MangledName, "?_R1"))
140 return SpecialIntrinsicKind::RttiBaseClassDescriptor;
141 if (consumeFront(MangledName, "?_R2"))
142 return SpecialIntrinsicKind::RttiBaseClassArray;
143 if (consumeFront(MangledName, "?_R3"))
144 return SpecialIntrinsicKind::RttiClassHierarchyDescriptor;
145 if (consumeFront(MangledName, "?_R4"))
146 return SpecialIntrinsicKind::RttiCompleteObjLocator;
147 if (consumeFront(MangledName, "?_S"))
148 return SpecialIntrinsicKind::LocalVftable;
149 if (consumeFront(MangledName, "?__E"))
150 return SpecialIntrinsicKind::DynamicInitializer;
151 if (consumeFront(MangledName, "?__F"))
152 return SpecialIntrinsicKind::DynamicAtexitDestructor;
153 if (consumeFront(MangledName, "?__J"))
154 return SpecialIntrinsicKind::LocalStaticThreadGuard;
155 return SpecialIntrinsicKind::None;
// Returns true when S begins with "?<number>?", where <number> is either a
// single character ('@' or a decimal digit) or an encoded number terminated
// by '@'. S is taken by value, so the caller's view is not advanced.
static bool startsWithLocalScopePattern(std::string_view S) {
  // The pattern must open with '?'.
  if (S.empty() || S.front() != '?')
    return false;
  S.remove_prefix(1);

  // ... and there must be a non-empty run before the closing '?'.
  const size_t Close = S.find('?');
  if (Close == std::string_view::npos || Close == 0)
    return false;
  std::string_view Body = S.substr(0, Close);

  // \?[0-9]\?
  // ?@? is the discriminator 0.
  if (Body.size() == 1) {
    const char Only = Body.front();
    return Only == '@' || (Only >= '0' && Only <= '9');
  }

  // If it's not 0-9, then it's an encoded number terminated with an @
  if (Body.back() != '@')
    return false;
  Body.remove_suffix(1);

  // An encoded number starts with B-P and all subsequent digits are in A-P.
  // The first digit cannot be A for two reasons. First, it would create an
  // ambiguity with ?A which delimits the beginning of an anonymous namespace.
  // Second, A represents 0, and you don't start a multi digit number with a
  // leading 0. Presumably the anonymous namespace ambiguity is also why
  // single digit encoded numbers use 0-9 rather than A-J.
  const char Lead = Body.front();
  if (Lead < 'B' || Lead > 'P')
    return false;
  for (const char Digit : Body.substr(1))
    if (Digit < 'A' || Digit > 'P')
      return false;

  return true;
}
// Returns true when S starts with one of the tag-type codes: 'T' (union),
// 'U' (struct), 'V' (class) or 'W' (enum).
//
// An empty S yields false; calling front() on an empty view would be
// undefined behaviour.
static bool isTagType(std::string_view S) {
  if (S.empty())
    return false;

  switch (S.front()) {
  case 'T': // union
  case 'U': // struct
  case 'V': // class
  case 'W': // enum
    return true;
  }
  return false;
}
// Returns true when S starts with '?'. An empty S yields false; indexing an
// empty view would be undefined behaviour.
static bool isCustomType(std::string_view S) {
  return !S.empty() && S.front() == '?';
}
210 static bool isPointerType(std::string_view S) {
211 if (llvm::itanium_demangle::starts_with(S, "$$Q")) // foo &&
212 return true;
214 switch (S.front()) {
215 case 'A': // foo &
216 case 'P': // foo *
217 case 'Q': // foo *const
218 case 'R': // foo *volatile
219 case 'S': // foo *const volatile
220 return true;
222 return false;
// Returns true when S starts with 'Y'. An empty S yields false; indexing an
// empty view would be undefined behaviour.
static bool isArrayType(std::string_view S) {
  return !S.empty() && S.front() == 'Y';
}
227 static bool isFunctionType(std::string_view S) {
228 return llvm::itanium_demangle::starts_with(S, "$$A8@@") ||
229 llvm::itanium_demangle::starts_with(S, "$$A6");
232 static FunctionRefQualifier
233 demangleFunctionRefQualifier(std::string_view &MangledName) {
234 if (consumeFront(MangledName, 'G'))
235 return FunctionRefQualifier::Reference;
236 else if (consumeFront(MangledName, 'H'))
237 return FunctionRefQualifier::RValueReference;
238 return FunctionRefQualifier::None;
241 static std::pair<Qualifiers, PointerAffinity>
242 demanglePointerCVQualifiers(std::string_view &MangledName) {
243 if (consumeFront(MangledName, "$$Q"))
244 return std::make_pair(Q_None, PointerAffinity::RValueReference);
246 const char F = MangledName.front();
247 MangledName.remove_prefix(1);
248 switch (F) {
249 case 'A':
250 return std::make_pair(Q_None, PointerAffinity::Reference);
251 case 'P':
252 return std::make_pair(Q_None, PointerAffinity::Pointer);
253 case 'Q':
254 return std::make_pair(Q_Const, PointerAffinity::Pointer);
255 case 'R':
256 return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
257 case 'S':
258 return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
259 PointerAffinity::Pointer);
261 // This function is only called if isPointerType() returns true,
262 // and it only returns true for the six cases listed above.
263 DEMANGLE_UNREACHABLE;
266 std::string_view Demangler::copyString(std::string_view Borrowed) {
267 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size());
268 // This is not a micro-optimization, it avoids UB, should Borrowed be an null
269 // buffer.
270 if (Borrowed.size())
271 std::memcpy(Stable, Borrowed.data(), Borrowed.size());
273 return {Stable, Borrowed.size()};
276 SpecialTableSymbolNode *
277 Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName,
278 SpecialIntrinsicKind K) {
279 NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
280 switch (K) {
281 case SpecialIntrinsicKind::Vftable:
282 NI->Name = "`vftable'";
283 break;
284 case SpecialIntrinsicKind::Vbtable:
285 NI->Name = "`vbtable'";
286 break;
287 case SpecialIntrinsicKind::LocalVftable:
288 NI->Name = "`local vftable'";
289 break;
290 case SpecialIntrinsicKind::RttiCompleteObjLocator:
291 NI->Name = "`RTTI Complete Object Locator'";
292 break;
293 default:
294 DEMANGLE_UNREACHABLE;
296 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
297 SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
298 STSN->Name = QN;
299 bool IsMember = false;
300 if (MangledName.empty()) {
301 Error = true;
302 return nullptr;
304 char Front = MangledName.front();
305 MangledName.remove_prefix(1);
306 if (Front != '6' && Front != '7') {
307 Error = true;
308 return nullptr;
311 std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
312 if (!consumeFront(MangledName, '@'))
313 STSN->TargetName = demangleFullyQualifiedTypeName(MangledName);
314 return STSN;
317 LocalStaticGuardVariableNode *
318 Demangler::demangleLocalStaticGuard(std::string_view &MangledName,
319 bool IsThread) {
320 LocalStaticGuardIdentifierNode *LSGI =
321 Arena.alloc<LocalStaticGuardIdentifierNode>();
322 LSGI->IsThread = IsThread;
323 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
324 LocalStaticGuardVariableNode *LSGVN =
325 Arena.alloc<LocalStaticGuardVariableNode>();
326 LSGVN->Name = QN;
328 if (consumeFront(MangledName, "4IA"))
329 LSGVN->IsVisible = false;
330 else if (consumeFront(MangledName, "5"))
331 LSGVN->IsVisible = true;
332 else {
333 Error = true;
334 return nullptr;
337 if (!MangledName.empty())
338 LSGI->ScopeIndex = demangleUnsigned(MangledName);
339 return LSGVN;
342 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena,
343 std::string_view Name) {
344 NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>();
345 Id->Name = Name;
346 return Id;
349 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
350 IdentifierNode *Identifier) {
351 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
352 QN->Components = Arena.alloc<NodeArrayNode>();
353 QN->Components->Count = 1;
354 QN->Components->Nodes = Arena.allocArray<Node *>(1);
355 QN->Components->Nodes[0] = Identifier;
356 return QN;
359 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
360 std::string_view Name) {
361 NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name);
362 return synthesizeQualifiedName(Arena, Id);
365 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena,
366 TypeNode *Type,
367 std::string_view VariableName) {
368 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
369 VSN->Type = Type;
370 VSN->Name = synthesizeQualifiedName(Arena, VariableName);
371 return VSN;
374 VariableSymbolNode *
375 Demangler::demangleUntypedVariable(ArenaAllocator &Arena,
376 std::string_view &MangledName,
377 std::string_view VariableName) {
378 NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
379 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
380 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
381 VSN->Name = QN;
382 if (consumeFront(MangledName, "8"))
383 return VSN;
385 Error = true;
386 return nullptr;
389 VariableSymbolNode *
390 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
391 std::string_view &MangledName) {
392 RttiBaseClassDescriptorNode *RBCDN =
393 Arena.alloc<RttiBaseClassDescriptorNode>();
394 RBCDN->NVOffset = demangleUnsigned(MangledName);
395 RBCDN->VBPtrOffset = demangleSigned(MangledName);
396 RBCDN->VBTableOffset = demangleUnsigned(MangledName);
397 RBCDN->Flags = demangleUnsigned(MangledName);
398 if (Error)
399 return nullptr;
401 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
402 VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
403 consumeFront(MangledName, '8');
404 return VSN;
407 FunctionSymbolNode *
408 Demangler::demangleInitFiniStub(std::string_view &MangledName,
409 bool IsDestructor) {
410 DynamicStructorIdentifierNode *DSIN =
411 Arena.alloc<DynamicStructorIdentifierNode>();
412 DSIN->IsDestructor = IsDestructor;
414 bool IsKnownStaticDataMember = false;
415 if (consumeFront(MangledName, '?'))
416 IsKnownStaticDataMember = true;
418 SymbolNode *Symbol = demangleDeclarator(MangledName);
419 if (Error)
420 return nullptr;
422 FunctionSymbolNode *FSN = nullptr;
424 if (Symbol->kind() == NodeKind::VariableSymbol) {
425 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
427 // Older versions of clang mangled this type of symbol incorrectly. They
428 // would omit the leading ? and they would only emit a single @ at the end.
429 // The correct mangling is a leading ? and 2 trailing @ signs. Handle
430 // both cases.
431 int AtCount = IsKnownStaticDataMember ? 2 : 1;
432 for (int I = 0; I < AtCount; ++I) {
433 if (consumeFront(MangledName, '@'))
434 continue;
435 Error = true;
436 return nullptr;
439 FSN = demangleFunctionEncoding(MangledName);
440 if (FSN)
441 FSN->Name = synthesizeQualifiedName(Arena, DSIN);
442 } else {
443 if (IsKnownStaticDataMember) {
444 // This was supposed to be a static data member, but we got a function.
445 Error = true;
446 return nullptr;
449 FSN = static_cast<FunctionSymbolNode *>(Symbol);
450 DSIN->Name = Symbol->Name;
451 FSN->Name = synthesizeQualifiedName(Arena, DSIN);
454 return FSN;
457 SymbolNode *Demangler::demangleSpecialIntrinsic(std::string_view &MangledName) {
458 SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
460 switch (SIK) {
461 case SpecialIntrinsicKind::None:
462 return nullptr;
463 case SpecialIntrinsicKind::StringLiteralSymbol:
464 return demangleStringLiteral(MangledName);
465 case SpecialIntrinsicKind::Vftable:
466 case SpecialIntrinsicKind::Vbtable:
467 case SpecialIntrinsicKind::LocalVftable:
468 case SpecialIntrinsicKind::RttiCompleteObjLocator:
469 return demangleSpecialTableSymbolNode(MangledName, SIK);
470 case SpecialIntrinsicKind::VcallThunk:
471 return demangleVcallThunkNode(MangledName);
472 case SpecialIntrinsicKind::LocalStaticGuard:
473 return demangleLocalStaticGuard(MangledName, /*IsThread=*/false);
474 case SpecialIntrinsicKind::LocalStaticThreadGuard:
475 return demangleLocalStaticGuard(MangledName, /*IsThread=*/true);
476 case SpecialIntrinsicKind::RttiTypeDescriptor: {
477 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
478 if (Error)
479 break;
480 if (!consumeFront(MangledName, "@8"))
481 break;
482 if (!MangledName.empty())
483 break;
484 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'");
486 case SpecialIntrinsicKind::RttiBaseClassArray:
487 return demangleUntypedVariable(Arena, MangledName,
488 "`RTTI Base Class Array'");
489 case SpecialIntrinsicKind::RttiClassHierarchyDescriptor:
490 return demangleUntypedVariable(Arena, MangledName,
491 "`RTTI Class Hierarchy Descriptor'");
492 case SpecialIntrinsicKind::RttiBaseClassDescriptor:
493 return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
494 case SpecialIntrinsicKind::DynamicInitializer:
495 return demangleInitFiniStub(MangledName, /*IsDestructor=*/false);
496 case SpecialIntrinsicKind::DynamicAtexitDestructor:
497 return demangleInitFiniStub(MangledName, /*IsDestructor=*/true);
498 case SpecialIntrinsicKind::Typeof:
499 case SpecialIntrinsicKind::UdtReturning:
500 // It's unclear which tools produces these manglings, so demangling
501 // support is not (yet?) implemented.
502 break;
503 case SpecialIntrinsicKind::Unknown:
504 DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind.
506 Error = true;
507 return nullptr;
510 IdentifierNode *
511 Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName) {
512 assert(llvm::itanium_demangle::starts_with(MangledName, '?'));
513 MangledName.remove_prefix(1);
514 if (MangledName.empty()) {
515 Error = true;
516 return nullptr;
519 if (consumeFront(MangledName, "__"))
520 return demangleFunctionIdentifierCode(
521 MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
522 if (consumeFront(MangledName, "_"))
523 return demangleFunctionIdentifierCode(MangledName,
524 FunctionIdentifierCodeGroup::Under);
525 return demangleFunctionIdentifierCode(MangledName,
526 FunctionIdentifierCodeGroup::Basic);
529 StructorIdentifierNode *
530 Demangler::demangleStructorIdentifier(std::string_view &MangledName,
531 bool IsDestructor) {
532 StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
533 N->IsDestructor = IsDestructor;
534 return N;
537 ConversionOperatorIdentifierNode *
538 Demangler::demangleConversionOperatorIdentifier(std::string_view &MangledName) {
539 ConversionOperatorIdentifierNode *N =
540 Arena.alloc<ConversionOperatorIdentifierNode>();
541 return N;
544 LiteralOperatorIdentifierNode *
545 Demangler::demangleLiteralOperatorIdentifier(std::string_view &MangledName) {
546 LiteralOperatorIdentifierNode *N =
547 Arena.alloc<LiteralOperatorIdentifierNode>();
548 N->Name = demangleSimpleString(MangledName, /*Memorize=*/false);
549 return N;
552 IntrinsicFunctionKind
553 Demangler::translateIntrinsicFunctionCode(char CH,
554 FunctionIdentifierCodeGroup Group) {
555 using IFK = IntrinsicFunctionKind;
556 if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) {
557 Error = true;
558 return IFK::None;
561 // Not all ? identifiers are intrinsics *functions*. This function only maps
562 // operator codes for the special functions, all others are handled elsewhere,
563 // hence the IFK::None entries in the table.
564 static IFK Basic[36] = {
565 IFK::None, // ?0 # Foo::Foo()
566 IFK::None, // ?1 # Foo::~Foo()
567 IFK::New, // ?2 # operator new
568 IFK::Delete, // ?3 # operator delete
569 IFK::Assign, // ?4 # operator=
570 IFK::RightShift, // ?5 # operator>>
571 IFK::LeftShift, // ?6 # operator<<
572 IFK::LogicalNot, // ?7 # operator!
573 IFK::Equals, // ?8 # operator==
574 IFK::NotEquals, // ?9 # operator!=
575 IFK::ArraySubscript, // ?A # operator[]
576 IFK::None, // ?B # Foo::operator <type>()
577 IFK::Pointer, // ?C # operator->
578 IFK::Dereference, // ?D # operator*
579 IFK::Increment, // ?E # operator++
580 IFK::Decrement, // ?F # operator--
581 IFK::Minus, // ?G # operator-
582 IFK::Plus, // ?H # operator+
583 IFK::BitwiseAnd, // ?I # operator&
584 IFK::MemberPointer, // ?J # operator->*
585 IFK::Divide, // ?K # operator/
586 IFK::Modulus, // ?L # operator%
587 IFK::LessThan, // ?M operator<
588 IFK::LessThanEqual, // ?N operator<=
589 IFK::GreaterThan, // ?O operator>
590 IFK::GreaterThanEqual, // ?P operator>=
591 IFK::Comma, // ?Q operator,
592 IFK::Parens, // ?R operator()
593 IFK::BitwiseNot, // ?S operator~
594 IFK::BitwiseXor, // ?T operator^
595 IFK::BitwiseOr, // ?U operator|
596 IFK::LogicalAnd, // ?V operator&&
597 IFK::LogicalOr, // ?W operator||
598 IFK::TimesEqual, // ?X operator*=
599 IFK::PlusEqual, // ?Y operator+=
600 IFK::MinusEqual, // ?Z operator-=
602 static IFK Under[36] = {
603 IFK::DivEqual, // ?_0 operator/=
604 IFK::ModEqual, // ?_1 operator%=
605 IFK::RshEqual, // ?_2 operator>>=
606 IFK::LshEqual, // ?_3 operator<<=
607 IFK::BitwiseAndEqual, // ?_4 operator&=
608 IFK::BitwiseOrEqual, // ?_5 operator|=
609 IFK::BitwiseXorEqual, // ?_6 operator^=
610 IFK::None, // ?_7 # vftable
611 IFK::None, // ?_8 # vbtable
612 IFK::None, // ?_9 # vcall
613 IFK::None, // ?_A # typeof
614 IFK::None, // ?_B # local static guard
615 IFK::None, // ?_C # string literal
616 IFK::VbaseDtor, // ?_D # vbase destructor
617 IFK::VecDelDtor, // ?_E # vector deleting destructor
618 IFK::DefaultCtorClosure, // ?_F # default constructor closure
619 IFK::ScalarDelDtor, // ?_G # scalar deleting destructor
620 IFK::VecCtorIter, // ?_H # vector constructor iterator
621 IFK::VecDtorIter, // ?_I # vector destructor iterator
622 IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor iterator
623 IFK::VdispMap, // ?_K # virtual displacement map
624 IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator
625 IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator
626 IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
627 IFK::CopyCtorClosure, // ?_O # copy constructor closure
628 IFK::None, // ?_P<name> # udt returning <name>
629 IFK::None, // ?_Q # <unknown>
630 IFK::None, // ?_R0 - ?_R4 # RTTI Codes
631 IFK::None, // ?_S # local vftable
632 IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure
633 IFK::ArrayNew, // ?_U operator new[]
634 IFK::ArrayDelete, // ?_V operator delete[]
635 IFK::None, // ?_W <unused>
636 IFK::None, // ?_X <unused>
637 IFK::None, // ?_Y <unused>
638 IFK::None, // ?_Z <unused>
640 static IFK DoubleUnder[36] = {
641 IFK::None, // ?__0 <unused>
642 IFK::None, // ?__1 <unused>
643 IFK::None, // ?__2 <unused>
644 IFK::None, // ?__3 <unused>
645 IFK::None, // ?__4 <unused>
646 IFK::None, // ?__5 <unused>
647 IFK::None, // ?__6 <unused>
648 IFK::None, // ?__7 <unused>
649 IFK::None, // ?__8 <unused>
650 IFK::None, // ?__9 <unused>
651 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator
652 IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator
653 IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator
654 IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter
655 IFK::None, // ?__E dynamic initializer for `T'
656 IFK::None, // ?__F dynamic atexit destructor for `T'
657 IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter
658 IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter
659 IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor
660 // iter
661 IFK::None, // ?__J local static thread guard
662 IFK::None, // ?__K operator ""_name
663 IFK::CoAwait, // ?__L operator co_await
664 IFK::Spaceship, // ?__M operator<=>
665 IFK::None, // ?__N <unused>
666 IFK::None, // ?__O <unused>
667 IFK::None, // ?__P <unused>
668 IFK::None, // ?__Q <unused>
669 IFK::None, // ?__R <unused>
670 IFK::None, // ?__S <unused>
671 IFK::None, // ?__T <unused>
672 IFK::None, // ?__U <unused>
673 IFK::None, // ?__V <unused>
674 IFK::None, // ?__W <unused>
675 IFK::None, // ?__X <unused>
676 IFK::None, // ?__Y <unused>
677 IFK::None, // ?__Z <unused>
680 int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
681 switch (Group) {
682 case FunctionIdentifierCodeGroup::Basic:
683 return Basic[Index];
684 case FunctionIdentifierCodeGroup::Under:
685 return Under[Index];
686 case FunctionIdentifierCodeGroup::DoubleUnder:
687 return DoubleUnder[Index];
689 DEMANGLE_UNREACHABLE;
692 IdentifierNode *
693 Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName,
694 FunctionIdentifierCodeGroup Group) {
695 if (MangledName.empty()) {
696 Error = true;
697 return nullptr;
699 const char CH = MangledName.front();
700 switch (Group) {
701 case FunctionIdentifierCodeGroup::Basic:
702 MangledName.remove_prefix(1);
703 switch (CH) {
704 case '0':
705 case '1':
706 return demangleStructorIdentifier(MangledName, CH == '1');
707 case 'B':
708 return demangleConversionOperatorIdentifier(MangledName);
709 default:
710 return Arena.alloc<IntrinsicFunctionIdentifierNode>(
711 translateIntrinsicFunctionCode(CH, Group));
713 case FunctionIdentifierCodeGroup::Under:
714 MangledName.remove_prefix(1);
715 return Arena.alloc<IntrinsicFunctionIdentifierNode>(
716 translateIntrinsicFunctionCode(CH, Group));
717 case FunctionIdentifierCodeGroup::DoubleUnder:
718 MangledName.remove_prefix(1);
719 switch (CH) {
720 case 'K':
721 return demangleLiteralOperatorIdentifier(MangledName);
722 default:
723 return Arena.alloc<IntrinsicFunctionIdentifierNode>(
724 translateIntrinsicFunctionCode(CH, Group));
728 DEMANGLE_UNREACHABLE;
731 SymbolNode *Demangler::demangleEncodedSymbol(std::string_view &MangledName,
732 QualifiedNameNode *Name) {
733 if (MangledName.empty()) {
734 Error = true;
735 return nullptr;
738 // Read a variable.
739 switch (MangledName.front()) {
740 case '0':
741 case '1':
742 case '2':
743 case '3':
744 case '4': {
745 StorageClass SC = demangleVariableStorageClass(MangledName);
746 return demangleVariableEncoding(MangledName, SC);
749 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
751 IdentifierNode *UQN = Name->getUnqualifiedIdentifier();
752 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
753 ConversionOperatorIdentifierNode *COIN =
754 static_cast<ConversionOperatorIdentifierNode *>(UQN);
755 if (FSN)
756 COIN->TargetType = FSN->Signature->ReturnType;
758 return FSN;
761 SymbolNode *Demangler::demangleDeclarator(std::string_view &MangledName) {
762 // What follows is a main symbol name. This may include namespaces or class
763 // back references.
764 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
765 if (Error)
766 return nullptr;
768 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
769 if (Error)
770 return nullptr;
771 Symbol->Name = QN;
773 IdentifierNode *UQN = QN->getUnqualifiedIdentifier();
774 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
775 ConversionOperatorIdentifierNode *COIN =
776 static_cast<ConversionOperatorIdentifierNode *>(UQN);
777 if (!COIN->TargetType) {
778 Error = true;
779 return nullptr;
782 return Symbol;
785 SymbolNode *Demangler::demangleMD5Name(std::string_view &MangledName) {
786 assert(llvm::itanium_demangle::starts_with(MangledName, "??@"));
787 // This is an MD5 mangled name. We can't demangle it, just return the
788 // mangled name.
789 // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
790 size_t MD5Last = MangledName.find('@', strlen("??@"));
791 if (MD5Last == std::string_view::npos) {
792 Error = true;
793 return nullptr;
795 const char *Start = MangledName.data();
796 const size_t StartSize = MangledName.size();
797 MangledName.remove_prefix(MD5Last + 1);
799 // There are two additional special cases for MD5 names:
800 // 1. For complete object locators where the object name is long enough
801 // for the object to have an MD5 name, the complete object locator is
802 // called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
803 // leading "??_R4". This is handled here.
804 // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
805 // 2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
806 // instead of_CT??@...@8 with just one MD5 name. Since we don't yet
807 // demangle catchable types anywhere, this isn't handled for MD5 names
808 // either.
809 consumeFront(MangledName, "??_R4@");
811 assert(MangledName.size() < StartSize);
812 const size_t Count = StartSize - MangledName.size();
813 std::string_view MD5(Start, Count);
814 SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
815 S->Name = synthesizeQualifiedName(Arena, MD5);
817 return S;
820 SymbolNode *Demangler::demangleTypeinfoName(std::string_view &MangledName) {
821 assert(llvm::itanium_demangle::starts_with(MangledName, '.'));
822 consumeFront(MangledName, '.');
824 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
825 if (Error || !MangledName.empty()) {
826 Error = true;
827 return nullptr;
829 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'");
832 // Parser entry point.
833 SymbolNode *Demangler::parse(std::string_view &MangledName) {
834 // Typeinfo names are strings stored in RTTI data. They're not symbol names.
835 // It's still useful to demangle them. They're the only demangled entity
836 // that doesn't start with a "?" but a ".".
837 if (llvm::itanium_demangle::starts_with(MangledName, '.'))
838 return demangleTypeinfoName(MangledName);
840 if (llvm::itanium_demangle::starts_with(MangledName, "??@"))
841 return demangleMD5Name(MangledName);
843 // MSVC-style mangled symbols must start with '?'.
844 if (!llvm::itanium_demangle::starts_with(MangledName, '?')) {
845 Error = true;
846 return nullptr;
849 consumeFront(MangledName, '?');
851 // ?$ is a template instantiation, but all other names that start with ? are
852 // operators / special names.
853 if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
854 return SI;
856 return demangleDeclarator(MangledName);
859 TagTypeNode *Demangler::parseTagUniqueName(std::string_view &MangledName) {
860 if (!consumeFront(MangledName, ".?A")) {
861 Error = true;
862 return nullptr;
864 consumeFront(MangledName, ".?A");
865 if (MangledName.empty()) {
866 Error = true;
867 return nullptr;
870 return demangleClassType(MangledName);
873 // <type-encoding> ::= <storage-class> <variable-type>
874 // <storage-class> ::= 0 # private static member
875 // ::= 1 # protected static member
876 // ::= 2 # public static member
877 // ::= 3 # global
878 // ::= 4 # static local
880 VariableSymbolNode *
881 Demangler::demangleVariableEncoding(std::string_view &MangledName,
882 StorageClass SC) {
883 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
885 VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
886 VSN->SC = SC;
888 if (Error)
889 return nullptr;
891 // <variable-type> ::= <type> <cvr-qualifiers>
892 // ::= <type> <pointee-cvr-qualifiers> # pointers, references
893 switch (VSN->Type->kind()) {
894 case NodeKind::PointerType: {
895 PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type);
897 Qualifiers ExtraChildQuals = Q_None;
898 PTN->Quals = Qualifiers(VSN->Type->Quals |
899 demanglePointerExtQualifiers(MangledName));
901 bool IsMember = false;
902 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
904 if (PTN->ClassParent) {
905 QualifiedNameNode *BackRefName =
906 demangleFullyQualifiedTypeName(MangledName);
907 (void)BackRefName;
909 PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals);
911 break;
913 default:
914 VSN->Type->Quals = demangleQualifiers(MangledName).first;
915 break;
918 return VSN;
921 // Sometimes numbers are encoded in mangled symbols. For example,
922 // "int (*x)[20]" is a valid C type (x is a pointer to an array of
923 // length 20), so we need some way to embed numbers as part of symbols.
924 // This function parses it.
926 // <number> ::= [?] <non-negative integer>
928 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
929 // ::= <hex digit>+ @ # when Number == 0 or >= 10
931 // <hex-digit> ::= [A-P] # A = 0, B = 1, ...
932 std::pair<uint64_t, bool>
933 Demangler::demangleNumber(std::string_view &MangledName) {
934 bool IsNegative = consumeFront(MangledName, '?');
936 if (startsWithDigit(MangledName)) {
937 uint64_t Ret = MangledName[0] - '0' + 1;
938 MangledName.remove_prefix(1);
939 return {Ret, IsNegative};
942 uint64_t Ret = 0;
943 for (size_t i = 0; i < MangledName.size(); ++i) {
944 char C = MangledName[i];
945 if (C == '@') {
946 MangledName.remove_prefix(i + 1);
947 return {Ret, IsNegative};
949 if ('A' <= C && C <= 'P') {
950 Ret = (Ret << 4) + (C - 'A');
951 continue;
953 break;
956 Error = true;
957 return {0ULL, false};
960 uint64_t Demangler::demangleUnsigned(std::string_view &MangledName) {
961 bool IsNegative = false;
962 uint64_t Number = 0;
963 std::tie(Number, IsNegative) = demangleNumber(MangledName);
964 if (IsNegative)
965 Error = true;
966 return Number;
969 int64_t Demangler::demangleSigned(std::string_view &MangledName) {
970 bool IsNegative = false;
971 uint64_t Number = 0;
972 std::tie(Number, IsNegative) = demangleNumber(MangledName);
973 if (Number > INT64_MAX)
974 Error = true;
975 int64_t I = static_cast<int64_t>(Number);
976 return IsNegative ? -I : I;
979 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
980 // Memorize it.
981 void Demangler::memorizeString(std::string_view S) {
982 if (Backrefs.NamesCount >= BackrefContext::Max)
983 return;
984 for (size_t i = 0; i < Backrefs.NamesCount; ++i)
985 if (S == Backrefs.Names[i]->Name)
986 return;
987 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>();
988 N->Name = S;
989 Backrefs.Names[Backrefs.NamesCount++] = N;
992 NamedIdentifierNode *
993 Demangler::demangleBackRefName(std::string_view &MangledName) {
994 assert(startsWithDigit(MangledName));
996 size_t I = MangledName[0] - '0';
997 if (I >= Backrefs.NamesCount) {
998 Error = true;
999 return nullptr;
1002 MangledName.remove_prefix(1);
1003 return Backrefs.Names[I];
1006 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
1007 // Render this class template name into a string buffer so that we can
1008 // memorize it for the purpose of back-referencing.
1009 OutputBuffer OB;
1010 Identifier->output(OB, OF_Default);
1011 std::string_view Owned = copyString(OB);
1012 memorizeString(Owned);
1013 std::free(OB.getBuffer());
1016 IdentifierNode *
1017 Demangler::demangleTemplateInstantiationName(std::string_view &MangledName,
1018 NameBackrefBehavior NBB) {
1019 assert(llvm::itanium_demangle::starts_with(MangledName, "?$"));
1020 consumeFront(MangledName, "?$");
1022 BackrefContext OuterContext;
1023 std::swap(OuterContext, Backrefs);
1025 IdentifierNode *Identifier =
1026 demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1027 if (!Error)
1028 Identifier->TemplateParams = demangleTemplateParameterList(MangledName);
1030 std::swap(OuterContext, Backrefs);
1031 if (Error)
1032 return nullptr;
1034 if (NBB & NBB_Template) {
1035 // NBB_Template is only set for types and non-leaf names ("a::" in "a::b").
1036 // Structors and conversion operators only makes sense in a leaf name, so
1037 // reject them in NBB_Template contexts.
1038 if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier ||
1039 Identifier->kind() == NodeKind::StructorIdentifier) {
1040 Error = true;
1041 return nullptr;
1044 memorizeIdentifier(Identifier);
1047 return Identifier;
1050 NamedIdentifierNode *
1051 Demangler::demangleSimpleName(std::string_view &MangledName, bool Memorize) {
1052 std::string_view S = demangleSimpleString(MangledName, Memorize);
1053 if (Error)
1054 return nullptr;
1056 NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>();
1057 Name->Name = S;
1058 return Name;
// True when C is one of the "rebased" hex digits 'A'..'P'.
static bool isRebasedHexDigit(char C) { return !(C < 'A' || C > 'P'); }
// Maps a rebased hex digit to its value: 'A' -> 0, ..., 'P' -> 15.
// C must be in the range 'A'..'P'.
static uint8_t rebasedHexDigitToNumber(char C) {
  assert(C >= 'A' && C <= 'P');
  if (C <= 'J')
    return C - 'A';
  return 10 + C - 'K';
}
1068 uint8_t Demangler::demangleCharLiteral(std::string_view &MangledName) {
1069 assert(!MangledName.empty());
1070 if (!llvm::itanium_demangle::starts_with(MangledName, '?')) {
1071 const uint8_t F = MangledName.front();
1072 MangledName.remove_prefix(1);
1073 return F;
1076 MangledName.remove_prefix(1);
1077 if (MangledName.empty())
1078 goto CharLiteralError;
1080 if (consumeFront(MangledName, '$')) {
1081 // Two hex digits
1082 if (MangledName.size() < 2)
1083 goto CharLiteralError;
1084 std::string_view Nibbles = MangledName.substr(0, 2);
1085 if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
1086 goto CharLiteralError;
1087 // Don't append the null terminator.
1088 uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
1089 uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
1090 MangledName.remove_prefix(2);
1091 return (C1 << 4) | C2;
1094 if (startsWithDigit(MangledName)) {
1095 const char *Lookup = ",/\\:. \n\t'-";
1096 char C = Lookup[MangledName[0] - '0'];
1097 MangledName.remove_prefix(1);
1098 return C;
1101 if (MangledName[0] >= 'a' && MangledName[0] <= 'z') {
1102 char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
1103 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE',
1104 '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
1105 '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
1106 char C = Lookup[MangledName[0] - 'a'];
1107 MangledName.remove_prefix(1);
1108 return C;
1111 if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') {
1112 char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
1113 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE',
1114 '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
1115 '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
1116 char C = Lookup[MangledName[0] - 'A'];
1117 MangledName.remove_prefix(1);
1118 return C;
1121 CharLiteralError:
1122 Error = true;
1123 return '\0';
1126 wchar_t Demangler::demangleWcharLiteral(std::string_view &MangledName) {
1127 uint8_t C1, C2;
1129 C1 = demangleCharLiteral(MangledName);
1130 if (Error || MangledName.empty())
1131 goto WCharLiteralError;
1132 C2 = demangleCharLiteral(MangledName);
1133 if (Error)
1134 goto WCharLiteralError;
1136 return ((wchar_t)C1 << 8) | (wchar_t)C2;
1138 WCharLiteralError:
1139 Error = true;
1140 return L'\0';
// Stores the upper-case hexadecimal character for Digit (0..15) in *Buffer.
static void writeHexDigit(char *Buffer, uint8_t Digit) {
  assert(Digit <= 15);
  if (Digit < 10)
    *Buffer = '0' + Digit;
  else
    *Buffer = 'A' + Digit - 10;
}
1148 static void outputHex(OutputBuffer &OB, unsigned C) {
1149 assert (C != 0);
1151 // It's easier to do the math if we can work from right to left, but we need
1152 // to print the numbers from left to right. So render this into a temporary
1153 // buffer first, then output the temporary buffer. Each byte is of the form
1154 // \xAB, which means that each byte needs 4 characters. Since there are at
1155 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer.
1156 char TempBuffer[17];
1158 ::memset(TempBuffer, 0, sizeof(TempBuffer));
1159 constexpr int MaxPos = sizeof(TempBuffer) - 1;
1161 int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0.
1162 while (C != 0) {
1163 for (int I = 0; I < 2; ++I) {
1164 writeHexDigit(&TempBuffer[Pos--], C % 16);
1165 C /= 16;
1168 TempBuffer[Pos--] = 'x';
1169 assert(Pos >= 0);
1170 TempBuffer[Pos--] = '\\';
1171 OB << std::string_view(&TempBuffer[Pos + 1]);
1174 static void outputEscapedChar(OutputBuffer &OB, unsigned C) {
1175 switch (C) {
1176 case '\0': // nul
1177 OB << "\\0";
1178 return;
1179 case '\'': // single quote
1180 OB << "\\\'";
1181 return;
1182 case '\"': // double quote
1183 OB << "\\\"";
1184 return;
1185 case '\\': // backslash
1186 OB << "\\\\";
1187 return;
1188 case '\a': // bell
1189 OB << "\\a";
1190 return;
1191 case '\b': // backspace
1192 OB << "\\b";
1193 return;
1194 case '\f': // form feed
1195 OB << "\\f";
1196 return;
1197 case '\n': // new line
1198 OB << "\\n";
1199 return;
1200 case '\r': // carriage return
1201 OB << "\\r";
1202 return;
1203 case '\t': // tab
1204 OB << "\\t";
1205 return;
1206 case '\v': // vertical tab
1207 OB << "\\v";
1208 return;
1209 default:
1210 break;
1213 if (C > 0x1F && C < 0x7F) {
1214 // Standard ascii char.
1215 OB << (char)C;
1216 return;
1219 outputHex(OB, C);
// Returns how many consecutive zero bytes sit at the end of the first Length
// bytes of StringBytes. A Length of zero (or less) yields 0 and does not
// touch StringBytes at all -- in particular no pointer before the start of
// the buffer is ever formed.
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
  if (Length <= 0)
    return 0;

  unsigned Count = 0;
  // Walk backwards from the last byte until a non-zero byte is found.
  for (int Index = Length - 1; Index >= 0 && StringBytes[Index] == 0; --Index)
    ++Count;
  return Count;
}
// Returns the number of zero bytes among the first Length bytes of
// StringBytes.
static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
                                   unsigned Length) {
  unsigned Zeros = 0;
  for (unsigned Index = 0; Index != Length; ++Index)
    Zeros += (StringBytes[Index] == 0) ? 1u : 0u;
  return Zeros;
}
1243 // A mangled (non-wide) string literal stores the total length of the string it
1244 // refers to (passed in NumBytes), and it contains up to 32 bytes of actual text
1245 // (passed in StringBytes, NumChars).
1246 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
1247 uint64_t NumBytes) {
1248 assert(NumBytes > 0);
1250 // If the number of bytes is odd, this is guaranteed to be a char string.
1251 if (NumBytes % 2 == 1)
1252 return 1;
1254 // All strings can encode at most 32 bytes of data. If it's less than that,
1255 // then we encoded the entire string. In this case we check for a 1-byte,
1256 // 2-byte, or 4-byte null terminator.
1257 if (NumBytes < 32) {
1258 unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
1259 if (TrailingNulls >= 4 && NumBytes % 4 == 0)
1260 return 4;
1261 if (TrailingNulls >= 2)
1262 return 2;
1263 return 1;
1266 // The whole string was not able to be encoded. Try to look at embedded null
1267 // terminators to guess. The heuristic is that we count all embedded null
1268 // terminators. If more than 2/3 are null, it's a char32. If more than 1/3
1269 // are null, it's a char16. Otherwise it's a char8. This obviously isn't
1270 // perfect and is biased towards languages that have ascii alphabets, but this
1271 // was always going to be best effort since the encoding is lossy.
1272 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
1273 if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0)
1274 return 4;
1275 if (Nulls >= NumChars / 3)
1276 return 2;
1277 return 1;
// Reads the CharIndex-th character of width CharBytes (1, 2 or 4) from
// StringBytes. The bytes of a character are combined with the first byte as
// the least significant one.
static unsigned decodeMultiByteChar(const uint8_t *StringBytes,
                                    unsigned CharIndex, unsigned CharBytes) {
  assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4);
  const uint8_t *First = StringBytes + CharIndex * CharBytes;

  // Fold from the most significant (last) byte down to the first one.
  unsigned Value = 0;
  for (unsigned Remaining = CharBytes; Remaining != 0; --Remaining)
    Value = (Value << 8) | static_cast<unsigned>(First[Remaining - 1]);
  return Value;
}
1293 FunctionSymbolNode *
1294 Demangler::demangleVcallThunkNode(std::string_view &MangledName) {
1295 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>();
1296 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>();
1297 FSN->Signature = Arena.alloc<ThunkSignatureNode>();
1298 FSN->Signature->FunctionClass = FC_NoParameterList;
1300 FSN->Name = demangleNameScopeChain(MangledName, VTIN);
1301 if (!Error)
1302 Error = !consumeFront(MangledName, "$B");
1303 if (!Error)
1304 VTIN->OffsetInVTable = demangleUnsigned(MangledName);
1305 if (!Error)
1306 Error = !consumeFront(MangledName, 'A');
1307 if (!Error)
1308 FSN->Signature->CallConvention = demangleCallingConvention(MangledName);
1309 return (Error) ? nullptr : FSN;
1312 EncodedStringLiteralNode *
1313 Demangler::demangleStringLiteral(std::string_view &MangledName) {
1314 // This function uses goto, so declare all variables up front.
1315 OutputBuffer OB;
1316 std::string_view CRC;
1317 uint64_t StringByteSize;
1318 bool IsWcharT = false;
1319 bool IsNegative = false;
1320 size_t CrcEndPos = 0;
1321 char F;
1323 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
1325 // Prefix indicating the beginning of a string literal
1326 if (!consumeFront(MangledName, "@_"))
1327 goto StringLiteralError;
1328 if (MangledName.empty())
1329 goto StringLiteralError;
1331 // Char Type (regular or wchar_t)
1332 F = MangledName.front();
1333 MangledName.remove_prefix(1);
1334 switch (F) {
1335 case '1':
1336 IsWcharT = true;
1337 DEMANGLE_FALLTHROUGH;
1338 case '0':
1339 break;
1340 default:
1341 goto StringLiteralError;
1344 // Encoded Length
1345 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
1346 if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1))
1347 goto StringLiteralError;
1349 // CRC 32 (always 8 characters plus a terminator)
1350 CrcEndPos = MangledName.find('@');
1351 if (CrcEndPos == std::string_view::npos)
1352 goto StringLiteralError;
1353 CRC = MangledName.substr(0, CrcEndPos);
1354 MangledName.remove_prefix(CrcEndPos + 1);
1355 if (MangledName.empty())
1356 goto StringLiteralError;
1358 if (IsWcharT) {
1359 Result->Char = CharKind::Wchar;
1360 if (StringByteSize > 64)
1361 Result->IsTruncated = true;
1363 while (!consumeFront(MangledName, '@')) {
1364 if (MangledName.size() < 2)
1365 goto StringLiteralError;
1366 wchar_t W = demangleWcharLiteral(MangledName);
1367 if (StringByteSize != 2 || Result->IsTruncated)
1368 outputEscapedChar(OB, W);
1369 StringByteSize -= 2;
1370 if (Error)
1371 goto StringLiteralError;
1373 } else {
1374 // The max byte length is actually 32, but some compilers mangled strings
1375 // incorrectly, so we have to assume it can go higher.
1376 constexpr unsigned MaxStringByteLength = 32 * 4;
1377 uint8_t StringBytes[MaxStringByteLength];
1379 unsigned BytesDecoded = 0;
1380 while (!consumeFront(MangledName, '@')) {
1381 if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength)
1382 goto StringLiteralError;
1383 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
1386 if (StringByteSize > BytesDecoded)
1387 Result->IsTruncated = true;
1389 unsigned CharBytes =
1390 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
1391 assert(StringByteSize % CharBytes == 0);
1392 switch (CharBytes) {
1393 case 1:
1394 Result->Char = CharKind::Char;
1395 break;
1396 case 2:
1397 Result->Char = CharKind::Char16;
1398 break;
1399 case 4:
1400 Result->Char = CharKind::Char32;
1401 break;
1402 default:
1403 DEMANGLE_UNREACHABLE;
1405 const unsigned NumChars = BytesDecoded / CharBytes;
1406 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
1407 unsigned NextChar =
1408 decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
1409 if (CharIndex + 1 < NumChars || Result->IsTruncated)
1410 outputEscapedChar(OB, NextChar);
1414 Result->DecodedString = copyString(OB);
1415 std::free(OB.getBuffer());
1416 return Result;
1418 StringLiteralError:
1419 Error = true;
1420 std::free(OB.getBuffer());
1421 return nullptr;
1424 // Returns MangledName's prefix before the first '@', or an error if
1425 // MangledName contains no '@' or the prefix has length 0.
1426 std::string_view Demangler::demangleSimpleString(std::string_view &MangledName,
1427 bool Memorize) {
1428 std::string_view S;
1429 for (size_t i = 0; i < MangledName.size(); ++i) {
1430 if (MangledName[i] != '@')
1431 continue;
1432 if (i == 0)
1433 break;
1434 S = MangledName.substr(0, i);
1435 MangledName.remove_prefix(i + 1);
1437 if (Memorize)
1438 memorizeString(S);
1439 return S;
1442 Error = true;
1443 return {};
1446 NamedIdentifierNode *
1447 Demangler::demangleAnonymousNamespaceName(std::string_view &MangledName) {
1448 assert(llvm::itanium_demangle::starts_with(MangledName, "?A"));
1449 consumeFront(MangledName, "?A");
1451 NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
1452 Node->Name = "`anonymous namespace'";
1453 size_t EndPos = MangledName.find('@');
1454 if (EndPos == std::string_view::npos) {
1455 Error = true;
1456 return nullptr;
1458 std::string_view NamespaceKey = MangledName.substr(0, EndPos);
1459 memorizeString(NamespaceKey);
1460 MangledName = MangledName.substr(EndPos + 1);
1461 return Node;
1464 NamedIdentifierNode *
1465 Demangler::demangleLocallyScopedNamePiece(std::string_view &MangledName) {
1466 assert(startsWithLocalScopePattern(MangledName));
1468 NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
1469 consumeFront(MangledName, '?');
1470 uint64_t Number = 0;
1471 bool IsNegative = false;
1472 std::tie(Number, IsNegative) = demangleNumber(MangledName);
1473 assert(!IsNegative);
1475 // One ? to terminate the number
1476 consumeFront(MangledName, '?');
1478 assert(!Error);
1479 Node *Scope = parse(MangledName);
1480 if (Error)
1481 return nullptr;
1483 // Render the parent symbol's name into a buffer.
1484 OutputBuffer OB;
1485 OB << '`';
1486 Scope->output(OB, OF_Default);
1487 OB << '\'';
1488 OB << "::`" << Number << "'";
1490 Identifier->Name = copyString(OB);
1491 std::free(OB.getBuffer());
1492 return Identifier;
1495 // Parses a type name in the form of A@B@C@@ which represents C::B::A.
1496 QualifiedNameNode *
1497 Demangler::demangleFullyQualifiedTypeName(std::string_view &MangledName) {
1498 IdentifierNode *Identifier =
1499 demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
1500 if (Error)
1501 return nullptr;
1502 assert(Identifier);
1504 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1505 if (Error)
1506 return nullptr;
1507 assert(QN);
1508 return QN;
1511 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
1512 // Symbol names have slightly different rules regarding what can appear
1513 // so we separate out the implementations for flexibility.
1514 QualifiedNameNode *
1515 Demangler::demangleFullyQualifiedSymbolName(std::string_view &MangledName) {
1516 // This is the final component of a symbol name (i.e. the leftmost component
1517 // of a mangled name. Since the only possible template instantiation that
1518 // can appear in this context is a function template, and since those are
1519 // not saved for the purposes of name backreferences, only backref simple
1520 // names.
1521 IdentifierNode *Identifier =
1522 demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1523 if (Error)
1524 return nullptr;
1526 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1527 if (Error)
1528 return nullptr;
1530 if (Identifier->kind() == NodeKind::StructorIdentifier) {
1531 if (QN->Components->Count < 2) {
1532 Error = true;
1533 return nullptr;
1535 StructorIdentifierNode *SIN =
1536 static_cast<StructorIdentifierNode *>(Identifier);
1537 Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
1538 SIN->Class = static_cast<IdentifierNode *>(ClassNode);
1540 assert(QN);
1541 return QN;
1544 IdentifierNode *
1545 Demangler::demangleUnqualifiedTypeName(std::string_view &MangledName,
1546 bool Memorize) {
1547 // An inner-most name can be a back-reference, because a fully-qualified name
1548 // (e.g. Scope + Inner) can contain other fully qualified names inside of
1549 // them (for example template parameters), and these nested parameters can
1550 // refer to previously mangled types.
1551 if (startsWithDigit(MangledName))
1552 return demangleBackRefName(MangledName);
1554 if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
1555 return demangleTemplateInstantiationName(MangledName, NBB_Template);
1557 return demangleSimpleName(MangledName, Memorize);
1560 IdentifierNode *
1561 Demangler::demangleUnqualifiedSymbolName(std::string_view &MangledName,
1562 NameBackrefBehavior NBB) {
1563 if (startsWithDigit(MangledName))
1564 return demangleBackRefName(MangledName);
1565 if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
1566 return demangleTemplateInstantiationName(MangledName, NBB);
1567 if (llvm::itanium_demangle::starts_with(MangledName, '?'))
1568 return demangleFunctionIdentifierCode(MangledName);
1569 return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
1572 IdentifierNode *
1573 Demangler::demangleNameScopePiece(std::string_view &MangledName) {
1574 if (startsWithDigit(MangledName))
1575 return demangleBackRefName(MangledName);
1577 if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
1578 return demangleTemplateInstantiationName(MangledName, NBB_Template);
1580 if (llvm::itanium_demangle::starts_with(MangledName, "?A"))
1581 return demangleAnonymousNamespaceName(MangledName);
1583 if (startsWithLocalScopePattern(MangledName))
1584 return demangleLocallyScopedNamePiece(MangledName);
1586 return demangleSimpleName(MangledName, /*Memorize=*/true);
1589 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
1590 size_t Count) {
1591 NodeArrayNode *N = Arena.alloc<NodeArrayNode>();
1592 N->Count = Count;
1593 N->Nodes = Arena.allocArray<Node *>(Count);
1594 for (size_t I = 0; I < Count; ++I) {
1595 N->Nodes[I] = Head->N;
1596 Head = Head->Next;
1598 return N;
1601 QualifiedNameNode *
1602 Demangler::demangleNameScopeChain(std::string_view &MangledName,
1603 IdentifierNode *UnqualifiedName) {
1604 NodeList *Head = Arena.alloc<NodeList>();
1606 Head->N = UnqualifiedName;
1608 size_t Count = 1;
1609 while (!consumeFront(MangledName, "@")) {
1610 ++Count;
1611 NodeList *NewHead = Arena.alloc<NodeList>();
1612 NewHead->Next = Head;
1613 Head = NewHead;
1615 if (MangledName.empty()) {
1616 Error = true;
1617 return nullptr;
1620 assert(!Error);
1621 IdentifierNode *Elem = demangleNameScopePiece(MangledName);
1622 if (Error)
1623 return nullptr;
1625 Head->N = Elem;
1628 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
1629 QN->Components = nodeListToNodeArray(Arena, Head, Count);
1630 return QN;
1633 FuncClass Demangler::demangleFunctionClass(std::string_view &MangledName) {
1634 const char F = MangledName.front();
1635 MangledName.remove_prefix(1);
1636 switch (F) {
1637 case '9':
1638 return FuncClass(FC_ExternC | FC_NoParameterList);
1639 case 'A':
1640 return FC_Private;
1641 case 'B':
1642 return FuncClass(FC_Private | FC_Far);
1643 case 'C':
1644 return FuncClass(FC_Private | FC_Static);
1645 case 'D':
1646 return FuncClass(FC_Private | FC_Static | FC_Far);
1647 case 'E':
1648 return FuncClass(FC_Private | FC_Virtual);
1649 case 'F':
1650 return FuncClass(FC_Private | FC_Virtual | FC_Far);
1651 case 'G':
1652 return FuncClass(FC_Private | FC_StaticThisAdjust);
1653 case 'H':
1654 return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
1655 case 'I':
1656 return FuncClass(FC_Protected);
1657 case 'J':
1658 return FuncClass(FC_Protected | FC_Far);
1659 case 'K':
1660 return FuncClass(FC_Protected | FC_Static);
1661 case 'L':
1662 return FuncClass(FC_Protected | FC_Static | FC_Far);
1663 case 'M':
1664 return FuncClass(FC_Protected | FC_Virtual);
1665 case 'N':
1666 return FuncClass(FC_Protected | FC_Virtual | FC_Far);
1667 case 'O':
1668 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust);
1669 case 'P':
1670 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far);
1671 case 'Q':
1672 return FuncClass(FC_Public);
1673 case 'R':
1674 return FuncClass(FC_Public | FC_Far);
1675 case 'S':
1676 return FuncClass(FC_Public | FC_Static);
1677 case 'T':
1678 return FuncClass(FC_Public | FC_Static | FC_Far);
1679 case 'U':
1680 return FuncClass(FC_Public | FC_Virtual);
1681 case 'V':
1682 return FuncClass(FC_Public | FC_Virtual | FC_Far);
1683 case 'W':
1684 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust);
1685 case 'X':
1686 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far);
1687 case 'Y':
1688 return FuncClass(FC_Global);
1689 case 'Z':
1690 return FuncClass(FC_Global | FC_Far);
1691 case '$': {
1692 FuncClass VFlag = FC_VirtualThisAdjust;
1693 if (consumeFront(MangledName, 'R'))
1694 VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
1695 if (MangledName.empty())
1696 break;
1697 const char F = MangledName.front();
1698 MangledName.remove_prefix(1);
1699 switch (F) {
1700 case '0':
1701 return FuncClass(FC_Private | FC_Virtual | VFlag);
1702 case '1':
1703 return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
1704 case '2':
1705 return FuncClass(FC_Protected | FC_Virtual | VFlag);
1706 case '3':
1707 return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
1708 case '4':
1709 return FuncClass(FC_Public | FC_Virtual | VFlag);
1710 case '5':
1711 return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
1716 Error = true;
1717 return FC_Public;
1720 CallingConv
1721 Demangler::demangleCallingConvention(std::string_view &MangledName) {
1722 if (MangledName.empty()) {
1723 Error = true;
1724 return CallingConv::None;
1727 const char F = MangledName.front();
1728 MangledName.remove_prefix(1);
1729 switch (F) {
1730 case 'A':
1731 case 'B':
1732 return CallingConv::Cdecl;
1733 case 'C':
1734 case 'D':
1735 return CallingConv::Pascal;
1736 case 'E':
1737 case 'F':
1738 return CallingConv::Thiscall;
1739 case 'G':
1740 case 'H':
1741 return CallingConv::Stdcall;
1742 case 'I':
1743 case 'J':
1744 return CallingConv::Fastcall;
1745 case 'M':
1746 case 'N':
1747 return CallingConv::Clrcall;
1748 case 'O':
1749 case 'P':
1750 return CallingConv::Eabi;
1751 case 'Q':
1752 return CallingConv::Vectorcall;
1753 case 'S':
1754 return CallingConv::Swift;
1755 case 'W':
1756 return CallingConv::SwiftAsync;
1759 return CallingConv::None;
1762 StorageClass
1763 Demangler::demangleVariableStorageClass(std::string_view &MangledName) {
1764 assert(MangledName.front() >= '0' && MangledName.front() <= '4');
1766 const char F = MangledName.front();
1767 MangledName.remove_prefix(1);
1768 switch (F) {
1769 case '0':
1770 return StorageClass::PrivateStatic;
1771 case '1':
1772 return StorageClass::ProtectedStatic;
1773 case '2':
1774 return StorageClass::PublicStatic;
1775 case '3':
1776 return StorageClass::Global;
1777 case '4':
1778 return StorageClass::FunctionLocalStatic;
1780 DEMANGLE_UNREACHABLE;
1783 std::pair<Qualifiers, bool>
1784 Demangler::demangleQualifiers(std::string_view &MangledName) {
1785 if (MangledName.empty()) {
1786 Error = true;
1787 return std::make_pair(Q_None, false);
1790 const char F = MangledName.front();
1791 MangledName.remove_prefix(1);
1792 switch (F) {
1793 // Member qualifiers
1794 case 'Q':
1795 return std::make_pair(Q_None, true);
1796 case 'R':
1797 return std::make_pair(Q_Const, true);
1798 case 'S':
1799 return std::make_pair(Q_Volatile, true);
1800 case 'T':
1801 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
1802 // Non-Member qualifiers
1803 case 'A':
1804 return std::make_pair(Q_None, false);
1805 case 'B':
1806 return std::make_pair(Q_Const, false);
1807 case 'C':
1808 return std::make_pair(Q_Volatile, false);
1809 case 'D':
1810 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
1812 Error = true;
1813 return std::make_pair(Q_None, false);
1816 // <variable-type> ::= <type> <cvr-qualifiers>
1817 // ::= <type> <pointee-cvr-qualifiers> # pointers, references
1818 TypeNode *Demangler::demangleType(std::string_view &MangledName,
1819 QualifierMangleMode QMM) {
1820 Qualifiers Quals = Q_None;
1821 bool IsMember = false;
1822 if (QMM == QualifierMangleMode::Mangle) {
1823 std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1824 } else if (QMM == QualifierMangleMode::Result) {
1825 if (consumeFront(MangledName, '?'))
1826 std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1829 if (MangledName.empty()) {
1830 Error = true;
1831 return nullptr;
1834 TypeNode *Ty = nullptr;
1835 if (isTagType(MangledName))
1836 Ty = demangleClassType(MangledName);
1837 else if (isPointerType(MangledName)) {
1838 if (isMemberPointer(MangledName, Error))
1839 Ty = demangleMemberPointerType(MangledName);
1840 else if (!Error)
1841 Ty = demanglePointerType(MangledName);
1842 else
1843 return nullptr;
1844 } else if (isArrayType(MangledName))
1845 Ty = demangleArrayType(MangledName);
1846 else if (isFunctionType(MangledName)) {
1847 if (consumeFront(MangledName, "$$A8@@"))
1848 Ty = demangleFunctionType(MangledName, true);
1849 else {
1850 assert(llvm::itanium_demangle::starts_with(MangledName, "$$A6"));
1851 consumeFront(MangledName, "$$A6");
1852 Ty = demangleFunctionType(MangledName, false);
1854 } else if (isCustomType(MangledName)) {
1855 Ty = demangleCustomType(MangledName);
1856 } else {
1857 Ty = demanglePrimitiveType(MangledName);
1860 if (!Ty || Error)
1861 return Ty;
1862 Ty->Quals = Qualifiers(Ty->Quals | Quals);
1863 return Ty;
1866 bool Demangler::demangleThrowSpecification(std::string_view &MangledName) {
1867 if (consumeFront(MangledName, "_E"))
1868 return true;
1869 if (consumeFront(MangledName, 'Z'))
1870 return false;
1872 Error = true;
1873 return false;
1876 FunctionSignatureNode *
1877 Demangler::demangleFunctionType(std::string_view &MangledName,
1878 bool HasThisQuals) {
1879 FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();
1881 if (HasThisQuals) {
1882 FTy->Quals = demanglePointerExtQualifiers(MangledName);
1883 FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
1884 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
1887 // Fields that appear on both member and non-member functions.
1888 FTy->CallConvention = demangleCallingConvention(MangledName);
1890 // <return-type> ::= <type>
1891 // ::= @ # structors (they have no declared return type)
1892 bool IsStructor = consumeFront(MangledName, '@');
1893 if (!IsStructor)
1894 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
1896 FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);
1898 FTy->IsNoexcept = demangleThrowSpecification(MangledName);
1900 return FTy;
1903 FunctionSymbolNode *
1904 Demangler::demangleFunctionEncoding(std::string_view &MangledName) {
1905 FuncClass ExtraFlags = FC_None;
1906 if (consumeFront(MangledName, "$$J0"))
1907 ExtraFlags = FC_ExternC;
1909 if (MangledName.empty()) {
1910 Error = true;
1911 return nullptr;
1914 FuncClass FC = demangleFunctionClass(MangledName);
1915 FC = FuncClass(ExtraFlags | FC);
1917 FunctionSignatureNode *FSN = nullptr;
1918 ThunkSignatureNode *TTN = nullptr;
1919 if (FC & FC_StaticThisAdjust) {
1920 TTN = Arena.alloc<ThunkSignatureNode>();
1921 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1922 } else if (FC & FC_VirtualThisAdjust) {
1923 TTN = Arena.alloc<ThunkSignatureNode>();
1924 if (FC & FC_VirtualThisAdjustEx) {
1925 TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
1926 TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
1928 TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
1929 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1932 if (FC & FC_NoParameterList) {
1933 // This is an extern "C" function whose full signature hasn't been mangled.
1934 // This happens when we need to mangle a local symbol inside of an extern
1935 // "C" function.
1936 FSN = Arena.alloc<FunctionSignatureNode>();
1937 } else {
1938 bool HasThisQuals = !(FC & (FC_Global | FC_Static));
1939 FSN = demangleFunctionType(MangledName, HasThisQuals);
1942 if (Error)
1943 return nullptr;
1945 if (TTN) {
1946 *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
1947 FSN = TTN;
1949 FSN->FunctionClass = FC;
1951 FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
1952 Symbol->Signature = FSN;
1953 return Symbol;
1956 CustomTypeNode *Demangler::demangleCustomType(std::string_view &MangledName) {
1957 assert(llvm::itanium_demangle::starts_with(MangledName, '?'));
1958 MangledName.remove_prefix(1);
1960 CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
1961 CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
1962 if (!consumeFront(MangledName, '@'))
1963 Error = true;
1964 if (Error)
1965 return nullptr;
1966 return CTN;
1969 // Reads a primitive type.
1970 PrimitiveTypeNode *
1971 Demangler::demanglePrimitiveType(std::string_view &MangledName) {
1972 if (consumeFront(MangledName, "$$T"))
1973 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);
1975 const char F = MangledName.front();
1976 MangledName.remove_prefix(1);
1977 switch (F) {
1978 case 'X':
1979 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
1980 case 'D':
1981 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char);
1982 case 'C':
1983 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar);
1984 case 'E':
1985 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar);
1986 case 'F':
1987 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short);
1988 case 'G':
1989 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort);
1990 case 'H':
1991 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int);
1992 case 'I':
1993 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint);
1994 case 'J':
1995 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long);
1996 case 'K':
1997 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong);
1998 case 'M':
1999 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float);
2000 case 'N':
2001 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double);
2002 case 'O':
2003 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble);
2004 case '_': {
2005 if (MangledName.empty()) {
2006 Error = true;
2007 return nullptr;
2009 const char F = MangledName.front();
2010 MangledName.remove_prefix(1);
2011 switch (F) {
2012 case 'N':
2013 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool);
2014 case 'J':
2015 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64);
2016 case 'K':
2017 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
2018 case 'W':
2019 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
2020 case 'Q':
2021 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8);
2022 case 'S':
2023 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
2024 case 'U':
2025 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32);
2027 break;
2030 Error = true;
2031 return nullptr;
2034 TagTypeNode *Demangler::demangleClassType(std::string_view &MangledName) {
2035 TagTypeNode *TT = nullptr;
2037 const char F = MangledName.front();
2038 MangledName.remove_prefix(1);
2039 switch (F) {
2040 case 'T':
2041 TT = Arena.alloc<TagTypeNode>(TagKind::Union);
2042 break;
2043 case 'U':
2044 TT = Arena.alloc<TagTypeNode>(TagKind::Struct);
2045 break;
2046 case 'V':
2047 TT = Arena.alloc<TagTypeNode>(TagKind::Class);
2048 break;
2049 case 'W':
2050 if (!consumeFront(MangledName, '4')) {
2051 Error = true;
2052 return nullptr;
2054 TT = Arena.alloc<TagTypeNode>(TagKind::Enum);
2055 break;
2056 default:
2057 assert(false);
2060 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName);
2061 return TT;
2064 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
2065 // # the E is required for 64-bit non-static pointers
2066 PointerTypeNode *Demangler::demanglePointerType(std::string_view &MangledName) {
2067 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2069 std::tie(Pointer->Quals, Pointer->Affinity) =
2070 demanglePointerCVQualifiers(MangledName);
2072 if (consumeFront(MangledName, "6")) {
2073 Pointer->Pointee = demangleFunctionType(MangledName, false);
2074 return Pointer;
2077 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2078 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2080 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
2081 return Pointer;
2084 PointerTypeNode *
2085 Demangler::demangleMemberPointerType(std::string_view &MangledName) {
2086 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2088 std::tie(Pointer->Quals, Pointer->Affinity) =
2089 demanglePointerCVQualifiers(MangledName);
2090 assert(Pointer->Affinity == PointerAffinity::Pointer);
2092 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2093 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2095 // isMemberPointer() only returns true if there is at least one character
2096 // after the qualifiers.
2097 if (consumeFront(MangledName, "8")) {
2098 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2099 Pointer->Pointee = demangleFunctionType(MangledName, true);
2100 } else {
2101 Qualifiers PointeeQuals = Q_None;
2102 bool IsMember = false;
2103 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
2104 assert(IsMember || Error);
2105 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2107 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
2108 if (Pointer->Pointee)
2109 Pointer->Pointee->Quals = PointeeQuals;
2112 return Pointer;
2115 Qualifiers
2116 Demangler::demanglePointerExtQualifiers(std::string_view &MangledName) {
2117 Qualifiers Quals = Q_None;
2118 if (consumeFront(MangledName, 'E'))
2119 Quals = Qualifiers(Quals | Q_Pointer64);
2120 if (consumeFront(MangledName, 'I'))
2121 Quals = Qualifiers(Quals | Q_Restrict);
2122 if (consumeFront(MangledName, 'F'))
2123 Quals = Qualifiers(Quals | Q_Unaligned);
2125 return Quals;
2128 ArrayTypeNode *Demangler::demangleArrayType(std::string_view &MangledName) {
2129 assert(MangledName.front() == 'Y');
2130 MangledName.remove_prefix(1);
2132 uint64_t Rank = 0;
2133 bool IsNegative = false;
2134 std::tie(Rank, IsNegative) = demangleNumber(MangledName);
2135 if (IsNegative || Rank == 0) {
2136 Error = true;
2137 return nullptr;
2140 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>();
2141 NodeList *Head = Arena.alloc<NodeList>();
2142 NodeList *Tail = Head;
2144 for (uint64_t I = 0; I < Rank; ++I) {
2145 uint64_t D = 0;
2146 std::tie(D, IsNegative) = demangleNumber(MangledName);
2147 if (Error || IsNegative) {
2148 Error = true;
2149 return nullptr;
2151 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative);
2152 if (I + 1 < Rank) {
2153 Tail->Next = Arena.alloc<NodeList>();
2154 Tail = Tail->Next;
2157 ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank);
2159 if (consumeFront(MangledName, "$$C")) {
2160 bool IsMember = false;
2161 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName);
2162 if (IsMember) {
2163 Error = true;
2164 return nullptr;
2168 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
2169 return ATy;
2172 // Reads a function's parameters.
2173 NodeArrayNode *
2174 Demangler::demangleFunctionParameterList(std::string_view &MangledName,
2175 bool &IsVariadic) {
2176 // Empty parameter list.
2177 if (consumeFront(MangledName, 'X'))
2178 return nullptr;
2180 NodeList *Head = Arena.alloc<NodeList>();
2181 NodeList **Current = &Head;
2182 size_t Count = 0;
2183 while (!Error && !llvm::itanium_demangle::starts_with(MangledName, '@') &&
2184 !llvm::itanium_demangle::starts_with(MangledName, 'Z')) {
2185 ++Count;
2187 if (startsWithDigit(MangledName)) {
2188 size_t N = MangledName[0] - '0';
2189 if (N >= Backrefs.FunctionParamCount) {
2190 Error = true;
2191 return nullptr;
2193 MangledName.remove_prefix(1);
2195 *Current = Arena.alloc<NodeList>();
2196 (*Current)->N = Backrefs.FunctionParams[N];
2197 Current = &(*Current)->Next;
2198 continue;
2201 size_t OldSize = MangledName.size();
2203 *Current = Arena.alloc<NodeList>();
2204 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop);
2205 if (!TN || Error)
2206 return nullptr;
2208 (*Current)->N = TN;
2210 size_t CharsConsumed = OldSize - MangledName.size();
2211 assert(CharsConsumed != 0);
2213 // Single-letter types are ignored for backreferences because memorizing
2214 // them doesn't save anything.
2215 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1)
2216 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN;
2218 Current = &(*Current)->Next;
2221 if (Error)
2222 return nullptr;
2224 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
2225 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter
2226 // list or '@' (non variadic). Careful not to consume "@Z", as in that case
2227 // the following Z could be a throw specifier.
2228 if (consumeFront(MangledName, '@'))
2229 return NA;
2231 if (consumeFront(MangledName, 'Z')) {
2232 IsVariadic = true;
2233 return NA;
2236 DEMANGLE_UNREACHABLE;
2239 NodeArrayNode *
2240 Demangler::demangleTemplateParameterList(std::string_view &MangledName) {
2241 NodeList *Head = nullptr;
2242 NodeList **Current = &Head;
2243 size_t Count = 0;
2245 while (!llvm::itanium_demangle::starts_with(MangledName, '@')) {
2246 if (consumeFront(MangledName, "$S") || consumeFront(MangledName, "$$V") ||
2247 consumeFront(MangledName, "$$$V") || consumeFront(MangledName, "$$Z")) {
2248 // parameter pack separator
2249 continue;
2252 ++Count;
2254 // Template parameter lists don't participate in back-referencing.
2255 *Current = Arena.alloc<NodeList>();
2257 NodeList &TP = **Current;
2259 TemplateParameterReferenceNode *TPRN = nullptr;
2260 if (consumeFront(MangledName, "$$Y")) {
2261 // Template alias
2262 TP.N = demangleFullyQualifiedTypeName(MangledName);
2263 } else if (consumeFront(MangledName, "$$B")) {
2264 // Array
2265 TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2266 } else if (consumeFront(MangledName, "$$C")) {
2267 // Type has qualifiers.
2268 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle);
2269 } else if (llvm::itanium_demangle::starts_with(MangledName, "$1") ||
2270 llvm::itanium_demangle::starts_with(MangledName, "$H") ||
2271 llvm::itanium_demangle::starts_with(MangledName, "$I") ||
2272 llvm::itanium_demangle::starts_with(MangledName, "$J")) {
2273 // Pointer to member
2274 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2275 TPRN->IsMemberPointer = true;
2277 MangledName.remove_prefix(1);
2278 // 1 - single inheritance <name>
2279 // H - multiple inheritance <name> <number>
2280 // I - virtual inheritance <name> <number> <number>
2281 // J - unspecified inheritance <name> <number> <number> <number>
2282 char InheritanceSpecifier = MangledName.front();
2283 MangledName.remove_prefix(1);
2284 SymbolNode *S = nullptr;
2285 if (llvm::itanium_demangle::starts_with(MangledName, '?')) {
2286 S = parse(MangledName);
2287 if (Error || !S->Name) {
2288 Error = true;
2289 return nullptr;
2291 memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
2294 switch (InheritanceSpecifier) {
2295 case 'J':
2296 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2297 demangleSigned(MangledName);
2298 DEMANGLE_FALLTHROUGH;
2299 case 'I':
2300 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2301 demangleSigned(MangledName);
2302 DEMANGLE_FALLTHROUGH;
2303 case 'H':
2304 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2305 demangleSigned(MangledName);
2306 DEMANGLE_FALLTHROUGH;
2307 case '1':
2308 break;
2309 default:
2310 DEMANGLE_UNREACHABLE;
2312 TPRN->Affinity = PointerAffinity::Pointer;
2313 TPRN->Symbol = S;
2314 } else if (llvm::itanium_demangle::starts_with(MangledName, "$E?")) {
2315 consumeFront(MangledName, "$E");
2316 // Reference to symbol
2317 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2318 TPRN->Symbol = parse(MangledName);
2319 TPRN->Affinity = PointerAffinity::Reference;
2320 } else if (llvm::itanium_demangle::starts_with(MangledName, "$F") ||
2321 llvm::itanium_demangle::starts_with(MangledName, "$G")) {
2322 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2324 // Data member pointer.
2325 MangledName.remove_prefix(1);
2326 char InheritanceSpecifier = MangledName.front();
2327 MangledName.remove_prefix(1);
2329 switch (InheritanceSpecifier) {
2330 case 'G':
2331 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2332 demangleSigned(MangledName);
2333 DEMANGLE_FALLTHROUGH;
2334 case 'F':
2335 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2336 demangleSigned(MangledName);
2337 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2338 demangleSigned(MangledName);
2339 break;
2340 default:
2341 DEMANGLE_UNREACHABLE;
2343 TPRN->IsMemberPointer = true;
2345 } else if (consumeFront(MangledName, "$0")) {
2346 // Integral non-type template parameter
2347 bool IsNegative = false;
2348 uint64_t Value = 0;
2349 std::tie(Value, IsNegative) = demangleNumber(MangledName);
2351 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative);
2352 } else {
2353 TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2355 if (Error)
2356 return nullptr;
2358 Current = &TP.Next;
2361 // The loop above returns nullptr on Error.
2362 assert(!Error);
2364 // Template parameter lists cannot be variadic, so it can only be terminated
2365 // by @ (as opposed to 'Z' in the function parameter case).
2366 assert(llvm::itanium_demangle::starts_with(
2367 MangledName, '@')); // The above loop exits only on '@'.
2368 consumeFront(MangledName, '@');
2369 return nodeListToNodeArray(Arena, Head, Count);
2372 void Demangler::dumpBackReferences() {
2373 std::printf("%d function parameter backreferences\n",
2374 (int)Backrefs.FunctionParamCount);
2376 // Create an output stream so we can render each type.
2377 OutputBuffer OB;
2378 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
2379 OB.setCurrentPosition(0);
2381 TypeNode *T = Backrefs.FunctionParams[I];
2382 T->output(OB, OF_Default);
2384 std::string_view B = OB;
2385 std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.data());
2387 std::free(OB.getBuffer());
2389 if (Backrefs.FunctionParamCount > 0)
2390 std::printf("\n");
2391 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount);
2392 for (size_t I = 0; I < Backrefs.NamesCount; ++I) {
2393 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(),
2394 Backrefs.Names[I]->Name.data());
2396 if (Backrefs.NamesCount > 0)
2397 std::printf("\n");
2400 char *llvm::microsoftDemangle(std::string_view MangledName, size_t *NMangled,
2401 int *Status, MSDemangleFlags Flags) {
2402 Demangler D;
2404 std::string_view Name{MangledName};
2405 SymbolNode *AST = D.parse(Name);
2406 if (!D.Error && NMangled)
2407 *NMangled = MangledName.size() - Name.size();
2409 if (Flags & MSDF_DumpBackrefs)
2410 D.dumpBackReferences();
2412 OutputFlags OF = OF_Default;
2413 if (Flags & MSDF_NoCallingConvention)
2414 OF = OutputFlags(OF | OF_NoCallingConvention);
2415 if (Flags & MSDF_NoAccessSpecifier)
2416 OF = OutputFlags(OF | OF_NoAccessSpecifier);
2417 if (Flags & MSDF_NoReturnType)
2418 OF = OutputFlags(OF | OF_NoReturnType);
2419 if (Flags & MSDF_NoMemberType)
2420 OF = OutputFlags(OF | OF_NoMemberType);
2421 if (Flags & MSDF_NoVariableType)
2422 OF = OutputFlags(OF | OF_NoVariableType);
2424 int InternalStatus = demangle_success;
2425 char *Buf;
2426 if (D.Error)
2427 InternalStatus = demangle_invalid_mangled_name;
2428 else {
2429 OutputBuffer OB;
2430 AST->output(OB, OF);
2431 OB += '\0';
2432 Buf = OB.getBuffer();
2435 if (Status)
2436 *Status = InternalStatus;
2437 return InternalStatus == demangle_success ? Buf : nullptr;