1 //===-- Mangled.cpp -------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "lldb/Core/Mangled.h"
11 #include "lldb/Core/DataFileCache.h"
12 #include "lldb/Core/RichManglingContext.h"
13 #include "lldb/Target/Language.h"
14 #include "lldb/Utility/ConstString.h"
15 #include "lldb/Utility/DataEncoder.h"
16 #include "lldb/Utility/LLDBLog.h"
17 #include "lldb/Utility/Log.h"
18 #include "lldb/Utility/RegularExpression.h"
19 #include "lldb/Utility/Stream.h"
20 #include "lldb/lldb-enumerations.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Demangle/Demangle.h"
25 #include "llvm/Support/Compiler.h"
29 #include <string_view>
34 using namespace lldb_private
;
36 static inline bool cstring_is_mangled(llvm::StringRef s
) {
37 return Mangled::GetManglingScheme(s
) != Mangled::eManglingSchemeNone
;
42 Mangled::ManglingScheme
Mangled::GetManglingScheme(llvm::StringRef
const name
) {
44 return Mangled::eManglingSchemeNone
;
46 if (name
.starts_with("?"))
47 return Mangled::eManglingSchemeMSVC
;
49 if (name
.starts_with("_R"))
50 return Mangled::eManglingSchemeRustV0
;
52 if (name
.starts_with("_D")) {
53 // A dlang mangled name begins with `_D`, followed by a numeric length. One
54 // known exception is the symbol `_Dmain`.
55 // See `SymbolName` and `LName` in
56 // https://dlang.org/spec/abi.html#name_mangling
57 llvm::StringRef buf
= name
.drop_front(2);
58 if (!buf
.empty() && (llvm::isDigit(buf
.front()) || name
== "_Dmain"))
59 return Mangled::eManglingSchemeD
;
62 if (name
.starts_with("_Z"))
63 return Mangled::eManglingSchemeItanium
;
65 // ___Z is a clang extension of block invocations
66 if (name
.starts_with("___Z"))
67 return Mangled::eManglingSchemeItanium
;
69 // Swift's older style of mangling used "_T" as a mangling prefix. This can
70 // lead to false positives with other symbols that just so happen to start
71 // with "_T". To minimize the chance of that happening, we only return true
72 // for select old-style swift mangled names. The known cases are ObjC classes
73 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
74 // Protocols are prefixed with "_TtP".
75 if (name
.starts_with("_TtC") || name
.starts_with("_TtGC") ||
76 name
.starts_with("_TtP"))
77 return Mangled::eManglingSchemeSwift
;
79 // Swift 4.2 used "$S" and "_$S".
80 // Swift 5 and onward uses "$s" and "_$s".
81 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
82 if (name
.starts_with("$S") || name
.starts_with("_$S") ||
83 name
.starts_with("$s") || name
.starts_with("_$s") ||
84 name
.starts_with("@__swiftmacro_"))
85 return Mangled::eManglingSchemeSwift
;
87 return Mangled::eManglingSchemeNone
;
90 Mangled::Mangled(ConstString s
) : m_mangled(), m_demangled() {
95 Mangled::Mangled(llvm::StringRef name
) {
97 SetValue(ConstString(name
));
100 // Convert to bool operator. This allows code to check any Mangled objects
101 // to see if they contain anything valid using code such as:
103 // Mangled mangled(...);
106 Mangled::operator bool() const { return m_mangled
|| m_demangled
; }
108 // Clear the mangled and demangled values.
109 void Mangled::Clear() {
114 // Compare the string values.
115 int Mangled::Compare(const Mangled
&a
, const Mangled
&b
) {
116 return ConstString::Compare(a
.GetName(ePreferMangled
),
117 b
.GetName(ePreferMangled
));
120 void Mangled::SetValue(ConstString name
) {
122 if (cstring_is_mangled(name
.GetStringRef())) {
135 // Local helpers for different demangling implementations.
136 static char *GetMSVCDemangledStr(llvm::StringRef M
) {
137 char *demangled_cstr
= llvm::microsoftDemangle(
139 llvm::MSDemangleFlags(
140 llvm::MSDF_NoAccessSpecifier
| llvm::MSDF_NoCallingConvention
|
141 llvm::MSDF_NoMemberType
| llvm::MSDF_NoVariableType
));
143 if (Log
*log
= GetLog(LLDBLog::Demangle
)) {
144 if (demangled_cstr
&& demangled_cstr
[0])
145 LLDB_LOGF(log
, "demangled msvc: %s -> \"%s\"", M
.data(), demangled_cstr
);
147 LLDB_LOGF(log
, "demangled msvc: %s -> error", M
.data());
150 return demangled_cstr
;
153 static char *GetItaniumDemangledStr(const char *M
) {
154 char *demangled_cstr
= nullptr;
156 llvm::ItaniumPartialDemangler ipd
;
157 bool err
= ipd
.partialDemangle(M
);
159 // Default buffer and size (will realloc in case it's too small).
160 size_t demangled_size
= 80;
161 demangled_cstr
= static_cast<char *>(std::malloc(demangled_size
));
162 demangled_cstr
= ipd
.finishDemangle(demangled_cstr
, &demangled_size
);
164 assert(demangled_cstr
&&
165 "finishDemangle must always succeed if partialDemangle did");
166 assert(demangled_cstr
[demangled_size
- 1] == '\0' &&
167 "Expected demangled_size to return length including trailing null");
170 if (Log
*log
= GetLog(LLDBLog::Demangle
)) {
172 LLDB_LOGF(log
, "demangled itanium: %s -> \"%s\"", M
, demangled_cstr
);
174 LLDB_LOGF(log
, "demangled itanium: %s -> error: failed to demangle", M
);
177 return demangled_cstr
;
180 static char *GetRustV0DemangledStr(llvm::StringRef M
) {
181 char *demangled_cstr
= llvm::rustDemangle(M
);
183 if (Log
*log
= GetLog(LLDBLog::Demangle
)) {
184 if (demangled_cstr
&& demangled_cstr
[0])
185 LLDB_LOG(log
, "demangled rustv0: {0} -> \"{1}\"", M
, demangled_cstr
);
187 LLDB_LOG(log
, "demangled rustv0: {0} -> error: failed to demangle",
188 static_cast<std::string_view
>(M
));
191 return demangled_cstr
;
194 static char *GetDLangDemangledStr(llvm::StringRef M
) {
195 char *demangled_cstr
= llvm::dlangDemangle(M
);
197 if (Log
*log
= GetLog(LLDBLog::Demangle
)) {
198 if (demangled_cstr
&& demangled_cstr
[0])
199 LLDB_LOG(log
, "demangled dlang: {0} -> \"{1}\"", M
, demangled_cstr
);
201 LLDB_LOG(log
, "demangled dlang: {0} -> error: failed to demangle",
202 static_cast<std::string_view
>(M
));
205 return demangled_cstr
;
208 // Explicit demangling for scheduled requests during batch processing. This
209 // makes use of ItaniumPartialDemangler's rich demangle info
210 bool Mangled::GetRichManglingInfo(RichManglingContext
&context
,
211 SkipMangledNameFn
*skip_mangled_name
) {
212 // Others are not meant to arrive here. ObjC names or C's main() for example
213 // have their names stored in m_demangled, while m_mangled is empty.
216 // Check whether or not we are interested in this name at all.
217 ManglingScheme scheme
= GetManglingScheme(m_mangled
.GetStringRef());
218 if (skip_mangled_name
&& skip_mangled_name(m_mangled
.GetStringRef(), scheme
))
222 case eManglingSchemeNone
:
223 // The current mangled_name_filter would allow llvm_unreachable here.
226 case eManglingSchemeItanium
:
227 // We want the rich mangling info here, so we don't care whether or not
228 // there is a demangled string in the pool already.
229 return context
.FromItaniumName(m_mangled
);
231 case eManglingSchemeMSVC
: {
232 // We have no rich mangling for MSVC-mangled names yet, so first try to
233 // demangle it if necessary.
234 if (!m_demangled
&& !m_mangled
.GetMangledCounterpart(m_demangled
)) {
235 if (char *d
= GetMSVCDemangledStr(m_mangled
)) {
236 // Without the rich mangling info we have to demangle the full name.
237 // Copy it to string pool and connect the counterparts to accelerate
238 // later access in GetDemangledName().
239 m_demangled
.SetStringWithMangledCounterpart(llvm::StringRef(d
),
243 m_demangled
.SetCString("");
247 if (m_demangled
.IsEmpty()) {
248 // Cannot demangle it, so don't try parsing.
251 // Demangled successfully, we can try and parse it with
252 // CPlusPlusLanguage::MethodName.
253 return context
.FromCxxMethodName(m_demangled
);
257 case eManglingSchemeRustV0
:
258 case eManglingSchemeD
:
259 case eManglingSchemeSwift
:
260 // Rich demangling scheme is not supported
263 llvm_unreachable("Fully covered switch above!");
266 // Generate the demangled name on demand using this accessor. Code in this
267 // class will need to use this accessor if it wishes to decode the demangled
268 // name. The result is cached and will be kept until a new string value is
269 // supplied to this object, or until the end of the object's lifetime.
270 ConstString
Mangled::GetDemangledName() const {
271 // Check to make sure we have a valid mangled name and that we haven't
272 // already decoded our mangled name.
273 if (m_mangled
&& m_demangled
.IsNull()) {
274 // Don't bother running anything that isn't mangled
275 const char *mangled_name
= m_mangled
.GetCString();
276 ManglingScheme mangling_scheme
=
277 GetManglingScheme(m_mangled
.GetStringRef());
278 if (mangling_scheme
!= eManglingSchemeNone
&&
279 !m_mangled
.GetMangledCounterpart(m_demangled
)) {
280 // We didn't already mangle this name, demangle it and if all goes well
281 // add it to our map.
282 char *demangled_name
= nullptr;
283 switch (mangling_scheme
) {
284 case eManglingSchemeMSVC
:
285 demangled_name
= GetMSVCDemangledStr(mangled_name
);
287 case eManglingSchemeItanium
: {
288 demangled_name
= GetItaniumDemangledStr(mangled_name
);
291 case eManglingSchemeRustV0
:
292 demangled_name
= GetRustV0DemangledStr(m_mangled
);
294 case eManglingSchemeD
:
295 demangled_name
= GetDLangDemangledStr(m_mangled
);
297 case eManglingSchemeSwift
:
298 // Demangling a swift name requires the swift compiler. This is
299 // explicitly unsupported on llvm.org.
301 case eManglingSchemeNone
:
302 llvm_unreachable("eManglingSchemeNone was handled already");
304 if (demangled_name
) {
305 m_demangled
.SetStringWithMangledCounterpart(
306 llvm::StringRef(demangled_name
), m_mangled
);
307 free(demangled_name
);
310 if (m_demangled
.IsNull()) {
311 // Set the demangled string to the empty string to indicate we tried to
312 // parse it once and failed.
313 m_demangled
.SetCString("");
320 ConstString
Mangled::GetDisplayDemangledName() const {
321 if (Language
*lang
= Language::FindPlugin(GuessLanguage()))
322 return lang
->GetDisplayDemangledName(*this);
323 return GetDemangledName();
326 bool Mangled::NameMatches(const RegularExpression
®ex
) const {
327 if (m_mangled
&& regex
.Execute(m_mangled
.GetStringRef()))
330 ConstString demangled
= GetDemangledName();
331 return demangled
&& regex
.Execute(demangled
.GetStringRef());
334 // Get the demangled name if there is one, else return the mangled name.
335 ConstString
Mangled::GetName(Mangled::NamePreference preference
) const {
336 if (preference
== ePreferMangled
&& m_mangled
)
339 // Call the accessor to make sure we get a demangled name in case it hasn't
340 // been demangled yet...
341 ConstString demangled
= GetDemangledName();
343 if (preference
== ePreferDemangledWithoutArguments
) {
344 if (Language
*lang
= Language::FindPlugin(GuessLanguage())) {
345 return lang
->GetDemangledFunctionNameWithoutArguments(*this);
348 if (preference
== ePreferDemangled
) {
356 // Dump a Mangled object to stream "s". We don't force our demangled name to be
357 // computed currently (we don't use the accessor).
358 void Mangled::Dump(Stream
*s
) const {
360 *s
<< ", mangled = " << m_mangled
;
363 const char *demangled
= m_demangled
.AsCString();
364 s
->Printf(", demangled = %s", demangled
[0] ? demangled
: "<error>");
368 // Dumps a debug version of this string with extra object and state information
370 void Mangled::DumpDebug(Stream
*s
) const {
371 s
->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
372 static_cast<const void *>(this));
373 m_mangled
.DumpDebug(s
);
374 s
->Printf(", demangled = ");
375 m_demangled
.DumpDebug(s
);
378 // Return the size in byte that this object takes in memory. The size includes
379 // the size of the objects it owns, and not the strings that it references
380 // because they are shared strings.
381 size_t Mangled::MemorySize() const {
382 return m_mangled
.MemorySize() + m_demangled
.MemorySize();
385 // We "guess" the language because we can't determine a symbol's language from
386 // it's name. For example, a Pascal symbol can be mangled using the C++
387 // Itanium scheme, and defined in a compilation unit within the same module as
388 // other C++ units. In addition, different targets could have different ways
389 // of mangling names from a given language, likewise the compilation units
390 // within those targets.
391 lldb::LanguageType
Mangled::GuessLanguage() const {
392 lldb::LanguageType result
= lldb::eLanguageTypeUnknown
;
393 // Ask each language plugin to check if the mangled name belongs to it.
394 Language::ForEach([this, &result
](Language
*l
) {
395 if (l
->SymbolNameFitsToLanguage(*this)) {
396 result
= l
->GetLanguageType();
404 // Dump OBJ to the supplied stream S.
405 Stream
&operator<<(Stream
&s
, const Mangled
&obj
) {
406 if (obj
.GetMangledName())
407 s
<< "mangled = '" << obj
.GetMangledName() << "'";
409 ConstString demangled
= obj
.GetDemangledName();
411 s
<< ", demangled = '" << demangled
<< '\'';
413 s
<< ", demangled = <error>";
// When encoding Mangled objects we can get away with encoding as little
// information as is required. The enumeration below helps us to efficiently
// encode Mangled objects.
enum MangledEncoding {
  /// If the Mangled object has neither a mangled name nor a demangled name we
  /// can encode the object with one zero byte using the Empty enumeration.
  Empty = 0u,
  /// If the Mangled object has only a demangled name and no mangled name, we
  /// can encode only the demangled name.
  DemangledOnly = 1u,
  /// If the mangled name can calculate the demangled name (it is the
  /// mangled/demangled counterpart), then we only need to encode the mangled
  /// name as the demangled name can be recomputed.
  MangledOnly = 2u,
  /// If we have a Mangled object with two different names that are not related
  /// then we need to save both strings. This can happen if we have a name that
  /// isn't a true mangled name, but we want to be able to lookup a symbol by
  /// name and type in the symbol table. We do this for Objective C symbols like
  /// "OBJC_CLASS_$_NSValue" where the mangled name will be set to
  /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to
  /// "NSValue". If we tried to demangle the name "OBJC_CLASS_$_NSValue" it
  /// would fail, but in these cases we want these unrelated names to be
  /// preserved.
  MangledAndDemangled = 3u
};
443 bool Mangled::Decode(const DataExtractor
&data
, lldb::offset_t
*offset_ptr
,
444 const StringTableReader
&strtab
) {
447 MangledEncoding encoding
= (MangledEncoding
)data
.GetU8(offset_ptr
);
453 m_demangled
.SetString(strtab
.Get(data
.GetU32(offset_ptr
)));
457 m_mangled
.SetString(strtab
.Get(data
.GetU32(offset_ptr
)));
460 case MangledAndDemangled
:
461 m_mangled
.SetString(strtab
.Get(data
.GetU32(offset_ptr
)));
462 m_demangled
.SetString(strtab
.Get(data
.GetU32(offset_ptr
)));
467 /// The encoding format for the Mangled object is as follows:
469 /// uint8_t encoding;
470 /// char str1[]; (only if DemangledOnly, MangledOnly)
471 /// char str2[]; (only if MangledAndDemangled)
473 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2
474 /// are only saved if we need them based on the encoding.
476 /// Some mangled names have a mangled name that can be demangled by the built
477 /// in demanglers. These kinds of mangled objects know when the mangled and
478 /// demangled names are the counterparts for each other. This is done because
479 /// demangling is very expensive and avoiding demangling the same name twice
480 /// saves us a lot of compute time. For these kinds of names we only need to
481 /// save the mangled name and have the encoding set to "MangledOnly".
483 /// If a mangled obejct has only a demangled name, then we save only that string
484 /// and have the encoding set to "DemangledOnly".
486 /// Some mangled objects have both mangled and demangled names, but the
487 /// demangled name can not be computed from the mangled name. This is often used
488 /// for runtime named, like Objective C runtime V2 and V3 names. Both these
489 /// names must be saved and the encoding is set to "MangledAndDemangled".
491 /// For a Mangled object with no names, we only need to set the encoding to
492 /// "Empty" and not store any string values.
493 void Mangled::Encode(DataEncoder
&file
, ConstStringTable
&strtab
) const {
494 MangledEncoding encoding
= Empty
;
496 encoding
= MangledOnly
;
498 // We have both mangled and demangled names. If the demangled name is the
499 // counterpart of the mangled name, then we only need to save the mangled
500 // named. If they are different, we need to save both.
502 if (!(m_mangled
.GetMangledCounterpart(s
) && s
== m_demangled
))
503 encoding
= MangledAndDemangled
;
505 } else if (m_demangled
) {
506 encoding
= DemangledOnly
;
508 file
.AppendU8(encoding
);
513 file
.AppendU32(strtab
.Add(m_demangled
));
516 file
.AppendU32(strtab
.Add(m_mangled
));
518 case MangledAndDemangled
:
519 file
.AppendU32(strtab
.Add(m_mangled
));
520 file
.AppendU32(strtab
.Add(m_demangled
));