1 //===-- Mangled.cpp -------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "lldb/Core/Mangled.h"
11 #include "lldb/Core/DataFileCache.h"
12 #include "lldb/Core/RichManglingContext.h"
13 #include "lldb/Target/Language.h"
14 #include "lldb/Utility/ConstString.h"
15 #include "lldb/Utility/DataEncoder.h"
16 #include "lldb/Utility/LLDBLog.h"
17 #include "lldb/Utility/Log.h"
18 #include "lldb/Utility/RegularExpression.h"
19 #include "lldb/Utility/Stream.h"
20 #include "lldb/lldb-enumerations.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Demangle/Demangle.h"
24 #include "llvm/Support/Compiler.h"
28 #include <string_view>
33 using namespace lldb_private
;
35 static inline bool cstring_is_mangled(llvm::StringRef s
) {
36 return Mangled::GetManglingScheme(s
) != Mangled::eManglingSchemeNone
;
41 Mangled::ManglingScheme
Mangled::GetManglingScheme(llvm::StringRef
const name
) {
43 return Mangled::eManglingSchemeNone
;
45 if (name
.startswith("?"))
46 return Mangled::eManglingSchemeMSVC
;
48 if (name
.startswith("_R"))
49 return Mangled::eManglingSchemeRustV0
;
51 if (name
.startswith("_D"))
52 return Mangled::eManglingSchemeD
;
54 if (name
.startswith("_Z"))
55 return Mangled::eManglingSchemeItanium
;
57 // ___Z is a clang extension of block invocations
58 if (name
.startswith("___Z"))
59 return Mangled::eManglingSchemeItanium
;
61 // Swift's older style of mangling used "_T" as a mangling prefix. This can
62 // lead to false positives with other symbols that just so happen to start
63 // with "_T". To minimize the chance of that happening, we only return true
64 // for select old-style swift mangled names. The known cases are ObjC classes
65 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
66 // Protocols are prefixed with "_TtP".
67 if (name
.startswith("_TtC") || name
.startswith("_TtGC") ||
68 name
.startswith("_TtP"))
69 return Mangled::eManglingSchemeSwift
;
71 // Swift 4.2 used "$S" and "_$S".
72 // Swift 5 and onward uses "$s" and "_$s".
73 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
74 if (name
.startswith("$S") || name
.startswith("_$S") ||
75 name
.startswith("$s") || name
.startswith("_$s") ||
76 name
.startswith("@__swiftmacro_"))
77 return Mangled::eManglingSchemeSwift
;
79 return Mangled::eManglingSchemeNone
;
82 Mangled::Mangled(ConstString s
) : m_mangled(), m_demangled() {
87 Mangled::Mangled(llvm::StringRef name
) {
89 SetValue(ConstString(name
));
92 // Convert to bool operator. This allows code to check any Mangled objects
93 // to see if they contain anything valid using code such as:
95 // Mangled mangled(...);
98 Mangled::operator bool() const { return m_mangled
|| m_demangled
; }
100 // Clear the mangled and demangled values.
101 void Mangled::Clear() {
106 // Compare the string values.
107 int Mangled::Compare(const Mangled
&a
, const Mangled
&b
) {
108 return ConstString::Compare(a
.GetName(ePreferMangled
),
109 b
.GetName(ePreferMangled
));
112 void Mangled::SetValue(ConstString name
) {
114 if (cstring_is_mangled(name
.GetStringRef())) {
127 // Local helpers for different demangling implementations.
128 static char *GetMSVCDemangledStr(std::string_view M
) {
129 char *demangled_cstr
= llvm::microsoftDemangle(
131 llvm::MSDemangleFlags(
132 llvm::MSDF_NoAccessSpecifier
| llvm::MSDF_NoCallingConvention
|
133 llvm::MSDF_NoMemberType
| llvm::MSDF_NoVariableType
));
135 if (Log
*log
= GetLog(LLDBLog::Demangle
)) {
136 if (demangled_cstr
&& demangled_cstr
[0])
137 LLDB_LOGF(log
, "demangled msvc: %s -> \"%s\"", M
.data(), demangled_cstr
);
139 LLDB_LOGF(log
, "demangled msvc: %s -> error", M
.data());
142 return demangled_cstr
;
145 static char *GetItaniumDemangledStr(const char *M
) {
146 char *demangled_cstr
= nullptr;
148 llvm::ItaniumPartialDemangler ipd
;
149 bool err
= ipd
.partialDemangle(M
);
151 // Default buffer and size (will realloc in case it's too small).
152 size_t demangled_size
= 80;
153 demangled_cstr
= static_cast<char *>(std::malloc(demangled_size
));
154 demangled_cstr
= ipd
.finishDemangle(demangled_cstr
, &demangled_size
);
156 assert(demangled_cstr
&&
157 "finishDemangle must always succeed if partialDemangle did");
158 assert(demangled_cstr
[demangled_size
- 1] == '\0' &&
159 "Expected demangled_size to return length including trailing null");
162 if (Log
*log
= GetLog(LLDBLog::Demangle
)) {
164 LLDB_LOGF(log
, "demangled itanium: %s -> \"%s\"", M
, demangled_cstr
);
166 LLDB_LOGF(log
, "demangled itanium: %s -> error: failed to demangle", M
);
169 return demangled_cstr
;
172 static char *GetRustV0DemangledStr(std::string_view M
) {
173 char *demangled_cstr
= llvm::rustDemangle(M
);
175 if (Log
*log
= GetLog(LLDBLog::Demangle
)) {
176 if (demangled_cstr
&& demangled_cstr
[0])
177 LLDB_LOG(log
, "demangled rustv0: {0} -> \"{1}\"", M
, demangled_cstr
);
179 LLDB_LOG(log
, "demangled rustv0: {0} -> error: failed to demangle", M
);
182 return demangled_cstr
;
185 static char *GetDLangDemangledStr(std::string_view M
) {
186 char *demangled_cstr
= llvm::dlangDemangle(M
);
188 if (Log
*log
= GetLog(LLDBLog::Demangle
)) {
189 if (demangled_cstr
&& demangled_cstr
[0])
190 LLDB_LOG(log
, "demangled dlang: {0} -> \"{1}\"", M
, demangled_cstr
);
192 LLDB_LOG(log
, "demangled dlang: {0} -> error: failed to demangle", M
);
195 return demangled_cstr
;
198 // Explicit demangling for scheduled requests during batch processing. This
199 // makes use of ItaniumPartialDemangler's rich demangle info
200 bool Mangled::GetRichManglingInfo(RichManglingContext
&context
,
201 SkipMangledNameFn
*skip_mangled_name
) {
202 // Others are not meant to arrive here. ObjC names or C's main() for example
203 // have their names stored in m_demangled, while m_mangled is empty.
206 // Check whether or not we are interested in this name at all.
207 ManglingScheme scheme
= GetManglingScheme(m_mangled
.GetStringRef());
208 if (skip_mangled_name
&& skip_mangled_name(m_mangled
.GetStringRef(), scheme
))
212 case eManglingSchemeNone
:
213 // The current mangled_name_filter would allow llvm_unreachable here.
216 case eManglingSchemeItanium
:
217 // We want the rich mangling info here, so we don't care whether or not
218 // there is a demangled string in the pool already.
219 return context
.FromItaniumName(m_mangled
);
221 case eManglingSchemeMSVC
: {
222 // We have no rich mangling for MSVC-mangled names yet, so first try to
223 // demangle it if necessary.
224 if (!m_demangled
&& !m_mangled
.GetMangledCounterpart(m_demangled
)) {
225 if (char *d
= GetMSVCDemangledStr(m_mangled
)) {
226 // Without the rich mangling info we have to demangle the full name.
227 // Copy it to string pool and connect the counterparts to accelerate
228 // later access in GetDemangledName().
229 m_demangled
.SetStringWithMangledCounterpart(llvm::StringRef(d
),
233 m_demangled
.SetCString("");
237 if (m_demangled
.IsEmpty()) {
238 // Cannot demangle it, so don't try parsing.
241 // Demangled successfully, we can try and parse it with
242 // CPlusPlusLanguage::MethodName.
243 return context
.FromCxxMethodName(m_demangled
);
247 case eManglingSchemeRustV0
:
248 case eManglingSchemeD
:
249 case eManglingSchemeSwift
:
250 // Rich demangling scheme is not supported
253 llvm_unreachable("Fully covered switch above!");
256 // Generate the demangled name on demand using this accessor. Code in this
257 // class will need to use this accessor if it wishes to decode the demangled
258 // name. The result is cached and will be kept until a new string value is
259 // supplied to this object, or until the end of the object's lifetime.
260 ConstString
Mangled::GetDemangledName() const {
261 // Check to make sure we have a valid mangled name and that we haven't
262 // already decoded our mangled name.
263 if (m_mangled
&& m_demangled
.IsNull()) {
264 // Don't bother running anything that isn't mangled
265 const char *mangled_name
= m_mangled
.GetCString();
266 ManglingScheme mangling_scheme
=
267 GetManglingScheme(m_mangled
.GetStringRef());
268 if (mangling_scheme
!= eManglingSchemeNone
&&
269 !m_mangled
.GetMangledCounterpart(m_demangled
)) {
270 // We didn't already mangle this name, demangle it and if all goes well
271 // add it to our map.
272 char *demangled_name
= nullptr;
273 switch (mangling_scheme
) {
274 case eManglingSchemeMSVC
:
275 demangled_name
= GetMSVCDemangledStr(mangled_name
);
277 case eManglingSchemeItanium
: {
278 demangled_name
= GetItaniumDemangledStr(mangled_name
);
281 case eManglingSchemeRustV0
:
282 demangled_name
= GetRustV0DemangledStr(m_mangled
);
284 case eManglingSchemeD
:
285 demangled_name
= GetDLangDemangledStr(m_mangled
);
287 case eManglingSchemeSwift
:
288 // Demangling a swift name requires the swift compiler. This is
289 // explicitly unsupported on llvm.org.
291 case eManglingSchemeNone
:
292 llvm_unreachable("eManglingSchemeNone was handled already");
294 if (demangled_name
) {
295 m_demangled
.SetStringWithMangledCounterpart(
296 llvm::StringRef(demangled_name
), m_mangled
);
297 free(demangled_name
);
300 if (m_demangled
.IsNull()) {
301 // Set the demangled string to the empty string to indicate we tried to
302 // parse it once and failed.
303 m_demangled
.SetCString("");
310 ConstString
Mangled::GetDisplayDemangledName() const {
311 return GetDemangledName();
314 bool Mangled::NameMatches(const RegularExpression
®ex
) const {
315 if (m_mangled
&& regex
.Execute(m_mangled
.GetStringRef()))
318 ConstString demangled
= GetDemangledName();
319 return demangled
&& regex
.Execute(demangled
.GetStringRef());
322 // Get the demangled name if there is one, else return the mangled name.
323 ConstString
Mangled::GetName(Mangled::NamePreference preference
) const {
324 if (preference
== ePreferMangled
&& m_mangled
)
327 // Call the accessor to make sure we get a demangled name in case it hasn't
328 // been demangled yet...
329 ConstString demangled
= GetDemangledName();
331 if (preference
== ePreferDemangledWithoutArguments
) {
332 if (Language
*lang
= Language::FindPlugin(GuessLanguage())) {
333 return lang
->GetDemangledFunctionNameWithoutArguments(*this);
336 if (preference
== ePreferDemangled
) {
344 // Dump a Mangled object to stream "s". We don't force our demangled name to be
345 // computed currently (we don't use the accessor).
346 void Mangled::Dump(Stream
*s
) const {
348 *s
<< ", mangled = " << m_mangled
;
351 const char *demangled
= m_demangled
.AsCString();
352 s
->Printf(", demangled = %s", demangled
[0] ? demangled
: "<error>");
356 // Dumps a debug version of this string with extra object and state information
358 void Mangled::DumpDebug(Stream
*s
) const {
359 s
->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
360 static_cast<const void *>(this));
361 m_mangled
.DumpDebug(s
);
362 s
->Printf(", demangled = ");
363 m_demangled
.DumpDebug(s
);
366 // Return the size in byte that this object takes in memory. The size includes
367 // the size of the objects it owns, and not the strings that it references
368 // because they are shared strings.
369 size_t Mangled::MemorySize() const {
370 return m_mangled
.MemorySize() + m_demangled
.MemorySize();
373 // We "guess" the language because we can't determine a symbol's language from
374 // it's name. For example, a Pascal symbol can be mangled using the C++
375 // Itanium scheme, and defined in a compilation unit within the same module as
376 // other C++ units. In addition, different targets could have different ways
377 // of mangling names from a given language, likewise the compilation units
378 // within those targets.
379 lldb::LanguageType
Mangled::GuessLanguage() const {
380 lldb::LanguageType result
= lldb::eLanguageTypeUnknown
;
381 // Ask each language plugin to check if the mangled name belongs to it.
382 Language::ForEach([this, &result
](Language
*l
) {
383 if (l
->SymbolNameFitsToLanguage(*this)) {
384 result
= l
->GetLanguageType();
392 // Dump OBJ to the supplied stream S.
393 Stream
&operator<<(Stream
&s
, const Mangled
&obj
) {
394 if (obj
.GetMangledName())
395 s
<< "mangled = '" << obj
.GetMangledName() << "'";
397 ConstString demangled
= obj
.GetDemangledName();
399 s
<< ", demangled = '" << demangled
<< '\'';
401 s
<< ", demangled = <error>";
// When encoding Mangled objects we can get away with encoding as little
// information as is required. The enumeration below helps us to efficiently
// encode Mangled objects. The values are written out by Mangled::Encode() and
// read back by Mangled::Decode(), so they must not be renumbered.
enum MangledEncoding {
  /// If the Mangled object has neither a mangled name nor a demangled name we
  /// can encode the object with one zero byte using the Empty enumeration.
  Empty = 0u,
  /// If the Mangled object has only a demangled name and no mangled name, we
  /// can encode only the demangled name.
  DemangledOnly = 1u,
  /// If the mangled name can calculate the demangled name (it is the
  /// mangled/demangled counterpart), then we only need to encode the mangled
  /// name as the demangled name can be recomputed.
  MangledOnly = 2u,
  /// If we have a Mangled object with two different names that are not related
  /// then we need to save both strings. This can happen if we have a name that
  /// isn't a true mangled name, but we want to be able to lookup a symbol by
  /// name and type in the symbol table. We do this for Objective C symbols like
  /// "OBJC_CLASS_$_NSValue" where the mangled name will be set to
  /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to
  /// "NSValue". If we tried to demangle the name "OBJC_CLASS_$_NSValue" it
  /// would fail, but in these cases we want these unrelated names to be
  /// preserved.
  MangledAndDemangled = 3u
};
431 bool Mangled::Decode(const DataExtractor
&data
, lldb::offset_t
*offset_ptr
,
432 const StringTableReader
&strtab
) {
435 MangledEncoding encoding
= (MangledEncoding
)data
.GetU8(offset_ptr
);
441 m_demangled
.SetString(strtab
.Get(data
.GetU32(offset_ptr
)));
445 m_mangled
.SetString(strtab
.Get(data
.GetU32(offset_ptr
)));
448 case MangledAndDemangled
:
449 m_mangled
.SetString(strtab
.Get(data
.GetU32(offset_ptr
)));
450 m_demangled
.SetString(strtab
.Get(data
.GetU32(offset_ptr
)));
455 /// The encoding format for the Mangled object is as follows:
457 /// uint8_t encoding;
458 /// char str1[]; (only if DemangledOnly, MangledOnly)
459 /// char str2[]; (only if MangledAndDemangled)
461 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2
462 /// are only saved if we need them based on the encoding.
464 /// Some mangled names have a mangled name that can be demangled by the built
465 /// in demanglers. These kinds of mangled objects know when the mangled and
466 /// demangled names are the counterparts for each other. This is done because
467 /// demangling is very expensive and avoiding demangling the same name twice
468 /// saves us a lot of compute time. For these kinds of names we only need to
469 /// save the mangled name and have the encoding set to "MangledOnly".
/// If a mangled object has only a demangled name, then we save only that string
/// and have the encoding set to "DemangledOnly".
/// Some mangled objects have both mangled and demangled names, but the
/// demangled name can not be computed from the mangled name. This is often used
/// for runtime names, like Objective C runtime V2 and V3 names. Both these
/// names must be saved and the encoding is set to "MangledAndDemangled".
479 /// For a Mangled object with no names, we only need to set the encoding to
480 /// "Empty" and not store any string values.
481 void Mangled::Encode(DataEncoder
&file
, ConstStringTable
&strtab
) const {
482 MangledEncoding encoding
= Empty
;
484 encoding
= MangledOnly
;
486 // We have both mangled and demangled names. If the demangled name is the
487 // counterpart of the mangled name, then we only need to save the mangled
488 // named. If they are different, we need to save both.
490 if (!(m_mangled
.GetMangledCounterpart(s
) && s
== m_demangled
))
491 encoding
= MangledAndDemangled
;
493 } else if (m_demangled
) {
494 encoding
= DemangledOnly
;
496 file
.AppendU8(encoding
);
501 file
.AppendU32(strtab
.Add(m_demangled
));
504 file
.AppendU32(strtab
.Add(m_mangled
));
506 case MangledAndDemangled
:
507 file
.AppendU32(strtab
.Add(m_mangled
));
508 file
.AppendU32(strtab
.Add(m_demangled
));