Make test more lenient for custom clang version strings
[llvm-project.git] / lldb / source / Core / Mangled.cpp
blob387c4fac6b0f8c3b39036df80cb741efec36aba2
1 //===-- Mangled.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "lldb/Core/Mangled.h"
11 #include "lldb/Core/DataFileCache.h"
12 #include "lldb/Core/RichManglingContext.h"
13 #include "lldb/Target/Language.h"
14 #include "lldb/Utility/ConstString.h"
15 #include "lldb/Utility/DataEncoder.h"
16 #include "lldb/Utility/LLDBLog.h"
17 #include "lldb/Utility/Log.h"
18 #include "lldb/Utility/RegularExpression.h"
19 #include "lldb/Utility/Stream.h"
20 #include "lldb/lldb-enumerations.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Demangle/Demangle.h"
25 #include "llvm/Support/Compiler.h"
27 #include <mutex>
28 #include <string>
29 #include <string_view>
30 #include <utility>
32 #include <cstdlib>
33 #include <cstring>
34 using namespace lldb_private;
36 static inline bool cstring_is_mangled(llvm::StringRef s) {
37 return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone;
40 #pragma mark Mangled
42 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
43 if (name.empty())
44 return Mangled::eManglingSchemeNone;
46 if (name.starts_with("?"))
47 return Mangled::eManglingSchemeMSVC;
49 if (name.starts_with("_R"))
50 return Mangled::eManglingSchemeRustV0;
52 if (name.starts_with("_D")) {
53 // A dlang mangled name begins with `_D`, followed by a numeric length. One
54 // known exception is the symbol `_Dmain`.
55 // See `SymbolName` and `LName` in
56 // https://dlang.org/spec/abi.html#name_mangling
57 llvm::StringRef buf = name.drop_front(2);
58 if (!buf.empty() && (llvm::isDigit(buf.front()) || name == "_Dmain"))
59 return Mangled::eManglingSchemeD;
62 if (name.starts_with("_Z"))
63 return Mangled::eManglingSchemeItanium;
65 // ___Z is a clang extension of block invocations
66 if (name.starts_with("___Z"))
67 return Mangled::eManglingSchemeItanium;
69 // Swift's older style of mangling used "_T" as a mangling prefix. This can
70 // lead to false positives with other symbols that just so happen to start
71 // with "_T". To minimize the chance of that happening, we only return true
72 // for select old-style swift mangled names. The known cases are ObjC classes
73 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
74 // Protocols are prefixed with "_TtP".
75 if (name.starts_with("_TtC") || name.starts_with("_TtGC") ||
76 name.starts_with("_TtP"))
77 return Mangled::eManglingSchemeSwift;
79 // Swift 4.2 used "$S" and "_$S".
80 // Swift 5 and onward uses "$s" and "_$s".
81 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
82 if (name.starts_with("$S") || name.starts_with("_$S") ||
83 name.starts_with("$s") || name.starts_with("_$s") ||
84 name.starts_with("@__swiftmacro_"))
85 return Mangled::eManglingSchemeSwift;
87 return Mangled::eManglingSchemeNone;
90 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
91 if (s)
92 SetValue(s);
95 Mangled::Mangled(llvm::StringRef name) {
96 if (!name.empty())
97 SetValue(ConstString(name));
100 // Convert to bool operator. This allows code to check any Mangled objects
101 // to see if they contain anything valid using code such as:
103 // Mangled mangled(...);
104 // if (mangled)
105 // { ...
106 Mangled::operator bool() const { return m_mangled || m_demangled; }
108 // Clear the mangled and demangled values.
109 void Mangled::Clear() {
110 m_mangled.Clear();
111 m_demangled.Clear();
114 // Compare the string values.
115 int Mangled::Compare(const Mangled &a, const Mangled &b) {
116 return ConstString::Compare(a.GetName(ePreferMangled),
117 b.GetName(ePreferMangled));
120 void Mangled::SetValue(ConstString name) {
121 if (name) {
122 if (cstring_is_mangled(name.GetStringRef())) {
123 m_demangled.Clear();
124 m_mangled = name;
125 } else {
126 m_demangled = name;
127 m_mangled.Clear();
129 } else {
130 m_demangled.Clear();
131 m_mangled.Clear();
135 // Local helpers for different demangling implementations.
136 static char *GetMSVCDemangledStr(llvm::StringRef M) {
137 char *demangled_cstr = llvm::microsoftDemangle(
138 M, nullptr, nullptr,
139 llvm::MSDemangleFlags(
140 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
141 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
143 if (Log *log = GetLog(LLDBLog::Demangle)) {
144 if (demangled_cstr && demangled_cstr[0])
145 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
146 else
147 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
150 return demangled_cstr;
153 static char *GetItaniumDemangledStr(const char *M) {
154 char *demangled_cstr = nullptr;
156 llvm::ItaniumPartialDemangler ipd;
157 bool err = ipd.partialDemangle(M);
158 if (!err) {
159 // Default buffer and size (will realloc in case it's too small).
160 size_t demangled_size = 80;
161 demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
162 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
164 assert(demangled_cstr &&
165 "finishDemangle must always succeed if partialDemangle did");
166 assert(demangled_cstr[demangled_size - 1] == '\0' &&
167 "Expected demangled_size to return length including trailing null");
170 if (Log *log = GetLog(LLDBLog::Demangle)) {
171 if (demangled_cstr)
172 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
173 else
174 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
177 return demangled_cstr;
180 static char *GetRustV0DemangledStr(llvm::StringRef M) {
181 char *demangled_cstr = llvm::rustDemangle(M);
183 if (Log *log = GetLog(LLDBLog::Demangle)) {
184 if (demangled_cstr && demangled_cstr[0])
185 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
186 else
187 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",
188 static_cast<std::string_view>(M));
191 return demangled_cstr;
194 static char *GetDLangDemangledStr(llvm::StringRef M) {
195 char *demangled_cstr = llvm::dlangDemangle(M);
197 if (Log *log = GetLog(LLDBLog::Demangle)) {
198 if (demangled_cstr && demangled_cstr[0])
199 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
200 else
201 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",
202 static_cast<std::string_view>(M));
205 return demangled_cstr;
208 // Explicit demangling for scheduled requests during batch processing. This
209 // makes use of ItaniumPartialDemangler's rich demangle info
210 bool Mangled::GetRichManglingInfo(RichManglingContext &context,
211 SkipMangledNameFn *skip_mangled_name) {
212 // Others are not meant to arrive here. ObjC names or C's main() for example
213 // have their names stored in m_demangled, while m_mangled is empty.
214 assert(m_mangled);
216 // Check whether or not we are interested in this name at all.
217 ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef());
218 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
219 return false;
221 switch (scheme) {
222 case eManglingSchemeNone:
223 // The current mangled_name_filter would allow llvm_unreachable here.
224 return false;
226 case eManglingSchemeItanium:
227 // We want the rich mangling info here, so we don't care whether or not
228 // there is a demangled string in the pool already.
229 return context.FromItaniumName(m_mangled);
231 case eManglingSchemeMSVC: {
232 // We have no rich mangling for MSVC-mangled names yet, so first try to
233 // demangle it if necessary.
234 if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) {
235 if (char *d = GetMSVCDemangledStr(m_mangled)) {
236 // Without the rich mangling info we have to demangle the full name.
237 // Copy it to string pool and connect the counterparts to accelerate
238 // later access in GetDemangledName().
239 m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d),
240 m_mangled);
241 ::free(d);
242 } else {
243 m_demangled.SetCString("");
247 if (m_demangled.IsEmpty()) {
248 // Cannot demangle it, so don't try parsing.
249 return false;
250 } else {
251 // Demangled successfully, we can try and parse it with
252 // CPlusPlusLanguage::MethodName.
253 return context.FromCxxMethodName(m_demangled);
257 case eManglingSchemeRustV0:
258 case eManglingSchemeD:
259 case eManglingSchemeSwift:
260 // Rich demangling scheme is not supported
261 return false;
263 llvm_unreachable("Fully covered switch above!");
266 // Generate the demangled name on demand using this accessor. Code in this
267 // class will need to use this accessor if it wishes to decode the demangled
268 // name. The result is cached and will be kept until a new string value is
269 // supplied to this object, or until the end of the object's lifetime.
270 ConstString Mangled::GetDemangledName() const {
271 // Check to make sure we have a valid mangled name and that we haven't
272 // already decoded our mangled name.
273 if (m_mangled && m_demangled.IsNull()) {
274 // Don't bother running anything that isn't mangled
275 const char *mangled_name = m_mangled.GetCString();
276 ManglingScheme mangling_scheme =
277 GetManglingScheme(m_mangled.GetStringRef());
278 if (mangling_scheme != eManglingSchemeNone &&
279 !m_mangled.GetMangledCounterpart(m_demangled)) {
280 // We didn't already mangle this name, demangle it and if all goes well
281 // add it to our map.
282 char *demangled_name = nullptr;
283 switch (mangling_scheme) {
284 case eManglingSchemeMSVC:
285 demangled_name = GetMSVCDemangledStr(mangled_name);
286 break;
287 case eManglingSchemeItanium: {
288 demangled_name = GetItaniumDemangledStr(mangled_name);
289 break;
291 case eManglingSchemeRustV0:
292 demangled_name = GetRustV0DemangledStr(m_mangled);
293 break;
294 case eManglingSchemeD:
295 demangled_name = GetDLangDemangledStr(m_mangled);
296 break;
297 case eManglingSchemeSwift:
298 // Demangling a swift name requires the swift compiler. This is
299 // explicitly unsupported on llvm.org.
300 break;
301 case eManglingSchemeNone:
302 llvm_unreachable("eManglingSchemeNone was handled already");
304 if (demangled_name) {
305 m_demangled.SetStringWithMangledCounterpart(
306 llvm::StringRef(demangled_name), m_mangled);
307 free(demangled_name);
310 if (m_demangled.IsNull()) {
311 // Set the demangled string to the empty string to indicate we tried to
312 // parse it once and failed.
313 m_demangled.SetCString("");
317 return m_demangled;
320 ConstString Mangled::GetDisplayDemangledName() const {
321 if (Language *lang = Language::FindPlugin(GuessLanguage()))
322 return lang->GetDisplayDemangledName(*this);
323 return GetDemangledName();
326 bool Mangled::NameMatches(const RegularExpression &regex) const {
327 if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
328 return true;
330 ConstString demangled = GetDemangledName();
331 return demangled && regex.Execute(demangled.GetStringRef());
334 // Get the demangled name if there is one, else return the mangled name.
335 ConstString Mangled::GetName(Mangled::NamePreference preference) const {
336 if (preference == ePreferMangled && m_mangled)
337 return m_mangled;
339 // Call the accessor to make sure we get a demangled name in case it hasn't
340 // been demangled yet...
341 ConstString demangled = GetDemangledName();
343 if (preference == ePreferDemangledWithoutArguments) {
344 if (Language *lang = Language::FindPlugin(GuessLanguage())) {
345 return lang->GetDemangledFunctionNameWithoutArguments(*this);
348 if (preference == ePreferDemangled) {
349 if (demangled)
350 return demangled;
351 return m_mangled;
353 return demangled;
356 // Dump a Mangled object to stream "s". We don't force our demangled name to be
357 // computed currently (we don't use the accessor).
358 void Mangled::Dump(Stream *s) const {
359 if (m_mangled) {
360 *s << ", mangled = " << m_mangled;
362 if (m_demangled) {
363 const char *demangled = m_demangled.AsCString();
364 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
368 // Dumps a debug version of this string with extra object and state information
369 // to stream "s".
370 void Mangled::DumpDebug(Stream *s) const {
371 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
372 static_cast<const void *>(this));
373 m_mangled.DumpDebug(s);
374 s->Printf(", demangled = ");
375 m_demangled.DumpDebug(s);
378 // Return the size in byte that this object takes in memory. The size includes
379 // the size of the objects it owns, and not the strings that it references
380 // because they are shared strings.
381 size_t Mangled::MemorySize() const {
382 return m_mangled.MemorySize() + m_demangled.MemorySize();
385 // We "guess" the language because we can't determine a symbol's language from
386 // it's name. For example, a Pascal symbol can be mangled using the C++
387 // Itanium scheme, and defined in a compilation unit within the same module as
388 // other C++ units. In addition, different targets could have different ways
389 // of mangling names from a given language, likewise the compilation units
390 // within those targets.
391 lldb::LanguageType Mangled::GuessLanguage() const {
392 lldb::LanguageType result = lldb::eLanguageTypeUnknown;
393 // Ask each language plugin to check if the mangled name belongs to it.
394 Language::ForEach([this, &result](Language *l) {
395 if (l->SymbolNameFitsToLanguage(*this)) {
396 result = l->GetLanguageType();
397 return false;
399 return true;
401 return result;
404 // Dump OBJ to the supplied stream S.
405 Stream &operator<<(Stream &s, const Mangled &obj) {
406 if (obj.GetMangledName())
407 s << "mangled = '" << obj.GetMangledName() << "'";
409 ConstString demangled = obj.GetDemangledName();
410 if (demangled)
411 s << ", demangled = '" << demangled << '\'';
412 else
413 s << ", demangled = <error>";
414 return s;
// When encoding Mangled objects we can get away with encoding as little
// information as is required. The enumeration below selects which of the two
// names are written for a given object.
enum MangledEncoding {
  /// The Mangled object has neither a mangled name nor a demangled name; it
  /// is encoded as a single zero byte.
  Empty = 0u,
  /// The Mangled object has only a demangled name and no mangled name; only
  /// the demangled name is encoded.
  DemangledOnly = 1u,
  /// The demangled name can be calculated from the mangled name (they are
  /// mangled/demangled counterparts), so only the mangled name is encoded and
  /// the demangled name is recomputed when needed.
  MangledOnly = 2u,
  /// The Mangled object has two different names that are not related, so both
  /// strings are saved. This can happen for a name that isn't a true mangled
  /// name but that we still want to look up by name and type in the symbol
  /// table. We do this for Objective C symbols like "OBJC_CLASS_$_NSValue",
  /// where the mangled name is set to "OBJC_CLASS_$_NSValue" and the
  /// demangled name is manually set to "NSValue". Trying to demangle
  /// "OBJC_CLASS_$_NSValue" would fail, but in these cases we want these
  /// unrelated names to be preserved.
  MangledAndDemangled = 3u
};
443 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
444 const StringTableReader &strtab) {
445 m_mangled.Clear();
446 m_demangled.Clear();
447 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
448 switch (encoding) {
449 case Empty:
450 return true;
452 case DemangledOnly:
453 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
454 return true;
456 case MangledOnly:
457 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
458 return true;
460 case MangledAndDemangled:
461 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
462 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
463 return true;
465 return false;
467 /// The encoding format for the Mangled object is as follows:
469 /// uint8_t encoding;
470 /// char str1[]; (only if DemangledOnly, MangledOnly)
471 /// char str2[]; (only if MangledAndDemangled)
473 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2
474 /// are only saved if we need them based on the encoding.
476 /// Some mangled names have a mangled name that can be demangled by the built
477 /// in demanglers. These kinds of mangled objects know when the mangled and
478 /// demangled names are the counterparts for each other. This is done because
479 /// demangling is very expensive and avoiding demangling the same name twice
480 /// saves us a lot of compute time. For these kinds of names we only need to
481 /// save the mangled name and have the encoding set to "MangledOnly".
483 /// If a mangled obejct has only a demangled name, then we save only that string
484 /// and have the encoding set to "DemangledOnly".
486 /// Some mangled objects have both mangled and demangled names, but the
487 /// demangled name can not be computed from the mangled name. This is often used
488 /// for runtime named, like Objective C runtime V2 and V3 names. Both these
489 /// names must be saved and the encoding is set to "MangledAndDemangled".
491 /// For a Mangled object with no names, we only need to set the encoding to
492 /// "Empty" and not store any string values.
493 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
494 MangledEncoding encoding = Empty;
495 if (m_mangled) {
496 encoding = MangledOnly;
497 if (m_demangled) {
498 // We have both mangled and demangled names. If the demangled name is the
499 // counterpart of the mangled name, then we only need to save the mangled
500 // named. If they are different, we need to save both.
501 ConstString s;
502 if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled))
503 encoding = MangledAndDemangled;
505 } else if (m_demangled) {
506 encoding = DemangledOnly;
508 file.AppendU8(encoding);
509 switch (encoding) {
510 case Empty:
511 break;
512 case DemangledOnly:
513 file.AppendU32(strtab.Add(m_demangled));
514 break;
515 case MangledOnly:
516 file.AppendU32(strtab.Add(m_mangled));
517 break;
518 case MangledAndDemangled:
519 file.AppendU32(strtab.Add(m_mangled));
520 file.AppendU32(strtab.Add(m_demangled));
521 break;