1 //===-- BreakpadRecords.cpp -----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
16 using namespace lldb_private
;
17 using namespace lldb_private::breakpad
;
// Generic string-to-value conversion helper. The definitions that follow
// provide it for Token, llvm::Triple::OSType and llvm::Triple::ArchType.
38 static T
stringTo(llvm::StringRef Str
);
// Converts a record keyword into the matching Token value. Any string that is
// not one of the keywords listed below yields Token::Unknown.
40 template <> Token stringTo
<Token
>(llvm::StringRef Str
) {
41 return llvm::StringSwitch
<Token
>(Str
)
42 .Case("MODULE", Token::Module
)
43 .Case("INFO", Token::Info
)
44 .Case("CODE_ID", Token::CodeID
)
45 .Case("FILE", Token::File
)
46 .Case("FUNC", Token::Func
)
47 .Case("INLINE", Token::Inline
)
48 .Case("INLINE_ORIGIN", Token::InlineOrigin
)
49 .Case("PUBLIC", Token::Public
)
50 .Case("STACK", Token::Stack
)
51 .Case("CFI", Token::CFI
)
52 .Case("INIT", Token::Init
)
53 .Case("WIN", Token::Win
)
54 .Default(Token::Unknown
);
// Converts the operating system name ("Linux", "mac" or "windows") into an
// llvm::Triple::OSType. Any other string yields Triple::UnknownOS.
58 llvm::Triple::OSType stringTo
<llvm::Triple::OSType
>(llvm::StringRef Str
) {
60 return llvm::StringSwitch
<Triple::OSType
>(Str
)
61 .Case("Linux", Triple::Linux
)
62 .Case("mac", Triple::MacOSX
)
63 .Case("windows", Triple::Win32
)
64 .Default(Triple::UnknownOS
);
// Converts the architecture name into an llvm::Triple::ArchType. "arm64" and
// "arm64e" both map to aarch64, and "x86_64" and "x86_64h" both map to x86_64.
// Any name not listed below yields Triple::UnknownArch.
68 llvm::Triple::ArchType stringTo
<llvm::Triple::ArchType
>(llvm::StringRef Str
) {
70 return llvm::StringSwitch
<Triple::ArchType
>(Str
)
71 .Case("arm", Triple::arm
)
72 .Cases("arm64", "arm64e", Triple::aarch64
)
73 .Case("mips", Triple::mips
)
74 .Case("msp430", Triple::msp430
)
75 .Case("ppc", Triple::ppc
)
76 .Case("ppc64", Triple::ppc64
)
77 .Case("s390", Triple::systemz
)
78 .Case("sparc", Triple::sparc
)
79 .Case("sparcv9", Triple::sparcv9
)
80 .Case("x86", Triple::x86
)
81 .Cases("x86_64", "x86_64h", Triple::x86_64
)
82 .Default(Triple::UnknownArch
);
// Splits the leading token off Str with getToken, storing the second half of
// the returned pair back into Str, and converts that token with stringTo<T>.
86 static T
consume(llvm::StringRef
&Str
) {
87 llvm::StringRef Token
;
88 std::tie(Token
, Str
) = getToken(Str
);
89 return stringTo
<T
>(Token
);
92 /// Return the number of hex digits needed to encode a (POD) object of a given
/// type.
94 template <typename T
> static constexpr size_t hex_digits() {
// Converts the textual module id `str` into a UUID. The leading
// hex_digits<data_t::uuid_t>() characters are decoded into data.uuid and the
// remaining characters are parsed as a base-16 integer (the age). The age
// bytes are part of the returned UUID only when `os` is Win32.
98 static UUID
parseModuleId(llvm::Triple::OSType os
, llvm::StringRef str
) {
100 using uuid_t
= uint8_t[16];
102 llvm::support::ubig32_t age
;
104 static_assert(sizeof(data
) == 20);
105 // The textual module id encoding should be between 33 and 40 bytes long,
106 // depending on the size of the age field, which is of variable length.
107 // The first three chunks of the id are encoded in big endian, so we need to
109 if (str
.size() <= hex_digits
<data_t::uuid_t
>() ||
110 str
.size() > hex_digits
<data_t
>())
// Every character of the id must be a hexadecimal digit.
112 if (!all_of(str
, llvm::isHexDigit
))
115 llvm::StringRef uuid_str
= str
.take_front(hex_digits
<data_t::uuid_t
>());
116 llvm::StringRef age_str
= str
.drop_front(hex_digits
<data_t::uuid_t
>());
118 llvm::copy(fromHex(uuid_str
), data
.uuid
);
120 bool success
= to_integer(age_str
, age
, 16);
125 // On non-windows, the age field should always be zero, so we don't include it,
126 // to match the native uuid format of these platforms.
127 return UUID(&data
, os
== llvm::Triple::Win32
? sizeof(data
)
128 : sizeof(data
.uuid
));
// Determines which kind of record a line holds from its leading keyword
// token. A second token is consumed before StackCFI or StackWin is returned.
131 std::optional
<Record::Kind
> Record::classify(llvm::StringRef Line
) {
132 Token Tok
= consume
<Token
>(Line
);
135 return Record::Module
;
143 return Record::Public
;
145 Tok
= consume
<Token
>(Line
);
148 return Record::StackCFI
;
150 return Record::StackWin
;
155 return Record::Inline
;
156 case Token::InlineOrigin
:
157 return Record::InlineOrigin
;
159 // Optimistically assume that any unrecognised token means this is a line
160 // record; those don't have a special keyword and start directly with a
// hexadecimal address.
168 // These should never appear at the start of a valid record.
171 llvm_unreachable("Fully covered switch above!");
// Parses a MODULE record. The fields are read in order: the MODULE keyword,
// the OS name, the architecture name and the module id. The id token is
// converted to a UUID with parseModuleId, which also receives the parsed OS.
174 std::optional
<ModuleRecord
> ModuleRecord::parse(llvm::StringRef Line
) {
175 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
176 if (consume
<Token
>(Line
) != Token::Module
)
179 llvm::Triple::OSType OS
= consume
<llvm::Triple::OSType
>(Line
);
180 if (OS
== llvm::Triple::UnknownOS
)
183 llvm::Triple::ArchType Arch
= consume
<llvm::Triple::ArchType
>(Line
);
184 if (Arch
== llvm::Triple::UnknownArch
)
188 std::tie(Str
, Line
) = getToken(Line
);
189 UUID ID
= parseModuleId(OS
, Str
);
193 return ModuleRecord(OS
, Arch
, std::move(ID
));
// Prints the record as "MODULE <os> <arch> <id>", using the llvm::Triple type
// names for the OS and architecture and the string form of the UUID.
196 llvm::raw_ostream
&breakpad::operator<<(llvm::raw_ostream
&OS
,
197 const ModuleRecord
&R
) {
198 return OS
<< "MODULE " << llvm::Triple::getOSTypeName(R
.OS
) << " "
199 << llvm::Triple::getArchTypeName(R
.Arch
) << " "
200 << R
.ID
.GetAsString();
// Parses an INFO CODE_ID record. After the two keywords, the next token is
// taken as the code id; it is only turned into a UUID when nothing but
// whitespace follows it on the line.
203 std::optional
<InfoRecord
> InfoRecord::parse(llvm::StringRef Line
) {
204 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
205 if (consume
<Token
>(Line
) != Token::Info
)
208 if (consume
<Token
>(Line
) != Token::CodeID
)
212 std::tie(Str
, Line
) = getToken(Line
);
213 // If we don't have any text following the code ID (e.g. on Linux), we should
214 // use this as the UUID. Otherwise, we should fall back to the module ID.
216 if (Line
.trim().empty()) {
217 if (Str
.empty() || !ID
.SetFromStringRef(Str
))
220 return InfoRecord(std::move(ID
));
// Prints the record as "INFO CODE_ID <id>", using the string form of the UUID.
223 llvm::raw_ostream
&breakpad::operator<<(llvm::raw_ostream
&OS
,
224 const InfoRecord
&R
) {
225 return OS
<< "INFO CODE_ID " << R
.ID
.GetAsString();
// Shared parser for records of the form "<keyword> number name". It checks
// the leading keyword against TokenType, parses the next token as an integer
// and uses the trimmed remainder of the line as the name. The result is
// constructed as T(Number, Name).
228 template <typename T
>
229 static std::optional
<T
> parseNumberName(llvm::StringRef Line
, Token TokenType
) {
231 if (consume
<Token
>(Line
) != TokenType
)
236 std::tie(Str
, Line
) = getToken(Line
);
237 if (!to_integer(Str
, Number
))
240 llvm::StringRef Name
= Line
.trim();
244 return T(Number
, Name
);
// Parses a FILE record by delegating to parseNumberName with Token::File.
247 std::optional
<FileRecord
> FileRecord::parse(llvm::StringRef Line
) {
249 return parseNumberName
<FileRecord
>(Line
, Token::File
);
// Prints the record as "FILE <number> <name>".
252 llvm::raw_ostream
&breakpad::operator<<(llvm::raw_ostream
&OS
,
253 const FileRecord
&R
) {
254 return OS
<< "FILE " << R
.Number
<< " " << R
.Name
;
// Parses an INLINE_ORIGIN record by delegating to parseNumberName with
// Token::InlineOrigin.
257 std::optional
<InlineOriginRecord
>
258 InlineOriginRecord::parse(llvm::StringRef Line
) {
259 // INLINE_ORIGIN number name
260 return parseNumberName
<InlineOriginRecord
>(Line
, Token::InlineOrigin
);
// Prints the record as "INLINE_ORIGIN <number> <name>".
263 llvm::raw_ostream
&breakpad::operator<<(llvm::raw_ostream
&OS
,
264 const InlineOriginRecord
&R
) {
265 return OS
<< "INLINE_ORIGIN " << R
.Number
<< " " << R
.Name
;
// Shared parser for PUBLIC and FUNC records. A non-null Size selects the FUNC
// keyword and makes the size field be read into *Size; a null Size selects the
// PUBLIC keyword. Address, size and param_size are all parsed as base 16.
268 static bool parsePublicOrFunc(llvm::StringRef Line
, bool &Multiple
,
269 lldb::addr_t
&Address
, lldb::addr_t
*Size
,
270 lldb::addr_t
&ParamSize
, llvm::StringRef
&Name
) {
271 // PUBLIC [m] address param_size name
273 // FUNC [m] address size param_size name
275 Token Tok
= Size
? Token::Func
: Token::Public
;
277 if (consume
<Token
>(Line
) != Tok
)
281 std::tie(Str
, Line
) = getToken(Line
);
// The optional "m" marker sets Multiple.
282 Multiple
= Str
== "m";
285 std::tie(Str
, Line
) = getToken(Line
);
286 if (!to_integer(Str
, Address
, 16))
// Only FUNC records carry a size field.
289 if (Tok
== Token::Func
) {
290 std::tie(Str
, Line
) = getToken(Line
);
291 if (!to_integer(Str
, *Size
, 16))
295 std::tie(Str
, Line
) = getToken(Line
);
296 if (!to_integer(Str
, ParamSize
, 16))
// Parses a FUNC record through parsePublicOrFunc. Passing &Size makes the
// helper read the size field. On success the parsed fields are used to build
// the FuncRecord.
306 std::optional
<FuncRecord
> FuncRecord::parse(llvm::StringRef Line
) {
308 lldb::addr_t Address
, Size
, ParamSize
;
309 llvm::StringRef Name
;
311 if (parsePublicOrFunc(Line
, Multiple
, Address
, &Size
, ParamSize
, Name
))
312 return FuncRecord(Multiple
, Address
, Size
, ParamSize
, Name
);
// Two FuncRecords are equal when Multiple, Address, Size, ParamSize and Name
// all match.
317 bool breakpad::operator==(const FuncRecord
&L
, const FuncRecord
&R
) {
318 return L
.Multiple
== R
.Multiple
&& L
.Address
== R
.Address
&&
319 L
.Size
== R
.Size
&& L
.ParamSize
== R
.ParamSize
&& L
.Name
== R
.Name
;
// Prints the record as "FUNC [m ]address size param_size name". The "m "
// marker is written only when R.Multiple is set; the three numeric fields use
// the "x-" format style.
321 llvm::raw_ostream
&breakpad::operator<<(llvm::raw_ostream
&OS
,
322 const FuncRecord
&R
) {
323 return OS
<< llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
324 R
.Multiple
? "m " : "", R
.Address
, R
.Size
,
325 R
.ParamSize
, R
.Name
);
// Parses an INLINE record. The text after the keyword is split on spaces; the
// first four tokens are the fixed fields and the remaining tokens are read as
// (address, size) pairs in base 16 and appended to the record's Ranges.
328 std::optional
<InlineRecord
> InlineRecord::parse(llvm::StringRef Line
) {
329 // INLINE inline_nest_level call_site_line call_site_file_num origin_num
// followed by one or more "address size" pairs
331 if (consume
<Token
>(Line
) != Token::Inline
)
334 llvm::SmallVector
<llvm::StringRef
> Tokens
;
335 SplitString(Line
, Tokens
, " ");
// Expect the four fixed fields plus at least one complete address/size pair.
336 if (Tokens
.size() < 6 || Tokens
.size() % 2 == 1)
339 size_t InlineNestLevel
;
340 uint32_t CallSiteLineNum
;
341 size_t CallSiteFileNum
;
343 if (!(to_integer(Tokens
[0], InlineNestLevel
) &&
344 to_integer(Tokens
[1], CallSiteLineNum
) &&
345 to_integer(Tokens
[2], CallSiteFileNum
) &&
346 to_integer(Tokens
[3], OriginNum
)))
349 InlineRecord Record
= InlineRecord(InlineNestLevel
, CallSiteLineNum
,
350 CallSiteFileNum
, OriginNum
);
// Tokens from index 4 onwards come in (address, size) pairs.
351 for (size_t i
= 4; i
< Tokens
.size(); i
+= 2) {
352 lldb::addr_t Address
;
353 if (!to_integer(Tokens
[i
], Address
, 16))
356 if (!to_integer(Tokens
[i
+ 1].trim(), Size
, 16))
358 Record
.Ranges
.emplace_back(Address
, Size
);
// Two InlineRecords are equal when InlineNestLevel, CallSiteLineNum,
// CallSiteFileNum, OriginNum and Ranges all match.
363 bool breakpad::operator==(const InlineRecord
&L
, const InlineRecord
&R
) {
364 return L
.InlineNestLevel
== R
.InlineNestLevel
&&
365 L
.CallSiteLineNum
== R
.CallSiteLineNum
&&
366 L
.CallSiteFileNum
== R
.CallSiteFileNum
&& L
.OriginNum
== R
.OriginNum
&&
367 L
.Ranges
== R
.Ranges
;
// Prints "INLINE <nest_level> <line> <file_num> <origin_num>" and then, for
// each entry in R.Ranges, " <first> <second>" using the "x-" format style.
370 llvm::raw_ostream
&breakpad::operator<<(llvm::raw_ostream
&OS
,
371 const InlineRecord
&R
) {
372 OS
<< llvm::formatv("INLINE {0} {1} {2} {3}", R
.InlineNestLevel
,
373 R
.CallSiteLineNum
, R
.CallSiteFileNum
, R
.OriginNum
);
374 for (const auto &range
: R
.Ranges
) {
375 OS
<< llvm::formatv(" {0:x-} {1:x-}", range
.first
, range
.second
);
// Parses a line record, which has no keyword. Four tokens are read in order:
// address and size (both base 16), then the line number and the file number.
380 std::optional
<LineRecord
> LineRecord::parse(llvm::StringRef Line
) {
381 lldb::addr_t Address
;
383 std::tie(Str
, Line
) = getToken(Line
);
384 if (!to_integer(Str
, Address
, 16))
388 std::tie(Str
, Line
) = getToken(Line
);
389 if (!to_integer(Str
, Size
, 16))
393 std::tie(Str
, Line
) = getToken(Line
);
394 if (!to_integer(Str
, LineNum
))
398 std::tie(Str
, Line
) = getToken(Line
);
399 if (!to_integer(Str
, FileNum
))
402 return LineRecord(Address
, Size
, LineNum
, FileNum
);
// Two LineRecords are equal when Address, Size, LineNum and FileNum all match.
405 bool breakpad::operator==(const LineRecord
&L
, const LineRecord
&R
) {
406 return L
.Address
== R
.Address
&& L
.Size
== R
.Size
&& L
.LineNum
== R
.LineNum
&&
407 L
.FileNum
== R
.FileNum
;
// Prints the record as "<address> <size> <line_num> <file_num>". Address and
// size use the "x-" format style; the two numbers use the default style.
409 llvm::raw_ostream
&breakpad::operator<<(llvm::raw_ostream
&OS
,
410 const LineRecord
&R
) {
411 return OS
<< llvm::formatv("{0:x-} {1:x-} {2} {3}", R
.Address
, R
.Size
,
412 R
.LineNum
, R
.FileNum
);
// Parses a PUBLIC record through parsePublicOrFunc. Passing nullptr for the
// size pointer selects the PUBLIC form, which has no size field.
415 std::optional
<PublicRecord
> PublicRecord::parse(llvm::StringRef Line
) {
417 lldb::addr_t Address
, ParamSize
;
418 llvm::StringRef Name
;
420 if (parsePublicOrFunc(Line
, Multiple
, Address
, nullptr, ParamSize
, Name
))
421 return PublicRecord(Multiple
, Address
, ParamSize
, Name
);
// Two PublicRecords are equal when Multiple, Address, ParamSize and Name all
// match.
426 bool breakpad::operator==(const PublicRecord
&L
, const PublicRecord
&R
) {
427 return L
.Multiple
== R
.Multiple
&& L
.Address
== R
.Address
&&
428 L
.ParamSize
== R
.ParamSize
&& L
.Name
== R
.Name
;
// Prints the record using the "PUBLIC {0}{1:x-} {2:x-} {3}" layout. The "m "
// marker is written only when R.Multiple is set; the address and param size
// use the "x-" format style.
430 llvm::raw_ostream
&breakpad::operator<<(llvm::raw_ostream
&OS
,
431 const PublicRecord
&R
) {
432 return OS
<< llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
433 R
.Multiple
? "m " : "", R
.Address
, R
.ParamSize
,
// Parses a STACK CFI record. After the STACK and CFI keywords, the next token
// is compared against INIT to detect an init record. The address and size are
// parsed as base 16, and the trimmed remainder of the line becomes the third
// constructor argument (the unwind rules).
437 std::optional
<StackCFIRecord
> StackCFIRecord::parse(llvm::StringRef Line
) {
438 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
440 // STACK CFI address reg1: expr1 reg2: expr2 ...
441 // No token in exprN ends with a colon.
443 if (consume
<Token
>(Line
) != Token::Stack
)
445 if (consume
<Token
>(Line
) != Token::CFI
)
449 std::tie(Str
, Line
) = getToken(Line
);
451 bool IsInitRecord
= stringTo
<Token
>(Str
) == Token::Init
;
453 std::tie(Str
, Line
) = getToken(Line
);
455 lldb::addr_t Address
;
456 if (!to_integer(Str
, Address
, 16))
// Size is optional, matching the two record forms shown above.
459 std::optional
<lldb::addr_t
> Size
;
462 std::tie(Str
, Line
) = getToken(Line
);
463 if (!to_integer(Str
, *Size
, 16))
467 return StackCFIRecord(Address
, Size
, Line
.trim());
// Two StackCFIRecords are equal when Address, Size and UnwindRules all match.
470 bool breakpad::operator==(const StackCFIRecord
&L
, const StackCFIRecord
&R
) {
471 return L
.Address
== R
.Address
&& L
.Size
== R
.Size
&&
472 L
.UnwindRules
== R
.UnwindRules
;
// Prints a StackCFIRecord. The address and the size are written with the "x-"
// format style, each followed by a space, and then a space and the unwind
// rules are appended.
475 llvm::raw_ostream
&breakpad::operator<<(llvm::raw_ostream
&OS
,
476 const StackCFIRecord
&R
) {
480 OS
<< llvm::formatv("{0:x-} ", R
.Address
);
482 OS
<< llvm::formatv("{0:x-} ", *R
.Size
);
483 return OS
<< " " << R
.UnwindRules
;
// Parses a STACK WIN record. Fields are read in the order given in the format
// comment below. The type must be FrameType::FrameData. The prologue_size,
// epilogue_size and max_stack_size tokens are consumed but not stored. The
// trimmed remainder of the line after has_program_string becomes the last
// constructor argument (the program string).
486 std::optional
<StackWinRecord
> StackWinRecord::parse(llvm::StringRef Line
) {
487 // STACK WIN type rva code_size prologue_size epilogue_size parameter_size
488 // saved_register_size local_size max_stack_size has_program_string
489 // program_string_OR_allocates_base_pointer
491 if (consume
<Token
>(Line
) != Token::Stack
)
493 if (consume
<Token
>(Line
) != Token::Win
)
498 std::tie(Str
, Line
) = getToken(Line
);
499 // Right now we only support the "FrameData" frame type.
500 if (!to_integer(Str
, Type
) || FrameType(Type
) != FrameType::FrameData
)
504 std::tie(Str
, Line
) = getToken(Line
);
505 if (!to_integer(Str
, RVA
, 16))
508 lldb::addr_t CodeSize
;
509 std::tie(Str
, Line
) = getToken(Line
);
510 if (!to_integer(Str
, CodeSize
, 16))
513 // Skip fields which we aren't using right now.
514 std::tie(Str
, Line
) = getToken(Line
); // prologue_size
515 std::tie(Str
, Line
) = getToken(Line
); // epilogue_size
517 lldb::addr_t ParameterSize
;
518 std::tie(Str
, Line
) = getToken(Line
);
519 if (!to_integer(Str
, ParameterSize
, 16))
522 lldb::addr_t SavedRegisterSize
;
523 std::tie(Str
, Line
) = getToken(Line
);
524 if (!to_integer(Str
, SavedRegisterSize
, 16))
527 lldb::addr_t LocalSize
;
528 std::tie(Str
, Line
) = getToken(Line
);
529 if (!to_integer(Str
, LocalSize
, 16))
532 std::tie(Str
, Line
) = getToken(Line
); // max_stack_size
534 uint8_t HasProgramString
;
535 std::tie(Str
, Line
) = getToken(Line
);
536 if (!to_integer(Str
, HasProgramString
))
538 // FrameData records should always have a program string.
539 if (!HasProgramString
)
542 return StackWinRecord(RVA
, CodeSize
, ParameterSize
, SavedRegisterSize
,
543 LocalSize
, Line
.trim());
// Two StackWinRecords are equal when RVA, CodeSize, ParameterSize,
// SavedRegisterSize, LocalSize and ProgramString all match.
546 bool breakpad::operator==(const StackWinRecord
&L
, const StackWinRecord
&R
) {
547 return L
.RVA
== R
.RVA
&& L
.CodeSize
== R
.CodeSize
&&
548 L
.ParameterSize
== R
.ParameterSize
&&
549 L
.SavedRegisterSize
== R
.SavedRegisterSize
&&
550 L
.LocalSize
== R
.LocalSize
&& L
.ProgramString
== R
.ProgramString
;
// Prints a StackWinRecord in the STACK WIN layout. The positions of
// prologue_size, epilogue_size and max_stack_size, which the record does not
// store, are written as "?".
// NOTE(review): StackWinRecord::parse reads ParameterSize, SavedRegisterSize
// and LocalSize as base 16, but placeholders {2}, {3} and {4} below have no
// "x-" style, unlike RVA and CodeSize - confirm whether that is intended.
553 llvm::raw_ostream
&breakpad::operator<<(llvm::raw_ostream
&OS
,
554 const StackWinRecord
&R
) {
555 return OS
<< llvm::formatv(
556 "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R
.RVA
,
557 R
.CodeSize
, R
.ParameterSize
, R
.SavedRegisterSize
, R
.LocalSize
,
// Returns the textual name of a record kind, e.g. "INLINE_ORIGIN" for
// Record::InlineOrigin.
561 llvm::StringRef
breakpad::toString(Record::Kind K
) {
573 case Record::InlineOrigin
:
574 return "INLINE_ORIGIN";
579 case Record::StackCFI
:
581 case Record::StackWin
:
584 llvm_unreachable("Unknown record kind!");