Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / lldb / source / Plugins / ObjectFile / Breakpad / BreakpadRecords.cpp
blobb0afe03946220758699262f6e0fb30b5aa14270e
1 //===-- BreakpadRecords.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 #include <optional>
16 using namespace lldb_private;
17 using namespace lldb_private::breakpad;
namespace {
/// Keywords recognised while tokenising a Breakpad symbol file line. The
/// string spelling of each keyword is defined by stringTo<Token>.
enum class Token {
  /// Any text that is not one of the keywords below.
  Unknown,
  /// "MODULE"
  Module,
  /// "INFO"
  Info,
  /// "CODE_ID"
  CodeID,
  /// "FILE"
  File,
  /// "FUNC"
  Func,
  /// "INLINE"
  Inline,
  /// "INLINE_ORIGIN"
  InlineOrigin,
  /// "PUBLIC"
  Public,
  /// "STACK"
  Stack,
  /// "CFI"
  CFI,
  /// "INIT"
  Init,
  /// "WIN"
  Win,
};
} // namespace
37 template<typename T>
38 static T stringTo(llvm::StringRef Str);
40 template <> Token stringTo<Token>(llvm::StringRef Str) {
41 return llvm::StringSwitch<Token>(Str)
42 .Case("MODULE", Token::Module)
43 .Case("INFO", Token::Info)
44 .Case("CODE_ID", Token::CodeID)
45 .Case("FILE", Token::File)
46 .Case("FUNC", Token::Func)
47 .Case("INLINE", Token::Inline)
48 .Case("INLINE_ORIGIN", Token::InlineOrigin)
49 .Case("PUBLIC", Token::Public)
50 .Case("STACK", Token::Stack)
51 .Case("CFI", Token::CFI)
52 .Case("INIT", Token::Init)
53 .Case("WIN", Token::Win)
54 .Default(Token::Unknown);
57 template <>
58 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
59 using llvm::Triple;
60 return llvm::StringSwitch<Triple::OSType>(Str)
61 .Case("Linux", Triple::Linux)
62 .Case("mac", Triple::MacOSX)
63 .Case("windows", Triple::Win32)
64 .Default(Triple::UnknownOS);
67 template <>
68 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
69 using llvm::Triple;
70 return llvm::StringSwitch<Triple::ArchType>(Str)
71 .Case("arm", Triple::arm)
72 .Cases("arm64", "arm64e", Triple::aarch64)
73 .Case("mips", Triple::mips)
74 .Case("msp430", Triple::msp430)
75 .Case("ppc", Triple::ppc)
76 .Case("ppc64", Triple::ppc64)
77 .Case("s390", Triple::systemz)
78 .Case("sparc", Triple::sparc)
79 .Case("sparcv9", Triple::sparcv9)
80 .Case("x86", Triple::x86)
81 .Cases("x86_64", "x86_64h", Triple::x86_64)
82 .Default(Triple::UnknownArch);
85 template<typename T>
86 static T consume(llvm::StringRef &Str) {
87 llvm::StringRef Token;
88 std::tie(Token, Str) = getToken(Str);
89 return stringTo<T>(Token);
/// Number of hexadecimal characters required to spell out every byte of a
/// (POD) object of type \c T, i.e. two characters per byte.
template <typename T> static constexpr size_t hex_digits() {
  constexpr size_t DigitsPerByte = 2;
  return DigitsPerByte * sizeof(T);
}
98 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
99 struct data_t {
100 using uuid_t = uint8_t[16];
101 uuid_t uuid;
102 llvm::support::ubig32_t age;
103 } data;
104 static_assert(sizeof(data) == 20);
105 // The textual module id encoding should be between 33 and 40 bytes long,
106 // depending on the size of the age field, which is of variable length.
107 // The first three chunks of the id are encoded in big endian, so we need to
108 // byte-swap those.
109 if (str.size() <= hex_digits<data_t::uuid_t>() ||
110 str.size() > hex_digits<data_t>())
111 return UUID();
112 if (!all_of(str, llvm::isHexDigit))
113 return UUID();
115 llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>());
116 llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>());
118 llvm::copy(fromHex(uuid_str), data.uuid);
119 uint32_t age;
120 bool success = to_integer(age_str, age, 16);
121 assert(success);
122 (void)success;
123 data.age = age;
125 // On non-windows, the age field should always be zero, so we don't include to
126 // match the native uuid format of these platforms.
127 return UUID(&data, os == llvm::Triple::Win32 ? sizeof(data)
128 : sizeof(data.uuid));
131 std::optional<Record::Kind> Record::classify(llvm::StringRef Line) {
132 Token Tok = consume<Token>(Line);
133 switch (Tok) {
134 case Token::Module:
135 return Record::Module;
136 case Token::Info:
137 return Record::Info;
138 case Token::File:
139 return Record::File;
140 case Token::Func:
141 return Record::Func;
142 case Token::Public:
143 return Record::Public;
144 case Token::Stack:
145 Tok = consume<Token>(Line);
146 switch (Tok) {
147 case Token::CFI:
148 return Record::StackCFI;
149 case Token::Win:
150 return Record::StackWin;
151 default:
152 return std::nullopt;
154 case Token::Inline:
155 return Record::Inline;
156 case Token::InlineOrigin:
157 return Record::InlineOrigin;
158 case Token::Unknown:
159 // Optimistically assume that any unrecognised token means this is a line
160 // record, those don't have a special keyword and start directly with a
161 // hex number.
162 return Record::Line;
164 case Token::CodeID:
165 case Token::CFI:
166 case Token::Init:
167 case Token::Win:
168 // These should never appear at the start of a valid record.
169 return std::nullopt;
171 llvm_unreachable("Fully covered switch above!");
174 std::optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
175 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
176 if (consume<Token>(Line) != Token::Module)
177 return std::nullopt;
179 llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
180 if (OS == llvm::Triple::UnknownOS)
181 return std::nullopt;
183 llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
184 if (Arch == llvm::Triple::UnknownArch)
185 return std::nullopt;
187 llvm::StringRef Str;
188 std::tie(Str, Line) = getToken(Line);
189 UUID ID = parseModuleId(OS, Str);
190 if (!ID)
191 return std::nullopt;
193 return ModuleRecord(OS, Arch, std::move(ID));
196 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
197 const ModuleRecord &R) {
198 return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
199 << llvm::Triple::getArchTypeName(R.Arch) << " "
200 << R.ID.GetAsString();
203 std::optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
204 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
205 if (consume<Token>(Line) != Token::Info)
206 return std::nullopt;
208 if (consume<Token>(Line) != Token::CodeID)
209 return std::nullopt;
211 llvm::StringRef Str;
212 std::tie(Str, Line) = getToken(Line);
213 // If we don't have any text following the code ID (e.g. on linux), we should
214 // use this as the UUID. Otherwise, we should revert back to the module ID.
215 UUID ID;
216 if (Line.trim().empty()) {
217 if (Str.empty() || !ID.SetFromStringRef(Str))
218 return std::nullopt;
220 return InfoRecord(std::move(ID));
223 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
224 const InfoRecord &R) {
225 return OS << "INFO CODE_ID " << R.ID.GetAsString();
228 template <typename T>
229 static std::optional<T> parseNumberName(llvm::StringRef Line, Token TokenType) {
230 // TOKEN number name
231 if (consume<Token>(Line) != TokenType)
232 return std::nullopt;
234 llvm::StringRef Str;
235 size_t Number;
236 std::tie(Str, Line) = getToken(Line);
237 if (!to_integer(Str, Number))
238 return std::nullopt;
240 llvm::StringRef Name = Line.trim();
241 if (Name.empty())
242 return std::nullopt;
244 return T(Number, Name);
247 std::optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
248 // FILE number name
249 return parseNumberName<FileRecord>(Line, Token::File);
252 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
253 const FileRecord &R) {
254 return OS << "FILE " << R.Number << " " << R.Name;
257 std::optional<InlineOriginRecord>
258 InlineOriginRecord::parse(llvm::StringRef Line) {
259 // INLINE_ORIGIN number name
260 return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin);
263 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
264 const InlineOriginRecord &R) {
265 return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name;
268 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
269 lldb::addr_t &Address, lldb::addr_t *Size,
270 lldb::addr_t &ParamSize, llvm::StringRef &Name) {
271 // PUBLIC [m] address param_size name
272 // or
273 // FUNC [m] address size param_size name
275 Token Tok = Size ? Token::Func : Token::Public;
277 if (consume<Token>(Line) != Tok)
278 return false;
280 llvm::StringRef Str;
281 std::tie(Str, Line) = getToken(Line);
282 Multiple = Str == "m";
284 if (Multiple)
285 std::tie(Str, Line) = getToken(Line);
286 if (!to_integer(Str, Address, 16))
287 return false;
289 if (Tok == Token::Func) {
290 std::tie(Str, Line) = getToken(Line);
291 if (!to_integer(Str, *Size, 16))
292 return false;
295 std::tie(Str, Line) = getToken(Line);
296 if (!to_integer(Str, ParamSize, 16))
297 return false;
299 Name = Line.trim();
300 if (Name.empty())
301 return false;
303 return true;
306 std::optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
307 bool Multiple;
308 lldb::addr_t Address, Size, ParamSize;
309 llvm::StringRef Name;
311 if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
312 return FuncRecord(Multiple, Address, Size, ParamSize, Name);
314 return std::nullopt;
317 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
318 return L.Multiple == R.Multiple && L.Address == R.Address &&
319 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
321 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
322 const FuncRecord &R) {
323 return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
324 R.Multiple ? "m " : "", R.Address, R.Size,
325 R.ParamSize, R.Name);
328 std::optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) {
329 // INLINE inline_nest_level call_site_line call_site_file_num origin_num
330 // [address size]+
331 if (consume<Token>(Line) != Token::Inline)
332 return std::nullopt;
334 llvm::SmallVector<llvm::StringRef> Tokens;
335 SplitString(Line, Tokens, " ");
336 if (Tokens.size() < 6 || Tokens.size() % 2 == 1)
337 return std::nullopt;
339 size_t InlineNestLevel;
340 uint32_t CallSiteLineNum;
341 size_t CallSiteFileNum;
342 size_t OriginNum;
343 if (!(to_integer(Tokens[0], InlineNestLevel) &&
344 to_integer(Tokens[1], CallSiteLineNum) &&
345 to_integer(Tokens[2], CallSiteFileNum) &&
346 to_integer(Tokens[3], OriginNum)))
347 return std::nullopt;
349 InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum,
350 CallSiteFileNum, OriginNum);
351 for (size_t i = 4; i < Tokens.size(); i += 2) {
352 lldb::addr_t Address;
353 if (!to_integer(Tokens[i], Address, 16))
354 return std::nullopt;
355 lldb::addr_t Size;
356 if (!to_integer(Tokens[i + 1].trim(), Size, 16))
357 return std::nullopt;
358 Record.Ranges.emplace_back(Address, Size);
360 return Record;
363 bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) {
364 return L.InlineNestLevel == R.InlineNestLevel &&
365 L.CallSiteLineNum == R.CallSiteLineNum &&
366 L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum &&
367 L.Ranges == R.Ranges;
370 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
371 const InlineRecord &R) {
372 OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel,
373 R.CallSiteLineNum, R.CallSiteFileNum, R.OriginNum);
374 for (const auto &range : R.Ranges) {
375 OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second);
377 return OS;
380 std::optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
381 lldb::addr_t Address;
382 llvm::StringRef Str;
383 std::tie(Str, Line) = getToken(Line);
384 if (!to_integer(Str, Address, 16))
385 return std::nullopt;
387 lldb::addr_t Size;
388 std::tie(Str, Line) = getToken(Line);
389 if (!to_integer(Str, Size, 16))
390 return std::nullopt;
392 uint32_t LineNum;
393 std::tie(Str, Line) = getToken(Line);
394 if (!to_integer(Str, LineNum))
395 return std::nullopt;
397 size_t FileNum;
398 std::tie(Str, Line) = getToken(Line);
399 if (!to_integer(Str, FileNum))
400 return std::nullopt;
402 return LineRecord(Address, Size, LineNum, FileNum);
405 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
406 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
407 L.FileNum == R.FileNum;
409 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
410 const LineRecord &R) {
411 return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
412 R.LineNum, R.FileNum);
415 std::optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
416 bool Multiple;
417 lldb::addr_t Address, ParamSize;
418 llvm::StringRef Name;
420 if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
421 return PublicRecord(Multiple, Address, ParamSize, Name);
423 return std::nullopt;
426 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
427 return L.Multiple == R.Multiple && L.Address == R.Address &&
428 L.ParamSize == R.ParamSize && L.Name == R.Name;
430 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
431 const PublicRecord &R) {
432 return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
433 R.Multiple ? "m " : "", R.Address, R.ParamSize,
434 R.Name);
437 std::optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
438 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
439 // or
440 // STACK CFI address reg1: expr1 reg2: expr2 ...
441 // No token in exprN ends with a colon.
443 if (consume<Token>(Line) != Token::Stack)
444 return std::nullopt;
445 if (consume<Token>(Line) != Token::CFI)
446 return std::nullopt;
448 llvm::StringRef Str;
449 std::tie(Str, Line) = getToken(Line);
451 bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
452 if (IsInitRecord)
453 std::tie(Str, Line) = getToken(Line);
455 lldb::addr_t Address;
456 if (!to_integer(Str, Address, 16))
457 return std::nullopt;
459 std::optional<lldb::addr_t> Size;
460 if (IsInitRecord) {
461 Size.emplace();
462 std::tie(Str, Line) = getToken(Line);
463 if (!to_integer(Str, *Size, 16))
464 return std::nullopt;
467 return StackCFIRecord(Address, Size, Line.trim());
470 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) {
471 return L.Address == R.Address && L.Size == R.Size &&
472 L.UnwindRules == R.UnwindRules;
475 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
476 const StackCFIRecord &R) {
477 OS << "STACK CFI ";
478 if (R.Size)
479 OS << "INIT ";
480 OS << llvm::formatv("{0:x-} ", R.Address);
481 if (R.Size)
482 OS << llvm::formatv("{0:x-} ", *R.Size);
483 return OS << " " << R.UnwindRules;
486 std::optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) {
487 // STACK WIN type rva code_size prologue_size epilogue_size parameter_size
488 // saved_register_size local_size max_stack_size has_program_string
489 // program_string_OR_allocates_base_pointer
491 if (consume<Token>(Line) != Token::Stack)
492 return std::nullopt;
493 if (consume<Token>(Line) != Token::Win)
494 return std::nullopt;
496 llvm::StringRef Str;
497 uint8_t Type;
498 std::tie(Str, Line) = getToken(Line);
499 // Right now we only support the "FrameData" frame type.
500 if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData)
501 return std::nullopt;
503 lldb::addr_t RVA;
504 std::tie(Str, Line) = getToken(Line);
505 if (!to_integer(Str, RVA, 16))
506 return std::nullopt;
508 lldb::addr_t CodeSize;
509 std::tie(Str, Line) = getToken(Line);
510 if (!to_integer(Str, CodeSize, 16))
511 return std::nullopt;
513 // Skip fields which we aren't using right now.
514 std::tie(Str, Line) = getToken(Line); // prologue_size
515 std::tie(Str, Line) = getToken(Line); // epilogue_size
517 lldb::addr_t ParameterSize;
518 std::tie(Str, Line) = getToken(Line);
519 if (!to_integer(Str, ParameterSize, 16))
520 return std::nullopt;
522 lldb::addr_t SavedRegisterSize;
523 std::tie(Str, Line) = getToken(Line);
524 if (!to_integer(Str, SavedRegisterSize, 16))
525 return std::nullopt;
527 lldb::addr_t LocalSize;
528 std::tie(Str, Line) = getToken(Line);
529 if (!to_integer(Str, LocalSize, 16))
530 return std::nullopt;
532 std::tie(Str, Line) = getToken(Line); // max_stack_size
534 uint8_t HasProgramString;
535 std::tie(Str, Line) = getToken(Line);
536 if (!to_integer(Str, HasProgramString))
537 return std::nullopt;
538 // FrameData records should always have a program string.
539 if (!HasProgramString)
540 return std::nullopt;
542 return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize,
543 LocalSize, Line.trim());
546 bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) {
547 return L.RVA == R.RVA && L.CodeSize == R.CodeSize &&
548 L.ParameterSize == R.ParameterSize &&
549 L.SavedRegisterSize == R.SavedRegisterSize &&
550 L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString;
553 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
554 const StackWinRecord &R) {
555 return OS << llvm::formatv(
556 "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA,
557 R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize,
558 R.ProgramString);
561 llvm::StringRef breakpad::toString(Record::Kind K) {
562 switch (K) {
563 case Record::Module:
564 return "MODULE";
565 case Record::Info:
566 return "INFO";
567 case Record::File:
568 return "FILE";
569 case Record::Func:
570 return "FUNC";
571 case Record::Inline:
572 return "INLINE";
573 case Record::InlineOrigin:
574 return "INLINE_ORIGIN";
575 case Record::Line:
576 return "LINE";
577 case Record::Public:
578 return "PUBLIC";
579 case Record::StackCFI:
580 return "STACK CFI";
581 case Record::StackWin:
582 return "STACK WIN";
584 llvm_unreachable("Unknown record kind!");