//===- DwarfTransformer.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"

#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/OutputAggregator.h"

#include <cinttypes>
#include <mutex>
#include <optional>
#include <string>
#include <vector>

using namespace llvm;
using namespace gsym;
28 struct llvm::gsym::CUInfo
{
29 const DWARFDebugLine::LineTable
*LineTable
;
31 std::vector
<uint32_t> FileCache
;
32 uint64_t Language
= 0;
35 CUInfo(DWARFContext
&DICtx
, DWARFCompileUnit
*CU
) {
36 LineTable
= DICtx
.getLineTableForUnit(CU
);
37 CompDir
= CU
->getCompilationDir();
40 FileCache
.assign(LineTable
->Prologue
.FileNames
.size() + 1, UINT32_MAX
);
41 DWARFDie Die
= CU
->getUnitDIE();
42 Language
= dwarf::toUnsigned(Die
.find(dwarf::DW_AT_language
), 0);
43 AddrSize
= CU
->getAddressByteSize();
46 /// Return true if Addr is the highest address for a given compile unit. The
47 /// highest address is encoded as -1, of all ones in the address. These high
48 /// addresses are used by some linkers to indicate that a function has been
49 /// dead stripped or didn't end up in the linked executable.
50 bool isHighestAddress(uint64_t Addr
) const {
52 return Addr
== UINT32_MAX
;
53 else if (AddrSize
== 8)
54 return Addr
== UINT64_MAX
;
58 /// Convert a DWARF compile unit file index into a GSYM global file index.
60 /// Each compile unit in DWARF has its own file table in the line table
61 /// prologue. GSYM has a single large file table that applies to all files
62 /// from all of the info in a GSYM file. This function converts between the
63 /// two and caches and DWARF CU file index that has already been converted so
64 /// the first client that asks for a compile unit file index will end up
65 /// doing the conversion, and subsequent clients will get the cached GSYM
67 std::optional
<uint32_t> DWARFToGSYMFileIndex(GsymCreator
&Gsym
,
68 uint32_t DwarfFileIdx
) {
69 if (!LineTable
|| DwarfFileIdx
>= FileCache
.size())
71 uint32_t &GsymFileIdx
= FileCache
[DwarfFileIdx
];
72 if (GsymFileIdx
!= UINT32_MAX
)
75 if (LineTable
->getFileNameByIndex(
76 DwarfFileIdx
, CompDir
,
77 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath
, File
))
78 GsymFileIdx
= Gsym
.insertFile(File
);
86 static DWARFDie
GetParentDeclContextDIE(DWARFDie
&Die
) {
87 if (DWARFDie SpecDie
=
88 Die
.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification
)) {
89 if (DWARFDie SpecParent
= GetParentDeclContextDIE(SpecDie
))
92 if (DWARFDie AbstDie
=
93 Die
.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin
)) {
94 if (DWARFDie AbstParent
= GetParentDeclContextDIE(AbstDie
))
98 // We never want to follow parent for inlined subroutine - that would
99 // give us information about where the function is inlined, not what
100 // function is inlined
101 if (Die
.getTag() == dwarf::DW_TAG_inlined_subroutine
)
104 DWARFDie ParentDie
= Die
.getParent();
108 switch (ParentDie
.getTag()) {
109 case dwarf::DW_TAG_namespace
:
110 case dwarf::DW_TAG_structure_type
:
111 case dwarf::DW_TAG_union_type
:
112 case dwarf::DW_TAG_class_type
:
113 case dwarf::DW_TAG_subprogram
:
114 return ParentDie
; // Found parent decl context DIE
115 case dwarf::DW_TAG_lexical_block
:
116 return GetParentDeclContextDIE(ParentDie
);
124 /// Get the GsymCreator string table offset for the qualified name for the
125 /// DIE passed in. This function will avoid making copies of any strings in
126 /// the GsymCreator when possible. We don't need to copy a string when the
127 /// string comes from our .debug_str section or is an inlined string in the
128 /// .debug_info. If we create a qualified name string in this function by
129 /// combining multiple strings in the DWARF string table or info, we will make
130 /// a copy of the string when we add it to the string table.
131 static std::optional
<uint32_t>
132 getQualifiedNameIndex(DWARFDie
&Die
, uint64_t Language
, GsymCreator
&Gsym
) {
133 // If the dwarf has mangled name, use mangled name
134 if (auto LinkageName
= Die
.getLinkageName()) {
135 // We have seen cases were linkage name is actually empty.
136 if (strlen(LinkageName
) > 0)
137 return Gsym
.insertString(LinkageName
, /* Copy */ false);
140 StringRef
ShortName(Die
.getName(DINameKind::ShortName
));
141 if (ShortName
.empty())
144 // For C++ and ObjC, prepend names of all parent declaration contexts
145 if (!(Language
== dwarf::DW_LANG_C_plus_plus
||
146 Language
== dwarf::DW_LANG_C_plus_plus_03
||
147 Language
== dwarf::DW_LANG_C_plus_plus_11
||
148 Language
== dwarf::DW_LANG_C_plus_plus_14
||
149 Language
== dwarf::DW_LANG_ObjC_plus_plus
||
150 // This should not be needed for C, but we see C++ code marked as C
151 // in some binaries. This should hurt, so let's do it for C as well
152 Language
== dwarf::DW_LANG_C
))
153 return Gsym
.insertString(ShortName
, /* Copy */ false);
155 // Some GCC optimizations create functions with names ending with .isra.<num>
156 // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
157 // If it looks like it could be the case, don't add any prefix
158 if (ShortName
.starts_with("_Z") &&
159 (ShortName
.contains(".isra.") || ShortName
.contains(".part.")))
160 return Gsym
.insertString(ShortName
, /* Copy */ false);
162 DWARFDie ParentDeclCtxDie
= GetParentDeclContextDIE(Die
);
163 if (ParentDeclCtxDie
) {
164 std::string Name
= ShortName
.str();
165 while (ParentDeclCtxDie
) {
166 StringRef
ParentName(ParentDeclCtxDie
.getName(DINameKind::ShortName
));
167 if (!ParentName
.empty()) {
168 // "lambda" names are wrapped in < >. Replace with { }
169 // to be consistent with demangled names and not to confuse with
171 if (ParentName
.front() == '<' && ParentName
.back() == '>')
172 Name
= "{" + ParentName
.substr(1, ParentName
.size() - 2).str() + "}" +
175 Name
= ParentName
.str() + "::" + Name
;
177 ParentDeclCtxDie
= GetParentDeclContextDIE(ParentDeclCtxDie
);
179 // Copy the name since we created a new name in a std::string.
180 return Gsym
.insertString(Name
, /* Copy */ true);
182 // Don't copy the name since it exists in the DWARF object file.
183 return Gsym
.insertString(ShortName
, /* Copy */ false);
186 static bool hasInlineInfo(DWARFDie Die
, uint32_t Depth
) {
187 bool CheckChildren
= true;
188 switch (Die
.getTag()) {
189 case dwarf::DW_TAG_subprogram
:
190 // Don't look into functions within functions.
191 CheckChildren
= Depth
== 0;
193 case dwarf::DW_TAG_inlined_subroutine
:
200 for (DWARFDie ChildDie
: Die
.children()) {
201 if (hasInlineInfo(ChildDie
, Depth
+ 1))
208 ConvertDWARFRanges(const DWARFAddressRangesVector
&DwarfRanges
) {
209 AddressRanges Ranges
;
210 for (const DWARFAddressRange
&DwarfRange
: DwarfRanges
) {
211 if (DwarfRange
.LowPC
< DwarfRange
.HighPC
)
212 Ranges
.insert({DwarfRange
.LowPC
, DwarfRange
.HighPC
});
217 static void parseInlineInfo(GsymCreator
&Gsym
, OutputAggregator
&Out
,
218 CUInfo
&CUI
, DWARFDie Die
, uint32_t Depth
,
219 FunctionInfo
&FI
, InlineInfo
&Parent
,
220 const AddressRanges
&AllParentRanges
,
222 if (!hasInlineInfo(Die
, Depth
))
225 dwarf::Tag Tag
= Die
.getTag();
226 if (Tag
== dwarf::DW_TAG_inlined_subroutine
) {
227 // create new InlineInfo and append to parent.children
229 AddressRanges AllInlineRanges
;
230 Expected
<DWARFAddressRangesVector
> RangesOrError
= Die
.getAddressRanges();
232 AllInlineRanges
= ConvertDWARFRanges(RangesOrError
.get());
233 uint32_t EmptyCount
= 0;
234 for (const AddressRange
&InlineRange
: AllInlineRanges
) {
235 // Check for empty inline range in case inline function was outlined
237 if (InlineRange
.empty()) {
240 if (Parent
.Ranges
.contains(InlineRange
)) {
241 II
.Ranges
.insert(InlineRange
);
243 // Only warn if the current inline range is not within any of all
244 // of the parent ranges. If we have a DW_TAG_subpgram with multiple
245 // ranges we will emit a FunctionInfo for each range of that
246 // function that only emits information within the current range,
247 // so we only want to emit an error if the DWARF has issues, not
248 // when a range currently just isn't in the range we are currently
250 if (AllParentRanges
.contains(InlineRange
)) {
253 Out
.Report("Function DIE has uncontained address range",
254 [&](raw_ostream
&OS
) {
255 OS
<< "error: inlined function DIE at "
256 << HEX32(Die
.getOffset()) << " has a range ["
257 << HEX64(InlineRange
.start()) << " - "
258 << HEX64(InlineRange
.end())
259 << ") that isn't contained in "
260 << "any parent address ranges, this inline range "
267 // If we have all empty ranges for the inlines, then don't warn if we
268 // have an empty InlineInfo at the top level as all inline functions
270 if (EmptyCount
== AllInlineRanges
.size())
273 if (II
.Ranges
.empty())
276 if (auto NameIndex
= getQualifiedNameIndex(Die
, CUI
.Language
, Gsym
))
277 II
.Name
= *NameIndex
;
278 const uint64_t DwarfFileIdx
= dwarf::toUnsigned(
279 Die
.findRecursively(dwarf::DW_AT_call_file
), UINT32_MAX
);
280 std::optional
<uint32_t> OptGSymFileIdx
=
281 CUI
.DWARFToGSYMFileIndex(Gsym
, DwarfFileIdx
);
282 if (OptGSymFileIdx
) {
283 II
.CallFile
= OptGSymFileIdx
.value();
284 II
.CallLine
= dwarf::toUnsigned(Die
.find(dwarf::DW_AT_call_line
), 0);
285 // parse all children and append to parent
286 for (DWARFDie ChildDie
: Die
.children())
287 parseInlineInfo(Gsym
, Out
, CUI
, ChildDie
, Depth
+ 1, FI
, II
,
288 AllInlineRanges
, WarnIfEmpty
);
289 Parent
.Children
.emplace_back(std::move(II
));
292 "Inlined function die has invlaid file index in DW_AT_call_file",
293 [&](raw_ostream
&OS
) {
294 OS
<< "error: inlined function DIE at " << HEX32(Die
.getOffset())
295 << " has an invalid file index " << DwarfFileIdx
296 << " in its DW_AT_call_file attribute, this inline entry and "
298 << "children will be removed.\n";
302 if (Tag
== dwarf::DW_TAG_subprogram
|| Tag
== dwarf::DW_TAG_lexical_block
) {
303 // skip this Die and just recurse down
304 for (DWARFDie ChildDie
: Die
.children())
305 parseInlineInfo(Gsym
, Out
, CUI
, ChildDie
, Depth
+ 1, FI
, Parent
,
306 AllParentRanges
, WarnIfEmpty
);
310 static void convertFunctionLineTable(OutputAggregator
&Out
, CUInfo
&CUI
,
311 DWARFDie Die
, GsymCreator
&Gsym
,
313 std::vector
<uint32_t> RowVector
;
314 const uint64_t StartAddress
= FI
.startAddress();
315 const uint64_t EndAddress
= FI
.endAddress();
316 const uint64_t RangeSize
= EndAddress
- StartAddress
;
317 const object::SectionedAddress SecAddress
{
318 StartAddress
, object::SectionedAddress::UndefSection
};
321 if (!CUI
.LineTable
->lookupAddressRange(SecAddress
, RangeSize
, RowVector
)) {
322 // If we have a DW_TAG_subprogram but no line entries, fall back to using
323 // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
324 std::string FilePath
= Die
.getDeclFile(
325 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath
);
326 if (FilePath
.empty()) {
327 // If we had a DW_AT_decl_file, but got no file then we need to emit a
329 Out
.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream
&OS
) {
330 const uint64_t DwarfFileIdx
= dwarf::toUnsigned(
331 Die
.findRecursively(dwarf::DW_AT_decl_file
), UINT32_MAX
);
332 OS
<< "error: function DIE at " << HEX32(Die
.getOffset())
333 << " has an invalid file index " << DwarfFileIdx
334 << " in its DW_AT_decl_file attribute, unable to create a single "
335 << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
341 dwarf::toUnsigned(Die
.findRecursively({dwarf::DW_AT_decl_line
}))) {
342 LineEntry
LE(StartAddress
, Gsym
.insertFile(FilePath
), *Line
);
343 FI
.OptLineTable
= LineTable();
344 FI
.OptLineTable
->push(LE
);
349 FI
.OptLineTable
= LineTable();
350 DWARFDebugLine::Row PrevRow
;
351 for (uint32_t RowIndex
: RowVector
) {
352 // Take file number and line/column from the row.
353 const DWARFDebugLine::Row
&Row
= CUI
.LineTable
->Rows
[RowIndex
];
354 std::optional
<uint32_t> OptFileIdx
=
355 CUI
.DWARFToGSYMFileIndex(Gsym
, Row
.File
);
358 "Invalid file index in DWARF line table", [&](raw_ostream
&OS
) {
359 OS
<< "error: function DIE at " << HEX32(Die
.getOffset()) << " has "
360 << "a line entry with invalid DWARF file index, this entry will "
362 Row
.dumpTableHeader(OS
, /*Indent=*/0);
368 const uint32_t FileIdx
= OptFileIdx
.value();
369 uint64_t RowAddress
= Row
.Address
.Address
;
370 // Watch out for a RowAddress that is in the middle of a line table entry
371 // in the DWARF. If we pass an address in between two line table entries
372 // we will get a RowIndex for the previous valid line table row which won't
373 // be contained in our function. This is usually a bug in the DWARF due to
374 // linker problems or LTO or other DWARF re-linking so it is worth emitting
375 // an error, but not worth stopping the creation of the GSYM.
376 if (!FI
.Range
.contains(RowAddress
)) {
377 if (RowAddress
< FI
.Range
.start()) {
378 Out
.Report("Start address lies between valid Row table entries",
379 [&](raw_ostream
&OS
) {
380 OS
<< "error: DIE has a start address whose LowPC is "
383 << RowIndex
<< "] with address " << HEX64(RowAddress
)
384 << " and the next one.\n";
385 Die
.dump(OS
, 0, DIDumpOptions::getForSingleDIE());
387 RowAddress
= FI
.Range
.start();
393 LineEntry
LE(RowAddress
, FileIdx
, Row
.Line
);
394 if (RowIndex
!= RowVector
[0] && Row
.Address
< PrevRow
.Address
) {
395 // We have seen full duplicate line tables for functions in some
396 // DWARF files. Watch for those here by checking the last
397 // row was the function's end address (HighPC) and that the
398 // current line table entry's address is the same as the first
399 // line entry we already have in our "function_info.Lines". If
400 // so break out after printing a warning.
401 auto FirstLE
= FI
.OptLineTable
->first();
402 if (FirstLE
&& *FirstLE
== LE
)
403 // if (Log && !Gsym.isQuiet()) { TODO <-- This looks weird
404 Out
.Report("Duplicate line table detected", [&](raw_ostream
&OS
) {
405 OS
<< "warning: duplicate line table detected for DIE:\n";
406 Die
.dump(OS
, 0, DIDumpOptions::getForSingleDIE());
409 Out
.Report("Non-monotonically increasing addresses",
410 [&](raw_ostream
&OS
) {
411 OS
<< "error: line table has addresses that do not "
412 << "monotonically increase:\n";
413 for (uint32_t RowIndex2
: RowVector
)
414 CUI
.LineTable
->Rows
[RowIndex2
].dump(OS
);
415 Die
.dump(OS
, 0, DIDumpOptions::getForSingleDIE());
420 // Skip multiple line entries for the same file and line.
421 auto LastLE
= FI
.OptLineTable
->last();
422 if (LastLE
&& LastLE
->File
== FileIdx
&& LastLE
->Line
== Row
.Line
)
424 // Only push a row if it isn't an end sequence. End sequence markers are
425 // included for the last address in a function or the last contiguous
426 // address in a sequence.
427 if (Row
.EndSequence
) {
428 // End sequence means that the next line entry could have a lower address
429 // that the previous entries. So we clear the previous row so we don't
430 // trigger the line table error about address that do not monotonically
432 PrevRow
= DWARFDebugLine::Row();
434 FI
.OptLineTable
->push(LE
);
438 // If not line table rows were added, clear the line table so we don't encode
439 // on in the GSYM file.
440 if (FI
.OptLineTable
->empty())
441 FI
.OptLineTable
= std::nullopt
;
444 void DwarfTransformer::handleDie(OutputAggregator
&Out
, CUInfo
&CUI
,
446 switch (Die
.getTag()) {
447 case dwarf::DW_TAG_subprogram
: {
448 Expected
<DWARFAddressRangesVector
> RangesOrError
= Die
.getAddressRanges();
449 if (!RangesOrError
) {
450 consumeError(RangesOrError
.takeError());
453 const DWARFAddressRangesVector
&Ranges
= RangesOrError
.get();
456 auto NameIndex
= getQualifiedNameIndex(Die
, CUI
.Language
, Gsym
);
458 Out
.Report("Function has no name", [&](raw_ostream
&OS
) {
459 OS
<< "error: function at " << HEX64(Die
.getOffset())
460 << " has no name\n ";
461 Die
.dump(OS
, 0, DIDumpOptions::getForSingleDIE());
465 // All ranges for the subprogram DIE in case it has multiple. We need to
466 // pass this down into parseInlineInfo so we don't warn about inline
467 // ranges that are not in the current subrange of a function when they
468 // actually are in another subgrange. We do this because when a function
469 // has discontiguos ranges, we create multiple function entries with only
470 // the info for that range contained inside of it.
471 AddressRanges AllSubprogramRanges
= ConvertDWARFRanges(Ranges
);
473 // Create a function_info for each range
474 for (const DWARFAddressRange
&Range
: Ranges
) {
475 // The low PC must be less than the high PC. Many linkers don't remove
476 // DWARF for functions that don't get linked into the final executable.
477 // If both the high and low pc have relocations, linkers will often set
478 // the address values for both to the same value to indicate the function
479 // has been remove. Other linkers have been known to set the one or both
480 // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
481 // byte addresses to indicate the function isn't valid. The check below
482 // tries to watch for these cases and abort if it runs into them.
483 if (Range
.LowPC
>= Range
.HighPC
|| CUI
.isHighestAddress(Range
.LowPC
))
486 // Many linkers can't remove DWARF and might set the LowPC to zero. Since
487 // high PC can be an offset from the low PC in more recent DWARF versions
488 // we need to watch for a zero'ed low pc which we do using ValidTextRanges
490 if (!Gsym
.IsValidTextAddress(Range
.LowPC
)) {
491 // We expect zero and -1 to be invalid addresses in DWARF depending
492 // on the linker of the DWARF. This indicates a function was stripped
493 // and the debug info wasn't able to be stripped from the DWARF. If
494 // the LowPC isn't zero or -1, then we should emit an error.
495 if (Range
.LowPC
!= 0) {
496 if (!Gsym
.isQuiet()) {
497 // Unexpected invalid address, emit a warning
498 Out
.Report("Address range starts outside executable section",
499 [&](raw_ostream
&OS
) {
500 OS
<< "warning: DIE has an address range whose "
502 "is not in any executable sections ("
503 << *Gsym
.GetValidTextRanges()
504 << ") and will not be processed:\n";
505 Die
.dump(OS
, 0, DIDumpOptions::getForSingleDIE());
513 FI
.Range
= {Range
.LowPC
, Range
.HighPC
};
514 FI
.Name
= *NameIndex
;
516 convertFunctionLineTable(Out
, CUI
, Die
, Gsym
, FI
);
518 if (hasInlineInfo(Die
, 0)) {
519 FI
.Inline
= InlineInfo();
520 FI
.Inline
->Name
= *NameIndex
;
521 FI
.Inline
->Ranges
.insert(FI
.Range
);
522 bool WarnIfEmpty
= true;
523 parseInlineInfo(Gsym
, Out
, CUI
, Die
, 0, FI
, *FI
.Inline
,
524 AllSubprogramRanges
, WarnIfEmpty
);
525 // Make sure we at least got some valid inline info other than just
526 // the top level function. If we didn't then remove the inline info
527 // from the function info. We have seen cases where LTO tries to modify
528 // the DWARF for functions and it messes up the address ranges for
529 // the inline functions so it is no longer valid.
531 // By checking if there are any valid children on the top level inline
532 // information object, we will know if we got anything valid from the
534 if (FI
.Inline
->Children
.empty()) {
535 if (WarnIfEmpty
&& !Gsym
.isQuiet())
536 Out
.Report("DIE contains inline functions with no valid ranges",
537 [&](raw_ostream
&OS
) {
538 OS
<< "warning: DIE contains inline function "
539 "information that has no valid ranges, removing "
540 "inline information:\n";
541 Die
.dump(OS
, 0, DIDumpOptions::getForSingleDIE());
543 FI
.Inline
= std::nullopt
;
547 // If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs.
548 if (LoadDwarfCallSites
)
549 parseCallSiteInfoFromDwarf(CUI
, Die
, FI
);
551 Gsym
.addFunctionInfo(std::move(FI
));
557 for (DWARFDie ChildDie
: Die
.children())
558 handleDie(Out
, CUI
, ChildDie
);
561 void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo
&CUI
, DWARFDie Die
,
563 // Parse all DW_TAG_call_site DIEs that are children of this subprogram DIE.
564 // DWARF specification:
565 // - DW_TAG_call_site can have DW_AT_call_return_pc for return address offset.
566 // - DW_AT_call_origin might point to a DIE of the function being called.
567 // For simplicity, we will just extract return_offset and possibly target name
570 CallSiteInfoCollection CSIC
;
572 for (DWARFDie Child
: Die
.children()) {
573 if (Child
.getTag() != dwarf::DW_TAG_call_site
)
577 // DW_AT_call_return_pc: the return PC (address). We'll convert it to
578 // offset relative to FI's start.
580 dwarf::toAddress(Child
.findRecursively(dwarf::DW_AT_call_return_pc
));
581 if (!ReturnPC
|| !FI
.Range
.contains(*ReturnPC
))
584 CSI
.ReturnOffset
= *ReturnPC
- FI
.startAddress();
586 // Attempt to get function name from DW_AT_call_origin. If present, we can
587 // insert it as a match regex.
588 if (DWARFDie OriginDie
=
589 Child
.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin
)) {
591 // Include the full unmangled name if available, otherwise the short name.
592 if (const char *LinkName
= OriginDie
.getLinkageName()) {
593 uint32_t LinkNameOff
= Gsym
.insertString(LinkName
, /*Copy=*/false);
594 CSI
.MatchRegex
.push_back(LinkNameOff
);
595 } else if (const char *ShortName
= OriginDie
.getShortName()) {
596 uint32_t ShortNameOff
= Gsym
.insertString(ShortName
, /*Copy=*/false);
597 CSI
.MatchRegex
.push_back(ShortNameOff
);
601 // For now, we won't attempt to deduce InternalCall/ExternalCall flags
603 CSI
.Flags
= CallSiteInfo::Flags::None
;
605 CSIC
.CallSites
.push_back(CSI
);
608 if (!CSIC
.CallSites
.empty()) {
610 FI
.CallSites
= CallSiteInfoCollection();
611 // Append parsed DWARF callsites:
612 FI
.CallSites
->CallSites
.insert(FI
.CallSites
->CallSites
.end(),
613 CSIC
.CallSites
.begin(),
614 CSIC
.CallSites
.end());
618 Error
DwarfTransformer::convert(uint32_t NumThreads
, OutputAggregator
&Out
) {
619 size_t NumBefore
= Gsym
.getNumFunctionInfos();
620 auto getDie
= [&](DWARFUnit
&DwarfUnit
) -> DWARFDie
{
621 DWARFDie ReturnDie
= DwarfUnit
.getUnitDIE(false);
622 if (DwarfUnit
.getDWOId()) {
623 DWARFUnit
*DWOCU
= DwarfUnit
.getNonSkeletonUnitDIE(false).getDwarfUnit();
624 if (!DWOCU
->isDWOUnit())
626 "warning: Unable to retrieve DWO .debug_info section for some "
627 "object files. (Remove the --quiet flag for full output)",
628 [&](raw_ostream
&OS
) {
629 std::string DWOName
= dwarf::toString(
630 DwarfUnit
.getUnitDIE().find(
631 {dwarf::DW_AT_dwo_name
, dwarf::DW_AT_GNU_dwo_name
}),
633 OS
<< "warning: Unable to retrieve DWO .debug_info section for "
637 ReturnDie
= DWOCU
->getUnitDIE(false);
642 if (NumThreads
== 1) {
643 // Parse all DWARF data from this thread, use the same string/file table
645 for (const auto &CU
: DICtx
.compile_units()) {
646 DWARFDie Die
= getDie(*CU
);
647 CUInfo
CUI(DICtx
, dyn_cast
<DWARFCompileUnit
>(CU
.get()));
648 handleDie(Out
, CUI
, Die
);
651 // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
652 // front before we start accessing any DIEs since there might be
653 // cross compile unit references in the DWARF. If we don't do this we can
656 // We need to call getAbbreviations sequentially first so that getUnitDIE()
657 // only works with its local data.
658 for (const auto &CU
: DICtx
.compile_units())
659 CU
->getAbbreviations();
661 // Now parse all DIEs in case we have cross compile unit references in a
663 DefaultThreadPool
pool(hardware_concurrency(NumThreads
));
664 for (const auto &CU
: DICtx
.compile_units())
665 pool
.async([&CU
]() { CU
->getUnitDIE(false /*CUDieOnly*/); });
668 // Now convert all DWARF to GSYM in a thread pool.
670 for (const auto &CU
: DICtx
.compile_units()) {
671 DWARFDie Die
= getDie(*CU
);
673 CUInfo
CUI(DICtx
, dyn_cast
<DWARFCompileUnit
>(CU
.get()));
674 pool
.async([this, CUI
, &LogMutex
, &Out
, Die
]() mutable {
676 raw_string_ostream
StrStream(storage
);
677 OutputAggregator
ThreadOut(Out
.GetOS() ? &StrStream
: nullptr);
678 handleDie(ThreadOut
, CUI
, Die
);
679 // Print ThreadLogStorage lines into an actual stream under a lock
680 std::lock_guard
<std::mutex
> guard(LogMutex
);
684 Out
.Merge(ThreadOut
);
690 size_t FunctionsAddedCount
= Gsym
.getNumFunctionInfos() - NumBefore
;
691 Out
<< "Loaded " << FunctionsAddedCount
<< " functions from DWARF.\n";
692 return Error::success();
695 llvm::Error
DwarfTransformer::verify(StringRef GsymPath
,
696 OutputAggregator
&Out
) {
697 Out
<< "Verifying GSYM file \"" << GsymPath
<< "\":\n";
699 auto Gsym
= GsymReader::openFile(GsymPath
);
701 return Gsym
.takeError();
703 auto NumAddrs
= Gsym
->getNumAddresses();
704 DILineInfoSpecifier
DLIS(
705 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath
,
706 DILineInfoSpecifier::FunctionNameKind::LinkageName
);
707 std::string gsymFilename
;
708 for (uint32_t I
= 0; I
< NumAddrs
; ++I
) {
709 auto FuncAddr
= Gsym
->getAddress(I
);
711 return createStringError(std::errc::invalid_argument
,
712 "failed to extract address[%i]", I
);
714 auto FI
= Gsym
->getFunctionInfo(*FuncAddr
);
716 return createStringError(
717 std::errc::invalid_argument
,
718 "failed to extract function info for address 0x%" PRIu64
, *FuncAddr
);
720 for (auto Addr
= *FuncAddr
; Addr
< *FuncAddr
+ FI
->size(); ++Addr
) {
721 const object::SectionedAddress SectAddr
{
722 Addr
, object::SectionedAddress::UndefSection
};
723 auto LR
= Gsym
->lookup(Addr
);
725 return LR
.takeError();
727 auto DwarfInlineInfos
=
728 DICtx
.getInliningInfoForAddress(SectAddr
, DLIS
);
729 uint32_t NumDwarfInlineInfos
= DwarfInlineInfos
.getNumberOfFrames();
730 if (NumDwarfInlineInfos
== 0) {
731 DwarfInlineInfos
.addFrame(
732 DICtx
.getLineInfoForAddress(SectAddr
, DLIS
));
735 // Check for 1 entry that has no file and line info
736 if (NumDwarfInlineInfos
== 1 &&
737 DwarfInlineInfos
.getFrame(0).FileName
== "<invalid>") {
738 DwarfInlineInfos
= DIInliningInfo();
739 NumDwarfInlineInfos
= 0;
741 if (NumDwarfInlineInfos
> 0 &&
742 NumDwarfInlineInfos
!= LR
->Locations
.size()) {
744 raw_ostream
&Log
= *Out
.GetOS();
745 Log
<< "error: address " << HEX64(Addr
) << " has "
746 << NumDwarfInlineInfos
<< " DWARF inline frames and GSYM has "
747 << LR
->Locations
.size() << "\n";
748 Log
<< " " << NumDwarfInlineInfos
<< " DWARF frames:\n";
749 for (size_t Idx
= 0; Idx
< NumDwarfInlineInfos
; ++Idx
) {
750 const auto &dii
= DwarfInlineInfos
.getFrame(Idx
);
751 Log
<< " [" << Idx
<< "]: " << dii
.FunctionName
<< " @ "
752 << dii
.FileName
<< ':' << dii
.Line
<< '\n';
754 Log
<< " " << LR
->Locations
.size() << " GSYM frames:\n";
755 for (size_t Idx
= 0, count
= LR
->Locations
.size(); Idx
< count
;
757 const auto &gii
= LR
->Locations
[Idx
];
758 Log
<< " [" << Idx
<< "]: " << gii
.Name
<< " @ " << gii
.Dir
759 << '/' << gii
.Base
<< ':' << gii
.Line
<< '\n';
761 Gsym
->dump(Log
, *FI
);
766 for (size_t Idx
= 0, count
= LR
->Locations
.size(); Idx
< count
;
768 const auto &gii
= LR
->Locations
[Idx
];
769 if (Idx
< NumDwarfInlineInfos
) {
770 const auto &dii
= DwarfInlineInfos
.getFrame(Idx
);
771 gsymFilename
= LR
->getSourceFile(Idx
);
772 // Verify function name
773 if (dii
.FunctionName
.find(gii
.Name
.str()) != 0)
774 Out
<< "error: address " << HEX64(Addr
) << " DWARF function \""
775 << dii
.FunctionName
.c_str()
776 << "\" doesn't match GSYM function \"" << gii
.Name
<< "\"\n";
778 // Verify source file path
779 if (dii
.FileName
!= gsymFilename
)
780 Out
<< "error: address " << HEX64(Addr
) << " DWARF path \""
781 << dii
.FileName
.c_str() << "\" doesn't match GSYM path \""
782 << gsymFilename
.c_str() << "\"\n";
783 // Verify source file line
784 if (dii
.Line
!= gii
.Line
)
785 Out
<< "error: address " << HEX64(Addr
) << " DWARF line "
786 << dii
.Line
<< " != GSYM line " << gii
.Line
<< "\n";
791 return Error::success();