1 //===- GsymCreator.cpp ----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
8 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
9 #include "llvm/DebugInfo/GSYM/FileWriter.h"
10 #include "llvm/DebugInfo/GSYM/Header.h"
11 #include "llvm/DebugInfo/GSYM/LineTable.h"
12 #include "llvm/DebugInfo/GSYM/OutputAggregator.h"
13 #include "llvm/MC/StringTableBuilder.h"
14 #include "llvm/Support/raw_ostream.h"
24 GsymCreator::GsymCreator(bool Quiet
)
25 : StrTab(StringTableBuilder::ELF
), Quiet(Quiet
) {
26 insertFile(StringRef());
29 uint32_t GsymCreator::insertFile(StringRef Path
, llvm::sys::path::Style Style
) {
30 llvm::StringRef directory
= llvm::sys::path::parent_path(Path
, Style
);
31 llvm::StringRef filename
= llvm::sys::path::filename(Path
, Style
);
32 // We must insert the strings first, then call the FileEntry constructor.
33 // If we inline the insertString() function call into the constructor, the
34 // call order is undefined due to parameter lists not having any ordering
36 const uint32_t Dir
= insertString(directory
);
37 const uint32_t Base
= insertString(filename
);
38 return insertFileEntry(FileEntry(Dir
, Base
));
41 uint32_t GsymCreator::insertFileEntry(FileEntry FE
) {
42 std::lock_guard
<std::mutex
> Guard(Mutex
);
43 const auto NextIndex
= Files
.size();
44 // Find FE in hash map and insert if not present.
45 auto R
= FileEntryToIndex
.insert(std::make_pair(FE
, NextIndex
));
47 Files
.emplace_back(FE
);
48 return R
.first
->second
;
51 uint32_t GsymCreator::copyFile(const GsymCreator
&SrcGC
, uint32_t FileIdx
) {
52 // File index zero is reserved for a FileEntry with no directory and no
53 // filename. Any other file and we need to copy the strings for the directory
57 const FileEntry SrcFE
= SrcGC
.Files
[FileIdx
];
58 // Copy the strings for the file and then add the newly converted file entry.
62 : StrTab
.add(SrcGC
.StringOffsetMap
.find(SrcFE
.Dir
)->second
);
63 uint32_t Base
= StrTab
.add(SrcGC
.StringOffsetMap
.find(SrcFE
.Base
)->second
);
64 FileEntry
DstFE(Dir
, Base
);
65 return insertFileEntry(DstFE
);
68 llvm::Error
GsymCreator::save(StringRef Path
, llvm::endianness ByteOrder
,
69 std::optional
<uint64_t> SegmentSize
) const {
71 return saveSegments(Path
, ByteOrder
, *SegmentSize
);
73 raw_fd_ostream
OutStrm(Path
, EC
);
75 return llvm::errorCodeToError(EC
);
76 FileWriter
O(OutStrm
, ByteOrder
);
/// Serialize the contents of this creator through \a O.
///
/// As far as the visible statements show, the output is produced in this
/// order: the Header, one address offset per entry in Funcs (relative to
/// Hdr.BaseAddress and Hdr.AddrOffSize bytes wide), the region holding the
/// address info offsets, the file table, the string table, and finally every
/// encoded FunctionInfo. Once everything is written, the string table offset
/// and size in the header and each address info offset are patched in place
/// with fixup32().
///
/// Mutex is held for the whole call.
///
/// \param O the FileWriter receiving the encoded bytes.
/// \returns an invalid_argument string error carrying one of the messages
/// below, the error produced by FunctionInfo::encode(), or success.
//
// NOTE(review): several guard conditions, the declaration of Hdr, the case
// labels of the switch and a few writes are not visible in this listing;
// confirm them against the complete file before changing this function.
80 llvm::Error
GsymCreator::encode(FileWriter
&O
) const {
81 std::lock_guard
<std::mutex
> Guard(Mutex
);
// NOTE(review): the conditions guarding the next two early returns are not
// visible here; judging by the messages they presumably test for an empty
// Funcs and for a creator that was not finalized.
83 return createStringError(std::errc::invalid_argument
,
84 "no functions to encode");
86 return createStringError(std::errc::invalid_argument
,
87 "GsymCreator wasn't finalized prior to encoding");
// Header::NumAddresses is written as a uint32_t below, so the count must fit.
89 if (Funcs
.size() > UINT32_MAX
)
90 return createStringError(std::errc::invalid_argument
,
91 "too many FunctionInfos");
93 std::optional
<uint64_t> BaseAddress
= getBaseAddress();
94 // Base address should be valid if we have any functions.
96 return createStringError(std::errc::invalid_argument
,
97 "invalid base address");
// Fill in the header fields.
99 Hdr
.Magic
= GSYM_MAGIC
;
100 Hdr
.Version
= GSYM_VERSION
;
101 Hdr
.AddrOffSize
= getAddressOffsetSize();
102 Hdr
.UUIDSize
= static_cast<uint8_t>(UUID
.size());
103 Hdr
.BaseAddress
= *BaseAddress
;
104 Hdr
.NumAddresses
= static_cast<uint32_t>(Funcs
.size());
105 Hdr
.StrtabOffset
= 0; // We will fix this up later.
106 Hdr
.StrtabSize
= 0; // We will fix this up later.
// Zero the whole UUID buffer first so bytes past UUID.size() are defined.
107 memset(Hdr
.UUID
, 0, sizeof(Hdr
.UUID
));
108 if (UUID
.size() > sizeof(Hdr
.UUID
))
109 return createStringError(std::errc::invalid_argument
,
110 "invalid UUID size %u", (uint32_t)UUID
.size());
111 // Copy the UUID value if we have one.
113 memcpy(Hdr
.UUID
, UUID
.data(), UUID
.size());
114 // Write out the header.
115 llvm::Error Err
= Hdr
.encode(O
);
119 const uint64_t MaxAddressOffset
= getMaxAddressOffset();
120 // Write out the address offsets.
121 O
.alignTo(Hdr
.AddrOffSize
);
122 for (const auto &FuncInfo
: Funcs
) {
123 uint64_t AddrOffset
= FuncInfo
.startAddress() - Hdr
.BaseAddress
;
124 // Make sure we calculated the address offsets byte size correctly by
125 // verifying the current address offset is within ranges. We have seen bugs
126 // introduced when the code changes that can cause problems here so it is
127 // good to catch this during testing.
128 assert(AddrOffset
<= MaxAddressOffset
);
// The void cast keeps MaxAddressOffset "used" when assert() compiles away.
129 (void)MaxAddressOffset
;
// Emit the offset using the width selected in the header.
130 switch (Hdr
.AddrOffSize
) {
132 O
.writeU8(static_cast<uint8_t>(AddrOffset
));
135 O
.writeU16(static_cast<uint16_t>(AddrOffset
));
138 O
.writeU32(static_cast<uint32_t>(AddrOffset
));
141 O
.writeU64(AddrOffset
);
146 // Write out all zeros for the AddrInfoOffsets.
// The start of this region is remembered so the entries can be patched below.
148 const off_t AddrInfoOffsetsOffset
= O
.tell();
149 for (size_t i
= 0, n
= Funcs
.size(); i
< n
; ++i
)
152 // Write out the file table
// The first file entry is expected to have zero directory and base offsets.
154 assert(!Files
.empty());
155 assert(Files
[0].Dir
== 0);
156 assert(Files
[0].Base
== 0);
157 size_t NumFiles
= Files
.size();
158 if (NumFiles
> UINT32_MAX
)
159 return createStringError(std::errc::invalid_argument
, "too many files");
160 O
.writeU32(static_cast<uint32_t>(NumFiles
));
161 for (auto File
: Files
) {
162 O
.writeU32(File
.Dir
);
163 O
.writeU32(File
.Base
);
166 // Write out the string table.
167 const off_t StrtabOffset
= O
.tell();
168 StrTab
.write(O
.get_stream());
169 const off_t StrtabSize
= O
.tell() - StrtabOffset
;
170 std::vector
<uint32_t> AddrInfoOffsets
;
172 // Write out the address infos for each function info.
173 for (const auto &FuncInfo
: Funcs
) {
174 if (Expected
<uint64_t> OffsetOrErr
= FuncInfo
.encode(O
))
175 AddrInfoOffsets
.push_back(OffsetOrErr
.get());
177 return OffsetOrErr
.takeError();
179 // Fixup the string table offset and size in the header
180 O
.fixup32((uint32_t)StrtabOffset
, offsetof(Header
, StrtabOffset
));
181 O
.fixup32((uint32_t)StrtabSize
, offsetof(Header
, StrtabSize
));
183 // Fixup all address info offsets
// NOTE(review): the declaration and increment of Offset are not visible here.
185 for (auto AddrInfoOffset
: AddrInfoOffsets
) {
186 O
.fixup32(AddrInfoOffset
, AddrInfoOffsetsOffset
+ Offset
);
189 return ErrorSuccess();
192 llvm::Error
GsymCreator::loadCallSitesFromYAML(StringRef YAMLFile
) {
193 // Use the loader to load call site information from the YAML file.
194 CallSiteInfoLoader
Loader(*this, Funcs
);
195 return Loader
.loadYAML(YAMLFile
);
198 void GsymCreator::prepareMergedFunctions(OutputAggregator
&Out
) {
199 // Nothing to do if we have less than 2 functions.
200 if (Funcs
.size() < 2)
203 // Sort the function infos by address range first, preserving input order
204 llvm::stable_sort(Funcs
);
205 std::vector
<FunctionInfo
> TopLevelFuncs
;
207 // Add the first function info to the top level functions
208 TopLevelFuncs
.emplace_back(std::move(Funcs
.front()));
210 // Now if the next function info has the same address range as the top level,
211 // then merge it into the top level function, otherwise add it to the top
213 for (size_t Idx
= 1; Idx
< Funcs
.size(); ++Idx
) {
214 FunctionInfo
&TopFunc
= TopLevelFuncs
.back();
215 FunctionInfo
&MatchFunc
= Funcs
[Idx
];
216 if (TopFunc
.Range
== MatchFunc
.Range
) {
217 // Both have the same range - add the 2nd func as a child of the 1st func
218 if (!TopFunc
.MergedFunctions
)
219 TopFunc
.MergedFunctions
= MergedFunctionsInfo();
220 // Avoid adding duplicate functions to MergedFunctions. Since functions
221 // are already ordered within the Funcs array, we can just check equality
222 // against the last function in the merged array.
223 else if (TopFunc
.MergedFunctions
->MergedFunctions
.back() == MatchFunc
)
225 TopFunc
.MergedFunctions
->MergedFunctions
.emplace_back(
226 std::move(MatchFunc
));
228 // No match, add the function as a top-level function
229 TopLevelFuncs
.emplace_back(std::move(MatchFunc
));
232 uint32_t mergedCount
= Funcs
.size() - TopLevelFuncs
.size();
233 // If any functions were merged, print a message about it.
234 if (mergedCount
!= 0)
235 Out
<< "Have " << mergedCount
236 << " merged functions as children of other functions\n";
238 std::swap(Funcs
, TopLevelFuncs
);
/// Prepare the collected function infos for encoding.
///
/// With Mutex held, the string table is finalized in insertion order, Funcs is
/// stable-sorted, and a new list is built from it: an entry whose range equals
/// or intersects the previously kept entry is either swapped in for it or
/// appended with a warning, and a zero-sized previous entry whose start lies
/// inside the current range is replaced by the current entry. If the last
/// entry still has a zero-sized range and ValidTextRanges is set, its end is
/// extended to the end of the text range containing its start address.
///
/// \param Out receives the duplicate/overlap warnings and the final
///            "Pruned ... functions" summary.
/// \returns an invalid_argument error with the message "already finalized",
/// otherwise success.
//
// NOTE(review): the conditions guarding the "already finalized" return and
// the sort/unique section, several else branches, and parts of the warning
// text are not visible in this listing; confirm them against the complete
// file before changing this function.
241 llvm::Error
GsymCreator::finalize(OutputAggregator
&Out
) {
242 std::lock_guard
<std::mutex
> Guard(Mutex
);
244 return createStringError(std::errc::invalid_argument
, "already finalized");
247 // Don't let the string table indexes change by finalizing in order.
248 StrTab
.finalizeInOrder();
250 // Remove duplicate function infos that have both entries from debug info
251 // (DWARF or Breakpad) and entries from the SymbolTable.
253 // Also handle overlapping functions. Usually there shouldn't be any, but they
254 // can and do happen in some rare cases.
263 // In (a) and (b), Y is ignored and X will be reported for the full range.
264 // In (c), both functions will be included in the result and lookups for an
265 // address in the intersection will return Y because of binary search.
267 // Note that in case of (b), we cannot include Y in the result because then
268 // we wouldn't find any function for range (end of Y, end of X)
269 // with binary search
// Remembered so the number of pruned functions can be reported at the end.
271 const auto NumBefore
= Funcs
.size();
272 // Only sort and unique if this isn't a segment. If this is a segment we
273 // already finalized the main GsymCreator with all of the function infos
274 // and then the already sorted and uniqued function infos were added to this
278 // Sort function infos so we can emit sorted functions. Use stable sort to
279 // ensure determinism.
280 llvm::stable_sort(Funcs
);
281 std::vector
<FunctionInfo
> FinalizedFuncs
;
282 FinalizedFuncs
.reserve(Funcs
.size());
// Seed the result with the first sorted entry; later entries are compared
// against FinalizedFuncs.back().
283 FinalizedFuncs
.emplace_back(std::move(Funcs
.front()));
284 for (size_t Idx
=1; Idx
< NumBefore
; ++Idx
) {
285 FunctionInfo
&Prev
= FinalizedFuncs
.back();
286 FunctionInfo
&Curr
= Funcs
[Idx
];
287 // Empty ranges won't intersect, but we still need to
288 // catch the case where we have multiple symbols at the
289 // same address and coalesce them.
290 const bool ranges_equal
= Prev
.Range
== Curr
.Range
;
291 if (ranges_equal
|| Prev
.Range
.intersects(Curr
.Range
)) {
292 // Overlapping ranges or empty identical ranges.
294 // Same address range. Check if one is from debug
295 // info and the other is from a symbol table. If
296 // so, then keep the one with debug info. Our
297 // sorting guarantees that entries with matching
298 // address ranges that have debug info are last in
300 if (!(Prev
== Curr
)) {
// Only warn when both candidates carry rich info, i.e. information would be
// lost by dropping one of them.
301 if (Prev
.hasRichInfo() && Curr
.hasRichInfo())
303 "Duplicate address ranges with different debug info.",
304 [&](raw_ostream
&OS
) {
305 OS
<< "warning: same address range contains "
307 << "info. Removing:\n"
308 << Prev
<< "\nIn favor of this one:\n"
312 // We want to swap the current entry with the previous since
313 // later entries with the same range always have more debug info
314 // or different debug info.
315 std::swap(Prev
, Curr
);
318 Out
.Report("Overlapping function ranges", [&](raw_ostream
&OS
) {
319 // print warnings about overlaps
320 OS
<< "warning: function ranges overlap:\n"
324 FinalizedFuncs
.emplace_back(std::move(Curr
));
327 if (Prev
.Range
.size() == 0 && Curr
.Range
.contains(Prev
.Range
.start())) {
328 // Symbols on macOS don't have address ranges, so if the range
329 // doesn't match and the size is zero, then we replace the empty
330 // symbol function info with the current one.
331 std::swap(Prev
, Curr
);
333 FinalizedFuncs
.emplace_back(std::move(Curr
));
// Install the pruned list as the new Funcs.
337 std::swap(Funcs
, FinalizedFuncs
);
339 // If our last function info entry doesn't have a size and if we have valid
340 // text ranges, we should set the size of the last entry since any search for
341 // a high address might match our last entry. By fixing up this size, we can
342 // help ensure we don't cause lookups to always return the last symbol that
343 // has no size when doing lookups.
344 if (!Funcs
.empty() && Funcs
.back().Range
.size() == 0 && ValidTextRanges
) {
346 ValidTextRanges
->getRangeThatContains(Funcs
.back().Range
.start())) {
347 Funcs
.back().Range
= {Funcs
.back().Range
.start(), Range
->end()};
350 Out
<< "Pruned " << NumBefore
- Funcs
.size() << " functions, ended with "
351 << Funcs
.size() << " total\n";
353 return Error::success();
356 uint32_t GsymCreator::copyString(const GsymCreator
&SrcGC
, uint32_t StrOff
) {
357 // String offset at zero is always the empty string, no copying needed.
360 return StrTab
.add(SrcGC
.StringOffsetMap
.find(StrOff
)->second
);
363 uint32_t GsymCreator::insertString(StringRef S
, bool Copy
) {
367 // The hash can be calculated outside the lock.
368 CachedHashStringRef
CHStr(S
);
369 std::lock_guard
<std::mutex
> Guard(Mutex
);
371 // We need to provide backing storage for the string if requested
372 // since StringTableBuilder stores references to strings. Any string
373 // that comes from a section in an object file doesn't need to be
374 // copied, but any string created by code will need to be copied.
375 // This allows GsymCreator to be really fast when parsing DWARF and
376 // other object files as most strings don't need to be copied.
377 if (!StrTab
.contains(CHStr
))
378 CHStr
= CachedHashStringRef
{StringStorage
.insert(S
).first
->getKey(),
381 const uint32_t StrOff
= StrTab
.add(CHStr
);
382 // Save a mapping of string offsets to the cached string reference in case
383 // we need to segment the GSYM file and copy string from one string table to
385 StringOffsetMap
.try_emplace(StrOff
, CHStr
);
389 StringRef
GsymCreator::getString(uint32_t Offset
) {
390 auto I
= StringOffsetMap
.find(Offset
);
391 assert(I
!= StringOffsetMap
.end() &&
392 "GsymCreator::getString expects a valid offset as parameter.");
393 return I
->second
.val();
396 void GsymCreator::addFunctionInfo(FunctionInfo
&&FI
) {
397 std::lock_guard
<std::mutex
> Guard(Mutex
);
398 Funcs
.emplace_back(std::move(FI
));
401 void GsymCreator::forEachFunctionInfo(
402 std::function
<bool(FunctionInfo
&)> const &Callback
) {
403 std::lock_guard
<std::mutex
> Guard(Mutex
);
404 for (auto &FI
: Funcs
) {
410 void GsymCreator::forEachFunctionInfo(
411 std::function
<bool(const FunctionInfo
&)> const &Callback
) const {
412 std::lock_guard
<std::mutex
> Guard(Mutex
);
413 for (const auto &FI
: Funcs
) {
419 size_t GsymCreator::getNumFunctionInfos() const {
420 std::lock_guard
<std::mutex
> Guard(Mutex
);
424 bool GsymCreator::IsValidTextAddress(uint64_t Addr
) const {
426 return ValidTextRanges
->contains(Addr
);
427 return true; // No valid text ranges has been set, so accept all ranges.
430 std::optional
<uint64_t> GsymCreator::getFirstFunctionAddress() const {
431 // If we have finalized then Funcs are sorted. If we are a segment then
432 // Funcs will be sorted as well since function infos get added from an
433 // already finalized GsymCreator object where its functions were sorted and
435 if ((Finalized
|| IsSegment
) && !Funcs
.empty())
436 return std::optional
<uint64_t>(Funcs
.front().startAddress());
440 std::optional
<uint64_t> GsymCreator::getLastFunctionAddress() const {
441 // If we have finalized then Funcs are sorted. If we are a segment then
442 // Funcs will be sorted as well since function infos get added from an
443 // already finalized GsymCreator object where its functions were sorted and
445 if ((Finalized
|| IsSegment
) && !Funcs
.empty())
446 return std::optional
<uint64_t>(Funcs
.back().startAddress());
450 std::optional
<uint64_t> GsymCreator::getBaseAddress() const {
453 return getFirstFunctionAddress();
456 uint64_t GsymCreator::getMaxAddressOffset() const {
457 switch (getAddressOffsetSize()) {
458 case 1: return UINT8_MAX
;
459 case 2: return UINT16_MAX
;
460 case 4: return UINT32_MAX
;
461 case 8: return UINT64_MAX
;
463 llvm_unreachable("invalid address offset");
466 uint8_t GsymCreator::getAddressOffsetSize() const {
467 const std::optional
<uint64_t> BaseAddress
= getBaseAddress();
468 const std::optional
<uint64_t> LastFuncAddr
= getLastFunctionAddress();
469 if (BaseAddress
&& LastFuncAddr
) {
470 const uint64_t AddrDelta
= *LastFuncAddr
- *BaseAddress
;
471 if (AddrDelta
<= UINT8_MAX
)
473 else if (AddrDelta
<= UINT16_MAX
)
475 else if (AddrDelta
<= UINT32_MAX
)
482 uint64_t GsymCreator::calculateHeaderAndTableSize() const {
483 uint64_t Size
= sizeof(Header
);
484 const size_t NumFuncs
= Funcs
.size();
485 // Add size of address offset table
486 Size
+= NumFuncs
* getAddressOffsetSize();
487 // Add size of address info offsets which are 32 bit integers in version 1.
488 Size
+= NumFuncs
* sizeof(uint32_t);
489 // Add file table size
490 Size
+= Files
.size() * sizeof(FileEntry
);
491 // Add string table size
492 Size
+= StrTab
.getSize();
497 // This function takes a InlineInfo class that was copy constructed from an
498 // InlineInfo from the \a SrcGC and updates all members that point to strings
499 // and files to point to strings and files from this GsymCreator.
500 void GsymCreator::fixupInlineInfo(const GsymCreator
&SrcGC
, InlineInfo
&II
) {
501 II
.Name
= copyString(SrcGC
, II
.Name
);
502 II
.CallFile
= copyFile(SrcGC
, II
.CallFile
);
503 for (auto &ChildII
: II
.Children
)
504 fixupInlineInfo(SrcGC
, ChildII
);
507 uint64_t GsymCreator::copyFunctionInfo(const GsymCreator
&SrcGC
, size_t FuncIdx
) {
508 // To copy a function info we need to copy any files and strings over into
509 // this GsymCreator and then copy the function info and update the string
510 // table offsets to match the new offsets.
511 const FunctionInfo
&SrcFI
= SrcGC
.Funcs
[FuncIdx
];
514 DstFI
.Range
= SrcFI
.Range
;
515 DstFI
.Name
= copyString(SrcGC
, SrcFI
.Name
);
516 // Copy the line table if there is one.
517 if (SrcFI
.OptLineTable
) {
518 // Copy the entire line table.
519 DstFI
.OptLineTable
= LineTable(SrcFI
.OptLineTable
.value());
520 // Fixup all LineEntry::File entries which are indexes in the the file table
521 // from SrcGC and must be converted to file indexes from this GsymCreator.
522 LineTable
&DstLT
= DstFI
.OptLineTable
.value();
523 const size_t NumLines
= DstLT
.size();
524 for (size_t I
=0; I
<NumLines
; ++I
) {
525 LineEntry
&LE
= DstLT
.get(I
);
526 LE
.File
= copyFile(SrcGC
, LE
.File
);
529 // Copy the inline information if needed.
531 // Make a copy of the source inline information.
532 DstFI
.Inline
= SrcFI
.Inline
.value();
533 // Fixup all strings and files in the copied inline information.
534 fixupInlineInfo(SrcGC
, *DstFI
.Inline
);
536 std::lock_guard
<std::mutex
> Guard(Mutex
);
537 Funcs
.emplace_back(DstFI
);
538 return Funcs
.back().cacheEncoding();
541 llvm::Error
GsymCreator::saveSegments(StringRef Path
,
542 llvm::endianness ByteOrder
,
543 uint64_t SegmentSize
) const {
544 if (SegmentSize
== 0)
545 return createStringError(std::errc::invalid_argument
,
546 "invalid segment size zero");
549 const size_t NumFuncs
= Funcs
.size();
550 while (FuncIdx
< NumFuncs
) {
551 llvm::Expected
<std::unique_ptr
<GsymCreator
>> ExpectedGC
=
552 createSegment(SegmentSize
, FuncIdx
);
554 GsymCreator
*GC
= ExpectedGC
->get();
556 break; // We had not more functions to encode.
557 // Don't collect any messages at all
558 OutputAggregator
Out(nullptr);
559 llvm::Error Err
= GC
->finalize(Out
);
562 std::string SegmentedGsymPath
;
563 raw_string_ostream
SGP(SegmentedGsymPath
);
564 std::optional
<uint64_t> FirstFuncAddr
= GC
->getFirstFunctionAddress();
566 SGP
<< Path
<< "-" << llvm::format_hex(*FirstFuncAddr
, 1);
568 Err
= GC
->save(SegmentedGsymPath
, ByteOrder
, std::nullopt
);
573 return ExpectedGC
.takeError();
576 return Error::success();
/// Build a new GsymCreator holding a run of function infos that starts at
/// \a FuncIdx.
///
/// Function infos are copied into the new creator one at a time with
/// copyFunctionInfo(). Before each copy the header and table size of the new
/// creator plus the accumulated function info sizes (each rounded up to a
/// multiple of 4) is compared with \a SegmentSize.
///
/// \param SegmentSize the requested size of the segment.
/// \param FuncIdx     in/out index into Funcs; advanced past every function
///                    info that was copied into the segment.
/// \returns an empty unique_ptr when \a FuncIdx is already past the end of
/// Funcs, an invalid_argument error when the size limit is hit before any
/// function info was copied, otherwise the new creator.
//
// NOTE(review): the statements that mark the new creator as a segment and
// copy the UUID, the condition guarding setBaseAddress(), and the end of the
// size check are not visible in this listing; confirm them against the
// complete file before changing this function.
579 llvm::Expected
<std::unique_ptr
<GsymCreator
>>
580 GsymCreator::createSegment(uint64_t SegmentSize
, size_t &FuncIdx
) const {
581 // No function entries, return empty unique pointer
582 if (FuncIdx
>= Funcs
.size())
583 return std::unique_ptr
<GsymCreator
>();
585 std::unique_ptr
<GsymCreator
> GC(new GsymCreator(/*Quiet=*/true));
587 // Tell the creator that this is a segment.
590 // Set the base address if there is one.
592 GC
->setBaseAddress(*BaseAddress
);
593 // Copy the UUID value from this object into the new creator.
595 const size_t NumFuncs
= Funcs
.size();
596 // Track how big the function infos are for the current segment so we can
597 // emit segments that are close to the requested size. It is quick math to
598 // determine the current header and tables sizes, so we can do that each loop.
599 uint64_t SegmentFuncInfosSize
= 0;
600 for (; FuncIdx
< NumFuncs
; ++FuncIdx
) {
601 const uint64_t HeaderAndTableSize
= GC
->calculateHeaderAndTableSize();
602 if (HeaderAndTableSize
+ SegmentFuncInfosSize
>= SegmentSize
) {
// Hitting the limit before anything was copied means the requested size
// cannot hold even one function info.
// NOTE(review): the message below reads "to small" (likely meant "too
// small"); it is runtime text and is left untouched here.
603 if (SegmentFuncInfosSize
== 0)
604 return createStringError(std::errc::invalid_argument
,
605 "a segment size of %" PRIu64
" is to small to "
606 "fit any function infos, specify a larger value",
// Each copied function info is accounted for at its size rounded up to 4.
611 SegmentFuncInfosSize
+= alignTo(GC
->copyFunctionInfo(*this, FuncIdx
), 4);
613 return std::move(GC
);