1 //===- GsymCreator.cpp ----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
8 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
9 #include "llvm/DebugInfo/GSYM/FileWriter.h"
10 #include "llvm/DebugInfo/GSYM/Header.h"
11 #include "llvm/DebugInfo/GSYM/LineTable.h"
12 #include "llvm/MC/StringTableBuilder.h"
13 #include "llvm/Support/raw_ostream.h"
23 GsymCreator::GsymCreator(bool Quiet
)
24 : StrTab(StringTableBuilder::ELF
), Quiet(Quiet
) {
25 insertFile(StringRef());
28 uint32_t GsymCreator::insertFile(StringRef Path
, llvm::sys::path::Style Style
) {
29 llvm::StringRef directory
= llvm::sys::path::parent_path(Path
, Style
);
30 llvm::StringRef filename
= llvm::sys::path::filename(Path
, Style
);
31 // We must insert the strings first, then call the FileEntry constructor.
32 // If we inline the insertString() function call into the constructor, the
33 // call order is undefined due to parameter lists not having any ordering
35 const uint32_t Dir
= insertString(directory
);
36 const uint32_t Base
= insertString(filename
);
37 return insertFileEntry(FileEntry(Dir
, Base
));
40 uint32_t GsymCreator::insertFileEntry(FileEntry FE
) {
41 std::lock_guard
<std::mutex
> Guard(Mutex
);
42 const auto NextIndex
= Files
.size();
43 // Find FE in hash map and insert if not present.
44 auto R
= FileEntryToIndex
.insert(std::make_pair(FE
, NextIndex
));
46 Files
.emplace_back(FE
);
47 return R
.first
->second
;
50 uint32_t GsymCreator::copyFile(const GsymCreator
&SrcGC
, uint32_t FileIdx
) {
51 // File index zero is reserved for a FileEntry with no directory and no
52 // filename. Any other file and we need to copy the strings for the directory
56 const FileEntry SrcFE
= SrcGC
.Files
[FileIdx
];
57 // Copy the strings for the file and then add the newly converted file entry.
58 uint32_t Dir
= StrTab
.add(SrcGC
.StringOffsetMap
.find(SrcFE
.Dir
)->second
);
59 uint32_t Base
= StrTab
.add(SrcGC
.StringOffsetMap
.find(SrcFE
.Base
)->second
);
60 FileEntry
DstFE(Dir
, Base
);
61 return insertFileEntry(DstFE
);
65 llvm::Error
GsymCreator::save(StringRef Path
,
66 llvm::support::endianness ByteOrder
,
67 std::optional
<uint64_t> SegmentSize
) const {
69 return saveSegments(Path
, ByteOrder
, *SegmentSize
);
71 raw_fd_ostream
OutStrm(Path
, EC
);
73 return llvm::errorCodeToError(EC
);
74 FileWriter
O(OutStrm
, ByteOrder
);
// Encodes this creator through the FileWriter O while holding the mutex.
// The visible statements emit, in this order: the header, the table of
// function start address offsets relative to the header base address, a
// placeholder table for the address info offsets, the file table, the string
// table and the encoded function infos. The string table offset/size in the
// header and the address info offsets are patched afterwards with fixup32().
// Failures are reported as invalid_argument string errors, or as the error
// produced while encoding a function info.
// NOTE(review): this listing has lost lines (several guards, the switch case
// labels, loop bodies and closing braces are not visible); confirm control
// flow against the complete file before relying on it.
78 llvm::Error
GsymCreator::encode(FileWriter
&O
) const {
79 std::lock_guard
<std::mutex
> Guard(Mutex
);
// NOTE(review): the conditions guarding the next two early returns are not
// visible here.
81 return createStringError(std::errc::invalid_argument
,
82 "no functions to encode");
84 return createStringError(std::errc::invalid_argument
,
85 "GsymCreator wasn't finalized prior to encoding");
// The header stores the number of addresses as a 32 bit value.
87 if (Funcs
.size() > UINT32_MAX
)
88 return createStringError(std::errc::invalid_argument
,
89 "too many FunctionInfos");
91 std::optional
<uint64_t> BaseAddress
= getBaseAddress();
92 // Base address should be valid if we have any functions.
94 return createStringError(std::errc::invalid_argument
,
95 "invalid base address");
// Fill in the header fields.
97 Hdr
.Magic
= GSYM_MAGIC
;
98 Hdr
.Version
= GSYM_VERSION
;
99 Hdr
.AddrOffSize
= getAddressOffsetSize();
100 Hdr
.UUIDSize
= static_cast<uint8_t>(UUID
.size());
101 Hdr
.BaseAddress
= *BaseAddress
;
102 Hdr
.NumAddresses
= static_cast<uint32_t>(Funcs
.size());
103 Hdr
.StrtabOffset
= 0; // We will fix this up later.
104 Hdr
.StrtabSize
= 0; // We will fix this up later.
// Zero the whole UUID field, then reject a UUID that does not fit in it.
105 memset(Hdr
.UUID
, 0, sizeof(Hdr
.UUID
));
106 if (UUID
.size() > sizeof(Hdr
.UUID
))
107 return createStringError(std::errc::invalid_argument
,
108 "invalid UUID size %u", (uint32_t)UUID
.size());
109 // Copy the UUID value if we have one.
111 memcpy(Hdr
.UUID
, UUID
.data(), UUID
.size());
112 // Write out the header.
113 llvm::Error Err
= Hdr
.encode(O
);
117 const uint64_t MaxAddressOffset
= getMaxAddressOffset();
118 // Write out the address offsets.
119 O
.alignTo(Hdr
.AddrOffSize
);
120 for (const auto &FuncInfo
: Funcs
) {
121 uint64_t AddrOffset
= FuncInfo
.startAddress() - Hdr
.BaseAddress
;
122 // Make sure we calculated the address offsets byte size correctly by
123 // verifying the current address offset is within ranges. We have seen bugs
124 // introduced when the code changes that can cause problems here so it is
125 // good to catch this during testing.
126 assert(AddrOffset
<= MaxAddressOffset
);
// Keeps MaxAddressOffset referenced when the assert above compiles away.
127 (void)MaxAddressOffset
;
// One write per address offset width; the case labels are not visible here.
128 switch (Hdr
.AddrOffSize
) {
130 O
.writeU8(static_cast<uint8_t>(AddrOffset
));
133 O
.writeU16(static_cast<uint16_t>(AddrOffset
));
136 O
.writeU32(static_cast<uint32_t>(AddrOffset
));
139 O
.writeU64(AddrOffset
);
144 // Write out all zeros for the AddrInfoOffsets.
// Remember where the placeholder table starts so it can be patched later.
146 const off_t AddrInfoOffsetsOffset
= O
.tell();
147 for (size_t i
= 0, n
= Funcs
.size(); i
< n
; ++i
)
150 // Write out the file table
// File entry zero must exist and must carry zero directory and name offsets.
152 assert(!Files
.empty());
153 assert(Files
[0].Dir
== 0);
154 assert(Files
[0].Base
== 0);
155 size_t NumFiles
= Files
.size();
// The file count is written as a 32 bit value.
156 if (NumFiles
> UINT32_MAX
)
157 return createStringError(std::errc::invalid_argument
, "too many files");
158 O
.writeU32(static_cast<uint32_t>(NumFiles
));
159 for (auto File
: Files
) {
160 O
.writeU32(File
.Dir
);
161 O
.writeU32(File
.Base
);
164 // Write out the string table.
// The size is measured as the stream position delta around the write.
165 const off_t StrtabOffset
= O
.tell();
166 StrTab
.write(O
.get_stream());
167 const off_t StrtabSize
= O
.tell() - StrtabOffset
;
168 std::vector
<uint32_t> AddrInfoOffsets
;
170 // Write out the address infos for each function info.
// Each successful encode yields an offset that is collected for the fixups.
171 for (const auto &FuncInfo
: Funcs
) {
172 if (Expected
<uint64_t> OffsetOrErr
= FuncInfo
.encode(O
))
173 AddrInfoOffsets
.push_back(OffsetOrErr
.get());
175 return OffsetOrErr
.takeError();
177 // Fixup the string table offset and size in the header
178 O
.fixup32((uint32_t)StrtabOffset
, offsetof(Header
, StrtabOffset
));
179 O
.fixup32((uint32_t)StrtabSize
, offsetof(Header
, StrtabSize
));
181 // Fixup all address info offsets
// NOTE(review): the declaration and update of Offset are not visible here.
183 for (auto AddrInfoOffset
: AddrInfoOffsets
) {
184 O
.fixup32(AddrInfoOffset
, AddrInfoOffsetsOffset
+ Offset
);
187 return ErrorSuccess();
// Similar to std::remove_if, but the predicate is binary and it is passed both
// the previous and the current element.
//
// An element is dropped when Pred(previous, current) returns true. Kept
// elements are compacted towards the front of [FirstIt, LastIt) in their
// original order. The returned iterator is the new logical end; the caller is
// expected to erase the tail.
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt != LastIt) {
    // Phase one: scan adjacent pairs until the first element to drop. The
    // post-increment keeps PrevIt one step behind the element under test, so
    // when the search stops PrevIt and FirstIt both sit on that element.
    auto PrevIt = FirstIt++;
    FirstIt = std::find_if(FirstIt, LastIt, [&](const auto &Curr) {
      return Pred(*PrevIt++, Curr);
    });
    // Phase two: FirstIt is the next slot to fill, CurrIt walks the rest.
    if (FirstIt != LastIt)
      for (ForwardIt CurrIt = FirstIt; ++CurrIt != LastIt;)
        if (!Pred(*PrevIt, *CurrIt)) {
          // The kept element is about to land in *FirstIt, so that slot is
          // the "previous" element for the following comparisons.
          PrevIt = FirstIt;
          *FirstIt++ = std::move(*CurrIt);
        }
  }
  return FirstIt;
}
// Finalizes this creator while holding the mutex. The visible statements
// finalize the string table in insertion order, run removeIfBinary() over the
// function infos with a predicate that inspects each previous/current pair
// for equal, overlapping or zero-sized ranges, optionally give a zero-sized
// last function a range ending at the end of the valid text range containing
// it, and print a "Pruned ..." summary to OS. Warnings go to OS only when
// Quiet is false.
// NOTE(review): this listing has lost lines (the finalized check and flag
// update, the sort call, the predicate's return statements and what is done
// with the iterator returned by removeIfBinary are not visible); confirm
// against the complete file.
210 llvm::Error
GsymCreator::finalize(llvm::raw_ostream
&OS
) {
211 std::lock_guard
<std::mutex
> Guard(Mutex
);
// NOTE(review): the condition guarding this early return is not visible here.
213 return createStringError(std::errc::invalid_argument
, "already finalized");
216 // Sort function infos so we can emit sorted functions.
219 // Don't let the string table indexes change by finalizing in order.
220 StrTab
.finalizeInOrder();
222 // Remove duplicate function infos that have both entries from debug info
223 // (DWARF or Breakpad) and entries from the SymbolTable.
225 // Also handle overlapping functions. Usually there shouldn't be any, but they
226 // can and do happen in some rare cases.
235 // In (a) and (b), Y is ignored and X will be reported for the full range.
236 // In (c), both functions will be included in the result and lookups for an
237 // address in the intersection will return Y because of binary search.
239 // Note that in case of (b), we cannot include Y in the result because then
240 // we wouldn't find any function for range (end of Y, end of X)
241 // with binary search
// Remember the starting count for the summary printed at the end.
242 auto NumBefore
= Funcs
.size();
244 removeIfBinary(Funcs
.begin(), Funcs
.end(),
245 [&](const auto &Prev
, const auto &Curr
) {
246 // Empty ranges won't intersect, but we still need to
247 // catch the case where we have multiple symbols at the
248 // same address and coalesce them.
249 const bool ranges_equal
= Prev
.Range
== Curr
.Range
;
250 if (ranges_equal
|| Prev
.Range
.intersects(Curr
.Range
)) {
251 // Overlapping ranges or empty identical ranges.
253 // Same address range. Check if one is from debug
254 // info and the other is from a symbol table. If
255 // so, then keep the one with debug info. Our
256 // sorting guarantees that entries with matching
257 // address ranges that have debug info are last in
260 // FunctionInfo entries match exactly (range,
263 // We used to output a warning here, but this was
264 // so frequent on some binaries, in particular
265 // when those were built with GCC, that it slowed
266 // down processing extremely.
269 if (!Prev
.hasRichInfo() && Curr
.hasRichInfo()) {
270 // Same address range, one with no debug info
271 // (symbol) and the next with debug info. Keep
276 OS
<< "warning: same address range contains "
278 << "info. Removing:\n"
279 << Prev
<< "\nIn favor of this one:\n"
286 if (!Quiet
) { // print warnings about overlaps
287 OS
<< "warning: function ranges overlap:\n"
// Previous entry has no size and starts inside the current entry's range.
292 } else if (Prev
.Range
.size() == 0 &&
293 Curr
.Range
.contains(Prev
.Range
.start())) {
295 OS
<< "warning: removing symbol:\n"
296 << Prev
<< "\nKeeping:\n"
306 // If our last function info entry doesn't have a size and if we have valid
307 // text ranges, we should set the size of the last entry since any search for
308 // a high address might match our last entry. By fixing up this size, we can
309 // help ensure we don't cause lookups to always return the last symbol that
310 // has no size when doing lookups.
311 if (!Funcs
.empty() && Funcs
.back().Range
.size() == 0 && ValidTextRanges
) {
313 ValidTextRanges
->getRangeThatContains(Funcs
.back().Range
.start())) {
// Keep the start address and extend the end to the containing range's end.
314 Funcs
.back().Range
= {Funcs
.back().Range
.start(), Range
->end()};
// Report how many function infos were dropped and how many remain.
317 OS
<< "Pruned " << NumBefore
- Funcs
.size() << " functions, ended with "
318 << Funcs
.size() << " total\n";
319 return Error::success();
322 uint32_t GsymCreator::copyString(const GsymCreator
&SrcGC
, uint32_t StrOff
) {
323 // String offset at zero is always the empty string, no copying needed.
326 return StrTab
.add(SrcGC
.StringOffsetMap
.find(StrOff
)->second
);
329 uint32_t GsymCreator::insertString(StringRef S
, bool Copy
) {
333 // The hash can be calculated outside the lock.
334 CachedHashStringRef
CHStr(S
);
335 std::lock_guard
<std::mutex
> Guard(Mutex
);
337 // We need to provide backing storage for the string if requested
338 // since StringTableBuilder stores references to strings. Any string
339 // that comes from a section in an object file doesn't need to be
340 // copied, but any string created by code will need to be copied.
341 // This allows GsymCreator to be really fast when parsing DWARF and
342 // other object files as most strings don't need to be copied.
343 if (!StrTab
.contains(CHStr
))
344 CHStr
= CachedHashStringRef
{StringStorage
.insert(S
).first
->getKey(),
347 const uint32_t StrOff
= StrTab
.add(CHStr
);
348 // Save a mapping of string offsets to the cached string reference in case
349 // we need to segment the GSYM file and copy string from one string table to
351 if (StringOffsetMap
.count(StrOff
) == 0)
352 StringOffsetMap
.insert(std::make_pair(StrOff
, CHStr
));
356 void GsymCreator::addFunctionInfo(FunctionInfo
&&FI
) {
357 std::lock_guard
<std::mutex
> Guard(Mutex
);
358 Ranges
.insert(FI
.Range
);
359 Funcs
.emplace_back(std::move(FI
));
// Iterates over the function infos while holding the mutex, giving the
// callback mutable access to each FunctionInfo.
// NOTE(review): the loop body is not visible in this listing, so how the
// callback's bool result affects iteration cannot be stated from here;
// confirm against the complete file.
362 void GsymCreator::forEachFunctionInfo(
363 std::function
<bool(FunctionInfo
&)> const &Callback
) {
364 std::lock_guard
<std::mutex
> Guard(Mutex
);
365 for (auto &FI
: Funcs
) {
// Const overload: iterates over the function infos while holding the mutex,
// giving the callback read-only access to each FunctionInfo.
// NOTE(review): the loop body is not visible in this listing, so how the
// callback's bool result affects iteration cannot be stated from here;
// confirm against the complete file.
371 void GsymCreator::forEachFunctionInfo(
372 std::function
<bool(const FunctionInfo
&)> const &Callback
) const {
373 std::lock_guard
<std::mutex
> Guard(Mutex
);
374 for (const auto &FI
: Funcs
) {
380 size_t GsymCreator::getNumFunctionInfos() const {
381 std::lock_guard
<std::mutex
> Guard(Mutex
);
385 bool GsymCreator::IsValidTextAddress(uint64_t Addr
) const {
387 return ValidTextRanges
->contains(Addr
);
388 return true; // No valid text ranges has been set, so accept all ranges.
391 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr
) const {
392 std::lock_guard
<std::mutex
> Guard(Mutex
);
393 return Ranges
.contains(Addr
);
396 std::optional
<uint64_t> GsymCreator::getFirstFunctionAddress() const {
397 if (Finalized
&& !Funcs
.empty())
398 return std::optional
<uint64_t>(Funcs
.front().startAddress());
399 // This code gets used by the segmentation of GSYM files to help determine the
400 // size of the GSYM header while continually adding new FunctionInfo objects
401 // to this object, so we haven't finalized this object yet.
404 return std::optional
<uint64_t>(Ranges
.begin()->start());
407 std::optional
<uint64_t> GsymCreator::getLastFunctionAddress() const {
408 if (Finalized
&& !Funcs
.empty())
409 return std::optional
<uint64_t>(Funcs
.back().startAddress());
410 // This code gets used by the segmentation of GSYM files to help determine the
411 // size of the GSYM header while continually adding new FunctionInfo objects
412 // to this object, so we haven't finalized this object yet.
415 return std::optional
<uint64_t>((Ranges
.end() - 1)->end());
418 std::optional
<uint64_t> GsymCreator::getBaseAddress() const {
421 return getFirstFunctionAddress();
424 uint64_t GsymCreator::getMaxAddressOffset() const {
425 switch (getAddressOffsetSize()) {
426 case 1: return UINT8_MAX
;
427 case 2: return UINT16_MAX
;
428 case 4: return UINT32_MAX
;
429 case 8: return UINT64_MAX
;
431 llvm_unreachable("invalid address offset");
434 uint8_t GsymCreator::getAddressOffsetSize() const {
435 const std::optional
<uint64_t> BaseAddress
= getBaseAddress();
436 const std::optional
<uint64_t> LastFuncAddr
= getLastFunctionAddress();
437 if (BaseAddress
&& LastFuncAddr
) {
438 const uint64_t AddrDelta
= *LastFuncAddr
- *BaseAddress
;
439 if (AddrDelta
<= UINT8_MAX
)
441 else if (AddrDelta
<= UINT16_MAX
)
443 else if (AddrDelta
<= UINT32_MAX
)
450 uint64_t GsymCreator::calculateHeaderAndTableSize() const {
451 uint64_t Size
= sizeof(Header
);
452 const size_t NumFuncs
= Funcs
.size();
453 // Add size of address offset table
454 Size
+= NumFuncs
* getAddressOffsetSize();
455 // Add size of address info offsets which are 32 bit integers in version 1.
456 Size
+= NumFuncs
* sizeof(uint32_t);
457 // Add file table size
458 Size
+= Files
.size() * sizeof(FileEntry
);
459 // Add string table size
460 Size
+= StrTab
.getSize();
465 // This function takes a InlineInfo class that was copy constructed from an
466 // InlineInfo from the \a SrcGC and updates all members that point to strings
467 // and files to point to strings and files from this GsymCreator.
468 void GsymCreator::fixupInlineInfo(const GsymCreator
&SrcGC
, InlineInfo
&II
) {
469 II
.Name
= copyString(SrcGC
, II
.Name
);
470 II
.CallFile
= copyFile(SrcGC
, II
.CallFile
);
471 for (auto &ChildII
: II
.Children
)
472 fixupInlineInfo(SrcGC
, ChildII
);
475 uint64_t GsymCreator::copyFunctionInfo(const GsymCreator
&SrcGC
, size_t FuncIdx
) {
476 // To copy a function info we need to copy any files and strings over into
477 // this GsymCreator and then copy the function info and update the string
478 // table offsets to match the new offsets.
479 const FunctionInfo
&SrcFI
= SrcGC
.Funcs
[FuncIdx
];
480 Ranges
.insert(SrcFI
.Range
);
483 DstFI
.Range
= SrcFI
.Range
;
484 DstFI
.Name
= copyString(SrcGC
, SrcFI
.Name
);
485 // Copy the line table if there is one.
486 if (SrcFI
.OptLineTable
) {
487 // Copy the entire line table.
488 DstFI
.OptLineTable
= LineTable(SrcFI
.OptLineTable
.value());
489 // Fixup all LineEntry::File entries which are indexes in the the file table
490 // from SrcGC and must be converted to file indexes from this GsymCreator.
491 LineTable
&DstLT
= DstFI
.OptLineTable
.value();
492 const size_t NumLines
= DstLT
.size();
493 for (size_t I
=0; I
<NumLines
; ++I
) {
494 LineEntry
&LE
= DstLT
.get(I
);
495 LE
.File
= copyFile(SrcGC
, LE
.File
);
498 // Copy the inline information if needed.
500 // Make a copy of the source inline information.
501 DstFI
.Inline
= SrcFI
.Inline
.value();
502 // Fixup all strings and files in the copied inline information.
503 fixupInlineInfo(SrcGC
, *DstFI
.Inline
);
505 std::lock_guard
<std::mutex
> Guard(Mutex
);
506 Funcs
.push_back(DstFI
);
507 return Funcs
.back().cacheEncoding();
510 llvm::Error
GsymCreator::saveSegments(StringRef Path
,
511 llvm::support::endianness ByteOrder
,
512 uint64_t SegmentSize
) const {
513 if (SegmentSize
== 0)
514 return createStringError(std::errc::invalid_argument
,
515 "invalid segment size zero");
518 const size_t NumFuncs
= Funcs
.size();
519 while (FuncIdx
< NumFuncs
) {
520 llvm::Expected
<std::unique_ptr
<GsymCreator
>> ExpectedGC
=
521 createSegment(SegmentSize
, FuncIdx
);
523 GsymCreator
*GC
= ExpectedGC
->get();
525 break; // We had not more functions to encode.
526 raw_null_ostream ErrorStrm
;
527 llvm::Error Err
= GC
->finalize(ErrorStrm
);
530 std::string SegmentedGsymPath
;
531 raw_string_ostream
SGP(SegmentedGsymPath
);
532 std::optional
<uint64_t> FirstFuncAddr
= GC
->getFirstFunctionAddress();
534 SGP
<< Path
<< "-" << llvm::format_hex(*FirstFuncAddr
, 1);
536 Err
= GC
->save(SegmentedGsymPath
, ByteOrder
, std::nullopt
);
541 return ExpectedGC
.takeError();
544 return Error::success();
547 llvm::Expected
<std::unique_ptr
<GsymCreator
>>
548 GsymCreator::createSegment(uint64_t SegmentSize
, size_t &FuncIdx
) const {
549 // No function entries, return empty unique pointer
550 if (FuncIdx
>= Funcs
.size())
551 return std::unique_ptr
<GsymCreator
>();
553 std::unique_ptr
<GsymCreator
> GC(new GsymCreator(/*Quiet=*/true));
554 // Set the base address if there is one.
556 GC
->setBaseAddress(*BaseAddress
);
557 // Copy the UUID value from this object into the new creator.
559 const size_t NumFuncs
= Funcs
.size();
560 // Track how big the function infos are for the current segment so we can
561 // emit segments that are close to the requested size. It is quick math to
562 // determine the current header and tables sizes, so we can do that each loop.
563 uint64_t SegmentFuncInfosSize
= 0;
564 for (; FuncIdx
< NumFuncs
; ++FuncIdx
) {
565 const uint64_t HeaderAndTableSize
= GC
->calculateHeaderAndTableSize();
566 if (HeaderAndTableSize
+ SegmentFuncInfosSize
>= SegmentSize
) {
567 if (SegmentFuncInfosSize
== 0)
568 return createStringError(std::errc::invalid_argument
,
569 "a segment size of %" PRIu64
" is to small to "
570 "fit any function infos, specify a larger value",
575 SegmentFuncInfosSize
+= alignTo(GC
->copyFunctionInfo(*this, FuncIdx
), 4);
577 return std::move(GC
);