[llvm-shlib] Fix the version naming style of libLLVM for Windows (#85710)
[llvm-project.git] / llvm / lib / DebugInfo / GSYM / GsymCreator.cpp
blobee7b0efba5ea472e0070684b74d9fad18b8e2aa6
1 //===- GsymCreator.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
8 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
9 #include "llvm/DebugInfo/GSYM/FileWriter.h"
10 #include "llvm/DebugInfo/GSYM/Header.h"
11 #include "llvm/DebugInfo/GSYM/LineTable.h"
12 #include "llvm/MC/StringTableBuilder.h"
13 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cassert>
17 #include <functional>
18 #include <vector>
20 using namespace llvm;
21 using namespace gsym;
23 GsymCreator::GsymCreator(bool Quiet)
24 : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
25 insertFile(StringRef());
28 uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
29 llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
30 llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
31 // We must insert the strings first, then call the FileEntry constructor.
32 // If we inline the insertString() function call into the constructor, the
33 // call order is undefined due to parameter lists not having any ordering
34 // requirements.
35 const uint32_t Dir = insertString(directory);
36 const uint32_t Base = insertString(filename);
37 return insertFileEntry(FileEntry(Dir, Base));
40 uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
41 std::lock_guard<std::mutex> Guard(Mutex);
42 const auto NextIndex = Files.size();
43 // Find FE in hash map and insert if not present.
44 auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
45 if (R.second)
46 Files.emplace_back(FE);
47 return R.first->second;
50 uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
51 // File index zero is reserved for a FileEntry with no directory and no
52 // filename. Any other file and we need to copy the strings for the directory
53 // and filename.
54 if (FileIdx == 0)
55 return 0;
56 const FileEntry SrcFE = SrcGC.Files[FileIdx];
57 // Copy the strings for the file and then add the newly converted file entry.
58 uint32_t Dir = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
59 uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
60 FileEntry DstFE(Dir, Base);
61 return insertFileEntry(DstFE);
64 llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
65 std::optional<uint64_t> SegmentSize) const {
66 if (SegmentSize)
67 return saveSegments(Path, ByteOrder, *SegmentSize);
68 std::error_code EC;
69 raw_fd_ostream OutStrm(Path, EC);
70 if (EC)
71 return llvm::errorCodeToError(EC);
72 FileWriter O(OutStrm, ByteOrder);
73 return encode(O);
76 llvm::Error GsymCreator::encode(FileWriter &O) const {
77 std::lock_guard<std::mutex> Guard(Mutex);
78 if (Funcs.empty())
79 return createStringError(std::errc::invalid_argument,
80 "no functions to encode");
81 if (!Finalized)
82 return createStringError(std::errc::invalid_argument,
83 "GsymCreator wasn't finalized prior to encoding");
85 if (Funcs.size() > UINT32_MAX)
86 return createStringError(std::errc::invalid_argument,
87 "too many FunctionInfos");
89 std::optional<uint64_t> BaseAddress = getBaseAddress();
90 // Base address should be valid if we have any functions.
91 if (!BaseAddress)
92 return createStringError(std::errc::invalid_argument,
93 "invalid base address");
94 Header Hdr;
95 Hdr.Magic = GSYM_MAGIC;
96 Hdr.Version = GSYM_VERSION;
97 Hdr.AddrOffSize = getAddressOffsetSize();
98 Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
99 Hdr.BaseAddress = *BaseAddress;
100 Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
101 Hdr.StrtabOffset = 0; // We will fix this up later.
102 Hdr.StrtabSize = 0; // We will fix this up later.
103 memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
104 if (UUID.size() > sizeof(Hdr.UUID))
105 return createStringError(std::errc::invalid_argument,
106 "invalid UUID size %u", (uint32_t)UUID.size());
107 // Copy the UUID value if we have one.
108 if (UUID.size() > 0)
109 memcpy(Hdr.UUID, UUID.data(), UUID.size());
110 // Write out the header.
111 llvm::Error Err = Hdr.encode(O);
112 if (Err)
113 return Err;
115 const uint64_t MaxAddressOffset = getMaxAddressOffset();
116 // Write out the address offsets.
117 O.alignTo(Hdr.AddrOffSize);
118 for (const auto &FuncInfo : Funcs) {
119 uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
120 // Make sure we calculated the address offsets byte size correctly by
121 // verifying the current address offset is within ranges. We have seen bugs
122 // introduced when the code changes that can cause problems here so it is
123 // good to catch this during testing.
124 assert(AddrOffset <= MaxAddressOffset);
125 (void)MaxAddressOffset;
126 switch (Hdr.AddrOffSize) {
127 case 1:
128 O.writeU8(static_cast<uint8_t>(AddrOffset));
129 break;
130 case 2:
131 O.writeU16(static_cast<uint16_t>(AddrOffset));
132 break;
133 case 4:
134 O.writeU32(static_cast<uint32_t>(AddrOffset));
135 break;
136 case 8:
137 O.writeU64(AddrOffset);
138 break;
142 // Write out all zeros for the AddrInfoOffsets.
143 O.alignTo(4);
144 const off_t AddrInfoOffsetsOffset = O.tell();
145 for (size_t i = 0, n = Funcs.size(); i < n; ++i)
146 O.writeU32(0);
148 // Write out the file table
149 O.alignTo(4);
150 assert(!Files.empty());
151 assert(Files[0].Dir == 0);
152 assert(Files[0].Base == 0);
153 size_t NumFiles = Files.size();
154 if (NumFiles > UINT32_MAX)
155 return createStringError(std::errc::invalid_argument, "too many files");
156 O.writeU32(static_cast<uint32_t>(NumFiles));
157 for (auto File : Files) {
158 O.writeU32(File.Dir);
159 O.writeU32(File.Base);
162 // Write out the string table.
163 const off_t StrtabOffset = O.tell();
164 StrTab.write(O.get_stream());
165 const off_t StrtabSize = O.tell() - StrtabOffset;
166 std::vector<uint32_t> AddrInfoOffsets;
168 // Write out the address infos for each function info.
169 for (const auto &FuncInfo : Funcs) {
170 if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
171 AddrInfoOffsets.push_back(OffsetOrErr.get());
172 else
173 return OffsetOrErr.takeError();
175 // Fixup the string table offset and size in the header
176 O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
177 O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
179 // Fixup all address info offsets
180 uint64_t Offset = 0;
181 for (auto AddrInfoOffset : AddrInfoOffsets) {
182 O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
183 Offset += 4;
185 return ErrorSuccess();
188 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
189 std::lock_guard<std::mutex> Guard(Mutex);
190 if (Finalized)
191 return createStringError(std::errc::invalid_argument, "already finalized");
192 Finalized = true;
194 // Don't let the string table indexes change by finalizing in order.
195 StrTab.finalizeInOrder();
197 // Remove duplicates function infos that have both entries from debug info
198 // (DWARF or Breakpad) and entries from the SymbolTable.
200 // Also handle overlapping function. Usually there shouldn't be any, but they
201 // can and do happen in some rare cases.
203 // (a) (b) (c)
204 // ^ ^ ^ ^
205 // |X |Y |X ^ |X
206 // | | | |Y | ^
207 // | | | v v |Y
208 // v v v v
210 // In (a) and (b), Y is ignored and X will be reported for the full range.
211 // In (c), both functions will be included in the result and lookups for an
212 // address in the intersection will return Y because of binary search.
214 // Note that in case of (b), we cannot include Y in the result because then
215 // we wouldn't find any function for range (end of Y, end of X)
216 // with binary search
218 const auto NumBefore = Funcs.size();
219 // Only sort and unique if this isn't a segment. If this is a segment we
220 // already finalized the main GsymCreator with all of the function infos
221 // and then the already sorted and uniqued function infos were added to this
222 // object.
223 if (!IsSegment) {
224 if (NumBefore > 1) {
225 // Sort function infos so we can emit sorted functions.
226 llvm::sort(Funcs);
227 std::vector<FunctionInfo> FinalizedFuncs;
228 FinalizedFuncs.reserve(Funcs.size());
229 FinalizedFuncs.emplace_back(std::move(Funcs.front()));
230 for (size_t Idx=1; Idx < NumBefore; ++Idx) {
231 FunctionInfo &Prev = FinalizedFuncs.back();
232 FunctionInfo &Curr = Funcs[Idx];
233 // Empty ranges won't intersect, but we still need to
234 // catch the case where we have multiple symbols at the
235 // same address and coalesce them.
236 const bool ranges_equal = Prev.Range == Curr.Range;
237 if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
238 // Overlapping ranges or empty identical ranges.
239 if (ranges_equal) {
240 // Same address range. Check if one is from debug
241 // info and the other is from a symbol table. If
242 // so, then keep the one with debug info. Our
243 // sorting guarantees that entries with matching
244 // address ranges that have debug info are last in
245 // the sort.
246 if (!(Prev == Curr)) {
247 if (Prev.hasRichInfo() && Curr.hasRichInfo()) {
248 if (!Quiet) {
249 OS << "warning: same address range contains "
250 "different debug "
251 << "info. Removing:\n"
252 << Prev << "\nIn favor of this one:\n"
253 << Curr << "\n";
256 // We want to swap the current entry with the previous since
257 // later entries with the same range always have more debug info
258 // or different debug info.
259 std::swap(Prev, Curr);
261 } else {
262 if (!Quiet) { // print warnings about overlaps
263 OS << "warning: function ranges overlap:\n"
264 << Prev << "\n"
265 << Curr << "\n";
267 FinalizedFuncs.emplace_back(std::move(Curr));
269 } else {
270 if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
271 // Symbols on macOS don't have address ranges, so if the range
272 // doesn't match and the size is zero, then we replace the empty
273 // symbol function info with the current one.
274 std::swap(Prev, Curr);
275 } else {
276 FinalizedFuncs.emplace_back(std::move(Curr));
280 std::swap(Funcs, FinalizedFuncs);
282 // If our last function info entry doesn't have a size and if we have valid
283 // text ranges, we should set the size of the last entry since any search for
284 // a high address might match our last entry. By fixing up this size, we can
285 // help ensure we don't cause lookups to always return the last symbol that
286 // has no size when doing lookups.
287 if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
288 if (auto Range =
289 ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
290 Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
293 OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
294 << Funcs.size() << " total\n";
296 return Error::success();
299 uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
300 // String offset at zero is always the empty string, no copying needed.
301 if (StrOff == 0)
302 return 0;
303 return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
306 uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
307 if (S.empty())
308 return 0;
310 // The hash can be calculated outside the lock.
311 CachedHashStringRef CHStr(S);
312 std::lock_guard<std::mutex> Guard(Mutex);
313 if (Copy) {
314 // We need to provide backing storage for the string if requested
315 // since StringTableBuilder stores references to strings. Any string
316 // that comes from a section in an object file doesn't need to be
317 // copied, but any string created by code will need to be copied.
318 // This allows GsymCreator to be really fast when parsing DWARF and
319 // other object files as most strings don't need to be copied.
320 if (!StrTab.contains(CHStr))
321 CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
322 CHStr.hash()};
324 const uint32_t StrOff = StrTab.add(CHStr);
325 // Save a mapping of string offsets to the cached string reference in case
326 // we need to segment the GSYM file and copy string from one string table to
327 // another.
328 if (StringOffsetMap.count(StrOff) == 0)
329 StringOffsetMap.insert(std::make_pair(StrOff, CHStr));
330 return StrOff;
333 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
334 std::lock_guard<std::mutex> Guard(Mutex);
335 Funcs.emplace_back(std::move(FI));
338 void GsymCreator::forEachFunctionInfo(
339 std::function<bool(FunctionInfo &)> const &Callback) {
340 std::lock_guard<std::mutex> Guard(Mutex);
341 for (auto &FI : Funcs) {
342 if (!Callback(FI))
343 break;
347 void GsymCreator::forEachFunctionInfo(
348 std::function<bool(const FunctionInfo &)> const &Callback) const {
349 std::lock_guard<std::mutex> Guard(Mutex);
350 for (const auto &FI : Funcs) {
351 if (!Callback(FI))
352 break;
356 size_t GsymCreator::getNumFunctionInfos() const {
357 std::lock_guard<std::mutex> Guard(Mutex);
358 return Funcs.size();
361 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
362 if (ValidTextRanges)
363 return ValidTextRanges->contains(Addr);
364 return true; // No valid text ranges has been set, so accept all ranges.
367 std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
368 // If we have finalized then Funcs are sorted. If we are a segment then
369 // Funcs will be sorted as well since function infos get added from an
370 // already finalized GsymCreator object where its functions were sorted and
371 // uniqued.
372 if ((Finalized || IsSegment) && !Funcs.empty())
373 return std::optional<uint64_t>(Funcs.front().startAddress());
374 return std::nullopt;
377 std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
378 // If we have finalized then Funcs are sorted. If we are a segment then
379 // Funcs will be sorted as well since function infos get added from an
380 // already finalized GsymCreator object where its functions were sorted and
381 // uniqued.
382 if ((Finalized || IsSegment) && !Funcs.empty())
383 return std::optional<uint64_t>(Funcs.back().startAddress());
384 return std::nullopt;
387 std::optional<uint64_t> GsymCreator::getBaseAddress() const {
388 if (BaseAddress)
389 return BaseAddress;
390 return getFirstFunctionAddress();
393 uint64_t GsymCreator::getMaxAddressOffset() const {
394 switch (getAddressOffsetSize()) {
395 case 1: return UINT8_MAX;
396 case 2: return UINT16_MAX;
397 case 4: return UINT32_MAX;
398 case 8: return UINT64_MAX;
400 llvm_unreachable("invalid address offset");
403 uint8_t GsymCreator::getAddressOffsetSize() const {
404 const std::optional<uint64_t> BaseAddress = getBaseAddress();
405 const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
406 if (BaseAddress && LastFuncAddr) {
407 const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
408 if (AddrDelta <= UINT8_MAX)
409 return 1;
410 else if (AddrDelta <= UINT16_MAX)
411 return 2;
412 else if (AddrDelta <= UINT32_MAX)
413 return 4;
414 return 8;
416 return 1;
419 uint64_t GsymCreator::calculateHeaderAndTableSize() const {
420 uint64_t Size = sizeof(Header);
421 const size_t NumFuncs = Funcs.size();
422 // Add size of address offset table
423 Size += NumFuncs * getAddressOffsetSize();
424 // Add size of address info offsets which are 32 bit integers in version 1.
425 Size += NumFuncs * sizeof(uint32_t);
426 // Add file table size
427 Size += Files.size() * sizeof(FileEntry);
428 // Add string table size
429 Size += StrTab.getSize();
431 return Size;
434 // This function takes a InlineInfo class that was copy constructed from an
435 // InlineInfo from the \a SrcGC and updates all members that point to strings
436 // and files to point to strings and files from this GsymCreator.
437 void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
438 II.Name = copyString(SrcGC, II.Name);
439 II.CallFile = copyFile(SrcGC, II.CallFile);
440 for (auto &ChildII: II.Children)
441 fixupInlineInfo(SrcGC, ChildII);
444 uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
445 // To copy a function info we need to copy any files and strings over into
446 // this GsymCreator and then copy the function info and update the string
447 // table offsets to match the new offsets.
448 const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
450 FunctionInfo DstFI;
451 DstFI.Range = SrcFI.Range;
452 DstFI.Name = copyString(SrcGC, SrcFI.Name);
453 // Copy the line table if there is one.
454 if (SrcFI.OptLineTable) {
455 // Copy the entire line table.
456 DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
457 // Fixup all LineEntry::File entries which are indexes in the the file table
458 // from SrcGC and must be converted to file indexes from this GsymCreator.
459 LineTable &DstLT = DstFI.OptLineTable.value();
460 const size_t NumLines = DstLT.size();
461 for (size_t I=0; I<NumLines; ++I) {
462 LineEntry &LE = DstLT.get(I);
463 LE.File = copyFile(SrcGC, LE.File);
466 // Copy the inline information if needed.
467 if (SrcFI.Inline) {
468 // Make a copy of the source inline information.
469 DstFI.Inline = SrcFI.Inline.value();
470 // Fixup all strings and files in the copied inline information.
471 fixupInlineInfo(SrcGC, *DstFI.Inline);
473 std::lock_guard<std::mutex> Guard(Mutex);
474 Funcs.emplace_back(DstFI);
475 return Funcs.back().cacheEncoding();
478 llvm::Error GsymCreator::saveSegments(StringRef Path,
479 llvm::endianness ByteOrder,
480 uint64_t SegmentSize) const {
481 if (SegmentSize == 0)
482 return createStringError(std::errc::invalid_argument,
483 "invalid segment size zero");
485 size_t FuncIdx = 0;
486 const size_t NumFuncs = Funcs.size();
487 while (FuncIdx < NumFuncs) {
488 llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =
489 createSegment(SegmentSize, FuncIdx);
490 if (ExpectedGC) {
491 GsymCreator *GC = ExpectedGC->get();
492 if (GC == NULL)
493 break; // We had not more functions to encode.
494 raw_null_ostream ErrorStrm;
495 llvm::Error Err = GC->finalize(ErrorStrm);
496 if (Err)
497 return Err;
498 std::string SegmentedGsymPath;
499 raw_string_ostream SGP(SegmentedGsymPath);
500 std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();
501 if (FirstFuncAddr) {
502 SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
503 SGP.flush();
504 Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);
505 if (Err)
506 return Err;
508 } else {
509 return ExpectedGC.takeError();
512 return Error::success();
515 llvm::Expected<std::unique_ptr<GsymCreator>>
516 GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
517 // No function entries, return empty unique pointer
518 if (FuncIdx >= Funcs.size())
519 return std::unique_ptr<GsymCreator>();
521 std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
523 // Tell the creator that this is a segment.
524 GC->setIsSegment();
526 // Set the base address if there is one.
527 if (BaseAddress)
528 GC->setBaseAddress(*BaseAddress);
529 // Copy the UUID value from this object into the new creator.
530 GC->setUUID(UUID);
531 const size_t NumFuncs = Funcs.size();
532 // Track how big the function infos are for the current segment so we can
533 // emit segments that are close to the requested size. It is quick math to
534 // determine the current header and tables sizes, so we can do that each loop.
535 uint64_t SegmentFuncInfosSize = 0;
536 for (; FuncIdx < NumFuncs; ++FuncIdx) {
537 const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
538 if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
539 if (SegmentFuncInfosSize == 0)
540 return createStringError(std::errc::invalid_argument,
541 "a segment size of %" PRIu64 " is to small to "
542 "fit any function infos, specify a larger value",
543 SegmentSize);
545 break;
547 SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
549 return std::move(GC);