[RISCV] Refactor predicates for rvv intrinsic patterns.
[llvm-project.git] / llvm / lib / DebugInfo / GSYM / GsymCreator.cpp
blob60b6dbc6a12d2a54510f87c35ae4e53682d8819c
1 //===- GsymCreator.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
8 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
9 #include "llvm/DebugInfo/GSYM/FileWriter.h"
10 #include "llvm/DebugInfo/GSYM/Header.h"
11 #include "llvm/DebugInfo/GSYM/LineTable.h"
12 #include "llvm/MC/StringTableBuilder.h"
13 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cassert>
17 #include <functional>
18 #include <vector>
20 using namespace llvm;
21 using namespace gsym;
23 GsymCreator::GsymCreator(bool Quiet)
24 : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
25 insertFile(StringRef());
28 uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
29 llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
30 llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
31 // We must insert the strings first, then call the FileEntry constructor.
32 // If we inline the insertString() function call into the constructor, the
33 // call order is undefined due to parameter lists not having any ordering
34 // requirements.
35 const uint32_t Dir = insertString(directory);
36 const uint32_t Base = insertString(filename);
37 return insertFileEntry(FileEntry(Dir, Base));
40 uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
41 std::lock_guard<std::mutex> Guard(Mutex);
42 const auto NextIndex = Files.size();
43 // Find FE in hash map and insert if not present.
44 auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
45 if (R.second)
46 Files.emplace_back(FE);
47 return R.first->second;
50 uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
51 // File index zero is reserved for a FileEntry with no directory and no
52 // filename. Any other file and we need to copy the strings for the directory
53 // and filename.
54 if (FileIdx == 0)
55 return 0;
56 const FileEntry SrcFE = SrcGC.Files[FileIdx];
57 // Copy the strings for the file and then add the newly converted file entry.
58 uint32_t Dir = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
59 uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
60 FileEntry DstFE(Dir, Base);
61 return insertFileEntry(DstFE);
65 llvm::Error GsymCreator::save(StringRef Path,
66 llvm::support::endianness ByteOrder,
67 std::optional<uint64_t> SegmentSize) const {
68 if (SegmentSize)
69 return saveSegments(Path, ByteOrder, *SegmentSize);
70 std::error_code EC;
71 raw_fd_ostream OutStrm(Path, EC);
72 if (EC)
73 return llvm::errorCodeToError(EC);
74 FileWriter O(OutStrm, ByteOrder);
75 return encode(O);
// Encode the finalized contents of this creator into the FileWriter O.
//
// Mutex is held for the whole call. Data is emitted in this order: header,
// address offset table, a zero-filled address info offset table, file table,
// string table, and then each function info. Once everything is written, the
// string table offset/size in the header and the address info offsets are
// patched in place with fixup32().
//
// Returns an error when there are no functions, the creator was not
// finalized, there are more than UINT32_MAX functions or files, no base
// address is available, the UUID does not fit in the header, or when encoding
// the header or any function info fails.
78 llvm::Error GsymCreator::encode(FileWriter &O) const {
79 std::lock_guard<std::mutex> Guard(Mutex);
80 if (Funcs.empty())
81 return createStringError(std::errc::invalid_argument,
82 "no functions to encode");
83 if (!Finalized)
84 return createStringError(std::errc::invalid_argument,
85 "GsymCreator wasn't finalized prior to encoding");
// The function count is stored in the 32 bit Hdr.NumAddresses field below.
87 if (Funcs.size() > UINT32_MAX)
88 return createStringError(std::errc::invalid_argument,
89 "too many FunctionInfos");
91 std::optional<uint64_t> BaseAddress = getBaseAddress();
92 // Base address should be valid if we have any functions.
93 if (!BaseAddress)
94 return createStringError(std::errc::invalid_argument,
95 "invalid base address");
96 Header Hdr;
97 Hdr.Magic = GSYM_MAGIC;
98 Hdr.Version = GSYM_VERSION;
99 Hdr.AddrOffSize = getAddressOffsetSize();
100 Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
101 Hdr.BaseAddress = *BaseAddress;
102 Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
103 Hdr.StrtabOffset = 0; // We will fix this up later.
104 Hdr.StrtabSize = 0; // We will fix this up later.
105 memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
// Reject a UUID that does not fit before anything is copied or written.
106 if (UUID.size() > sizeof(Hdr.UUID))
107 return createStringError(std::errc::invalid_argument,
108 "invalid UUID size %u", (uint32_t)UUID.size());
109 // Copy the UUID value if we have one.
110 if (UUID.size() > 0)
111 memcpy(Hdr.UUID, UUID.data(), UUID.size());
112 // Write out the header.
113 llvm::Error Err = Hdr.encode(O);
114 if (Err)
115 return Err;
117 const uint64_t MaxAddressOffset = getMaxAddressOffset();
118 // Write out the address offsets.
119 O.alignTo(Hdr.AddrOffSize);
120 for (const auto &FuncInfo : Funcs) {
121 uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
122 // Make sure we calculated the address offsets byte size correctly by
123 // verifying the current address offset is within ranges. We have seen bugs
124 // introduced when the code changes that can cause problems here so it is
125 // good to catch this during testing.
126 assert(AddrOffset <= MaxAddressOffset);
// Keeps MaxAddressOffset "used" when assert() compiles to nothing.
127 (void)MaxAddressOffset;
// Each offset is written with exactly Hdr.AddrOffSize bytes.
128 switch (Hdr.AddrOffSize) {
129 case 1:
130 O.writeU8(static_cast<uint8_t>(AddrOffset));
131 break;
132 case 2:
133 O.writeU16(static_cast<uint16_t>(AddrOffset));
134 break;
135 case 4:
136 O.writeU32(static_cast<uint32_t>(AddrOffset));
137 break;
138 case 8:
139 O.writeU64(AddrOffset);
140 break;
144 // Write out all zeros for the AddrInfoOffsets.
// These are placeholders; the real offsets are patched in at the end once
// every function info has been encoded.
145 O.alignTo(4);
146 const off_t AddrInfoOffsetsOffset = O.tell();
147 for (size_t i = 0, n = Funcs.size(); i < n; ++i)
148 O.writeU32(0);
150 // Write out the file table
151 O.alignTo(4);
// The asserts check that the first file entry has zero Dir and Base offsets.
152 assert(!Files.empty());
153 assert(Files[0].Dir == 0);
154 assert(Files[0].Base == 0);
155 size_t NumFiles = Files.size();
156 if (NumFiles > UINT32_MAX)
157 return createStringError(std::errc::invalid_argument, "too many files");
158 O.writeU32(static_cast<uint32_t>(NumFiles));
159 for (auto File : Files) {
160 O.writeU32(File.Dir);
161 O.writeU32(File.Base);
164 // Write out the string table.
165 const off_t StrtabOffset = O.tell();
166 StrTab.write(O.get_stream());
167 const off_t StrtabSize = O.tell() - StrtabOffset;
168 std::vector<uint32_t> AddrInfoOffsets;
170 // Write out the address infos for each function info.
// NOTE(review): FuncInfo.encode() yields a uint64_t offset that is stored in
// a uint32_t vector here, so larger offsets would be truncated silently -
// confirm whether a range check is wanted.
171 for (const auto &FuncInfo : Funcs) {
172 if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
173 AddrInfoOffsets.push_back(OffsetOrErr.get());
174 else
175 return OffsetOrErr.takeError();
177 // Fixup the string table offset and size in the header
178 O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
179 O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
181 // Fixup all address info offsets
// Offset advances by 4 per entry, matching the U32 placeholders written above.
182 uint64_t Offset = 0;
183 for (auto AddrInfoOffset : AddrInfoOffsets) {
184 O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
185 Offset += 4;
187 return ErrorSuccess();
// Coalesces adjacent elements of [FirstIt, LastIt) in a single pass, in the
// spirit of std::remove_if/std::unique, but driven by a binary predicate.
//
// Pred(Prev, Curr) is called with the most recently kept element and the
// element currently being visited. When it returns true, Prev is removed in
// favor of Curr: Curr takes over Prev's slot. When it returns false, Curr is
// kept as an additional element. The relative order of the kept elements is
// preserved.
//
// Returns the new logical end of the range; callers are expected to erase
// [returned iterator, LastIt).
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt == LastIt)
    return LastIt;

  // KeptIt always refers to the last element that has been kept so far. The
  // predicate is therefore never handed an element that was already dropped.
  ForwardIt KeptIt = FirstIt;
  ForwardIt CurrIt = FirstIt;
  for (++CurrIt; CurrIt != LastIt; ++CurrIt) {
    if (Pred(*KeptIt, *CurrIt)) {
      // Curr supersedes the previously kept element, so overwrite it in place.
      *KeptIt = std::move(*CurrIt);
    } else if (++KeptIt != CurrIt) {
      // Keep Curr; only move it when a gap has opened up in front of it, which
      // also avoids self-move-assignment.
      *KeptIt = std::move(*CurrIt);
    }
  }
  return ++KeptIt;
}
// Finalize this creator so that it can be encoded.
//
// With Mutex held this: marks the creator as finalized, sorts Funcs,
// finalizes the string table in insertion order, prunes duplicate and
// coalescable function infos through removeIfBinary(), extends a trailing
// zero-sized entry to the end of its containing valid text range (when valid
// text ranges are set), and prints a summary of how many entries were pruned.
//
// Warnings are written to OS only when Quiet is false; the final "Pruned"
// summary line is written unconditionally.
//
// Returns an error if finalize() was already called on this object.
210 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
211 std::lock_guard<std::mutex> Guard(Mutex);
212 if (Finalized)
213 return createStringError(std::errc::invalid_argument, "already finalized");
214 Finalized = true;
216 // Sort function infos so we can emit sorted functions.
217 llvm::sort(Funcs);
219 // Don't let the string table indexes change by finalizing in order.
220 StrTab.finalizeInOrder();
222 // Remove duplicates function infos that have both entries from debug info
223 // (DWARF or Breakpad) and entries from the SymbolTable.
225 // Also handle overlapping function. Usually there shouldn't be any, but they
226 // can and do happen in some rare cases.
228 // (a) (b) (c)
229 // ^ ^ ^ ^
230 // |X |Y |X ^ |X
231 // | | | |Y | ^
232 // | | | v v |Y
233 // v v v v
235 // In (a) and (b), Y is ignored and X will be reported for the full range.
236 // In (c), both functions will be included in the result and lookups for an
237 // address in the intersection will return Y because of binary search.
239 // Note that in case of (b), we cannot include Y in the result because then
240 // we wouldn't find any function for range (end of Y, end of X)
241 // with binary search
// NOTE(review): the warning texts in the predicate below describe Prev as the
// entry being removed and Curr as the one being kept whenever the predicate
// returns true - confirm that removeIfBinary() drops the element that these
// messages (and the "keep the latter" comments) intend.
242 auto NumBefore = Funcs.size();
243 Funcs.erase(
244 removeIfBinary(Funcs.begin(), Funcs.end(),
245 [&](const auto &Prev, const auto &Curr) {
246 // Empty ranges won't intersect, but we still need to
247 // catch the case where we have multiple symbols at the
248 // same address and coalesce them.
249 const bool ranges_equal = Prev.Range == Curr.Range;
250 if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
251 // Overlapping ranges or empty identical ranges.
252 if (ranges_equal) {
253 // Same address range. Check if one is from debug
254 // info and the other is from a symbol table. If
255 // so, then keep the one with debug info. Our
256 // sorting guarantees that entries with matching
257 // address ranges that have debug info are last in
258 // the sort.
259 if (Prev == Curr) {
260 // FunctionInfo entries match exactly (range,
261 // lines, inlines)
263 // We used to output a warning here, but this was
264 // so frequent on some binaries, in particular
265 // when those were built with GCC, that it slowed
266 // down processing extremely.
267 return true;
268 } else {
269 if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
270 // Same address range, one with no debug info
271 // (symbol) and the next with debug info. Keep
272 // the latter.
273 return true;
274 } else {
275 if (!Quiet) {
276 OS << "warning: same address range contains "
277 "different debug "
278 << "info. Removing:\n"
279 << Prev << "\nIn favor of this one:\n"
280 << Curr << "\n";
282 return true;
285 } else {
// Ranges overlap without being equal: only a warning is printed and the
// predicate goes on to return false at the end.
286 if (!Quiet) { // print warnings about overlaps
287 OS << "warning: function ranges overlap:\n"
288 << Prev << "\n"
289 << Curr << "\n";
// Prev has a zero-sized range whose start address lies inside Curr's range.
292 } else if (Prev.Range.size() == 0 &&
293 Curr.Range.contains(Prev.Range.start())) {
294 if (!Quiet) {
295 OS << "warning: removing symbol:\n"
296 << Prev << "\nKeeping:\n"
297 << Curr << "\n";
299 return true;
302 return false;
304 Funcs.end());
306 // If our last function info entry doesn't have a size and if we have valid
307 // text ranges, we should set the size of the last entry since any search for
308 // a high address might match our last entry. By fixing up this size, we can
309 // help ensure we don't cause lookups to always return the last symbol that
310 // has no size when doing lookups.
311 if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
312 if (auto Range =
313 ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
314 Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
// This summary is printed whether or not Quiet is set.
317 OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
318 << Funcs.size() << " total\n";
319 return Error::success();
322 uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
323 // String offset at zero is always the empty string, no copying needed.
324 if (StrOff == 0)
325 return 0;
326 return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
329 uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
330 if (S.empty())
331 return 0;
333 // The hash can be calculated outside the lock.
334 CachedHashStringRef CHStr(S);
335 std::lock_guard<std::mutex> Guard(Mutex);
336 if (Copy) {
337 // We need to provide backing storage for the string if requested
338 // since StringTableBuilder stores references to strings. Any string
339 // that comes from a section in an object file doesn't need to be
340 // copied, but any string created by code will need to be copied.
341 // This allows GsymCreator to be really fast when parsing DWARF and
342 // other object files as most strings don't need to be copied.
343 if (!StrTab.contains(CHStr))
344 CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
345 CHStr.hash()};
347 const uint32_t StrOff = StrTab.add(CHStr);
348 // Save a mapping of string offsets to the cached string reference in case
349 // we need to segment the GSYM file and copy string from one string table to
350 // another.
351 if (StringOffsetMap.count(StrOff) == 0)
352 StringOffsetMap.insert(std::make_pair(StrOff, CHStr));
353 return StrOff;
356 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
357 std::lock_guard<std::mutex> Guard(Mutex);
358 Ranges.insert(FI.Range);
359 Funcs.emplace_back(std::move(FI));
362 void GsymCreator::forEachFunctionInfo(
363 std::function<bool(FunctionInfo &)> const &Callback) {
364 std::lock_guard<std::mutex> Guard(Mutex);
365 for (auto &FI : Funcs) {
366 if (!Callback(FI))
367 break;
371 void GsymCreator::forEachFunctionInfo(
372 std::function<bool(const FunctionInfo &)> const &Callback) const {
373 std::lock_guard<std::mutex> Guard(Mutex);
374 for (const auto &FI : Funcs) {
375 if (!Callback(FI))
376 break;
380 size_t GsymCreator::getNumFunctionInfos() const {
381 std::lock_guard<std::mutex> Guard(Mutex);
382 return Funcs.size();
385 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
386 if (ValidTextRanges)
387 return ValidTextRanges->contains(Addr);
388 return true; // No valid text ranges has been set, so accept all ranges.
391 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
392 std::lock_guard<std::mutex> Guard(Mutex);
393 return Ranges.contains(Addr);
396 std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
397 if (Finalized && !Funcs.empty())
398 return std::optional<uint64_t>(Funcs.front().startAddress());
399 // This code gets used by the segmentation of GSYM files to help determine the
400 // size of the GSYM header while continually adding new FunctionInfo objects
401 // to this object, so we haven't finalized this object yet.
402 if (Ranges.empty())
403 return std::nullopt;
404 return std::optional<uint64_t>(Ranges.begin()->start());
407 std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
408 if (Finalized && !Funcs.empty())
409 return std::optional<uint64_t>(Funcs.back().startAddress());
410 // This code gets used by the segmentation of GSYM files to help determine the
411 // size of the GSYM header while continually adding new FunctionInfo objects
412 // to this object, so we haven't finalized this object yet.
413 if (Ranges.empty())
414 return std::nullopt;
415 return std::optional<uint64_t>((Ranges.end() - 1)->end());
418 std::optional<uint64_t> GsymCreator::getBaseAddress() const {
419 if (BaseAddress)
420 return BaseAddress;
421 return getFirstFunctionAddress();
424 uint64_t GsymCreator::getMaxAddressOffset() const {
425 switch (getAddressOffsetSize()) {
426 case 1: return UINT8_MAX;
427 case 2: return UINT16_MAX;
428 case 4: return UINT32_MAX;
429 case 8: return UINT64_MAX;
431 llvm_unreachable("invalid address offset");
434 uint8_t GsymCreator::getAddressOffsetSize() const {
435 const std::optional<uint64_t> BaseAddress = getBaseAddress();
436 const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
437 if (BaseAddress && LastFuncAddr) {
438 const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
439 if (AddrDelta <= UINT8_MAX)
440 return 1;
441 else if (AddrDelta <= UINT16_MAX)
442 return 2;
443 else if (AddrDelta <= UINT32_MAX)
444 return 4;
445 return 8;
447 return 1;
450 uint64_t GsymCreator::calculateHeaderAndTableSize() const {
451 uint64_t Size = sizeof(Header);
452 const size_t NumFuncs = Funcs.size();
453 // Add size of address offset table
454 Size += NumFuncs * getAddressOffsetSize();
455 // Add size of address info offsets which are 32 bit integers in version 1.
456 Size += NumFuncs * sizeof(uint32_t);
457 // Add file table size
458 Size += Files.size() * sizeof(FileEntry);
459 // Add string table size
460 Size += StrTab.getSize();
462 return Size;
465 // This function takes a InlineInfo class that was copy constructed from an
466 // InlineInfo from the \a SrcGC and updates all members that point to strings
467 // and files to point to strings and files from this GsymCreator.
468 void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
469 II.Name = copyString(SrcGC, II.Name);
470 II.CallFile = copyFile(SrcGC, II.CallFile);
471 for (auto &ChildII: II.Children)
472 fixupInlineInfo(SrcGC, ChildII);
475 uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
476 // To copy a function info we need to copy any files and strings over into
477 // this GsymCreator and then copy the function info and update the string
478 // table offsets to match the new offsets.
479 const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
480 Ranges.insert(SrcFI.Range);
482 FunctionInfo DstFI;
483 DstFI.Range = SrcFI.Range;
484 DstFI.Name = copyString(SrcGC, SrcFI.Name);
485 // Copy the line table if there is one.
486 if (SrcFI.OptLineTable) {
487 // Copy the entire line table.
488 DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
489 // Fixup all LineEntry::File entries which are indexes in the the file table
490 // from SrcGC and must be converted to file indexes from this GsymCreator.
491 LineTable &DstLT = DstFI.OptLineTable.value();
492 const size_t NumLines = DstLT.size();
493 for (size_t I=0; I<NumLines; ++I) {
494 LineEntry &LE = DstLT.get(I);
495 LE.File = copyFile(SrcGC, LE.File);
498 // Copy the inline information if needed.
499 if (SrcFI.Inline) {
500 // Make a copy of the source inline information.
501 DstFI.Inline = SrcFI.Inline.value();
502 // Fixup all strings and files in the copied inline information.
503 fixupInlineInfo(SrcGC, *DstFI.Inline);
505 std::lock_guard<std::mutex> Guard(Mutex);
506 Funcs.push_back(DstFI);
507 return Funcs.back().cacheEncoding();
510 llvm::Error GsymCreator::saveSegments(StringRef Path,
511 llvm::support::endianness ByteOrder,
512 uint64_t SegmentSize) const {
513 if (SegmentSize == 0)
514 return createStringError(std::errc::invalid_argument,
515 "invalid segment size zero");
517 size_t FuncIdx = 0;
518 const size_t NumFuncs = Funcs.size();
519 while (FuncIdx < NumFuncs) {
520 llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =
521 createSegment(SegmentSize, FuncIdx);
522 if (ExpectedGC) {
523 GsymCreator *GC = ExpectedGC->get();
524 if (GC == NULL)
525 break; // We had not more functions to encode.
526 raw_null_ostream ErrorStrm;
527 llvm::Error Err = GC->finalize(ErrorStrm);
528 if (Err)
529 return Err;
530 std::string SegmentedGsymPath;
531 raw_string_ostream SGP(SegmentedGsymPath);
532 std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();
533 if (FirstFuncAddr) {
534 SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
535 SGP.flush();
536 Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);
537 if (Err)
538 return Err;
540 } else {
541 return ExpectedGC.takeError();
544 return Error::success();
547 llvm::Expected<std::unique_ptr<GsymCreator>>
548 GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
549 // No function entries, return empty unique pointer
550 if (FuncIdx >= Funcs.size())
551 return std::unique_ptr<GsymCreator>();
553 std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
554 // Set the base address if there is one.
555 if (BaseAddress)
556 GC->setBaseAddress(*BaseAddress);
557 // Copy the UUID value from this object into the new creator.
558 GC->setUUID(UUID);
559 const size_t NumFuncs = Funcs.size();
560 // Track how big the function infos are for the current segment so we can
561 // emit segments that are close to the requested size. It is quick math to
562 // determine the current header and tables sizes, so we can do that each loop.
563 uint64_t SegmentFuncInfosSize = 0;
564 for (; FuncIdx < NumFuncs; ++FuncIdx) {
565 const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
566 if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
567 if (SegmentFuncInfosSize == 0)
568 return createStringError(std::errc::invalid_argument,
569 "a segment size of %" PRIu64 " is to small to "
570 "fit any function infos, specify a larger value",
571 SegmentSize);
573 break;
575 SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
577 return std::move(GC);