[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / DebugInfo / GSYM / GsymCreator.cpp
blob1c20a59469dc2bd7c0f95bbbbac03a8757b1d92a
1 //===- GsymCreator.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
8 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
9 #include "llvm/DebugInfo/GSYM/FileWriter.h"
10 #include "llvm/DebugInfo/GSYM/Header.h"
11 #include "llvm/DebugInfo/GSYM/LineTable.h"
12 #include "llvm/MC/StringTableBuilder.h"
13 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cassert>
17 #include <functional>
18 #include <vector>
20 using namespace llvm;
21 using namespace gsym;
23 GsymCreator::GsymCreator(bool Quiet)
24 : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
25 insertFile(StringRef());
28 uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
29 llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
30 llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
31 // We must insert the strings first, then call the FileEntry constructor.
32 // If we inline the insertString() function call into the constructor, the
33 // call order is undefined due to parameter lists not having any ordering
34 // requirements.
35 const uint32_t Dir = insertString(directory);
36 const uint32_t Base = insertString(filename);
37 FileEntry FE(Dir, Base);
39 std::lock_guard<std::mutex> Guard(Mutex);
40 const auto NextIndex = Files.size();
41 // Find FE in hash map and insert if not present.
42 auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
43 if (R.second)
44 Files.emplace_back(FE);
45 return R.first->second;
48 llvm::Error GsymCreator::save(StringRef Path,
49 llvm::support::endianness ByteOrder) const {
50 std::error_code EC;
51 raw_fd_ostream OutStrm(Path, EC);
52 if (EC)
53 return llvm::errorCodeToError(EC);
54 FileWriter O(OutStrm, ByteOrder);
55 return encode(O);
58 llvm::Error GsymCreator::encode(FileWriter &O) const {
59 std::lock_guard<std::mutex> Guard(Mutex);
60 if (Funcs.empty())
61 return createStringError(std::errc::invalid_argument,
62 "no functions to encode");
63 if (!Finalized)
64 return createStringError(std::errc::invalid_argument,
65 "GsymCreator wasn't finalized prior to encoding");
67 if (Funcs.size() > UINT32_MAX)
68 return createStringError(std::errc::invalid_argument,
69 "too many FunctionInfos");
71 const uint64_t MinAddr =
72 BaseAddress ? *BaseAddress : Funcs.front().startAddress();
73 const uint64_t MaxAddr = Funcs.back().startAddress();
74 const uint64_t AddrDelta = MaxAddr - MinAddr;
75 Header Hdr;
76 Hdr.Magic = GSYM_MAGIC;
77 Hdr.Version = GSYM_VERSION;
78 Hdr.AddrOffSize = 0;
79 Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
80 Hdr.BaseAddress = MinAddr;
81 Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
82 Hdr.StrtabOffset = 0; // We will fix this up later.
83 Hdr.StrtabSize = 0; // We will fix this up later.
84 memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
85 if (UUID.size() > sizeof(Hdr.UUID))
86 return createStringError(std::errc::invalid_argument,
87 "invalid UUID size %u", (uint32_t)UUID.size());
88 // Set the address offset size correctly in the GSYM header.
89 if (AddrDelta <= UINT8_MAX)
90 Hdr.AddrOffSize = 1;
91 else if (AddrDelta <= UINT16_MAX)
92 Hdr.AddrOffSize = 2;
93 else if (AddrDelta <= UINT32_MAX)
94 Hdr.AddrOffSize = 4;
95 else
96 Hdr.AddrOffSize = 8;
97 // Copy the UUID value if we have one.
98 if (UUID.size() > 0)
99 memcpy(Hdr.UUID, UUID.data(), UUID.size());
100 // Write out the header.
101 llvm::Error Err = Hdr.encode(O);
102 if (Err)
103 return Err;
105 // Write out the address offsets.
106 O.alignTo(Hdr.AddrOffSize);
107 for (const auto &FuncInfo : Funcs) {
108 uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
109 switch (Hdr.AddrOffSize) {
110 case 1:
111 O.writeU8(static_cast<uint8_t>(AddrOffset));
112 break;
113 case 2:
114 O.writeU16(static_cast<uint16_t>(AddrOffset));
115 break;
116 case 4:
117 O.writeU32(static_cast<uint32_t>(AddrOffset));
118 break;
119 case 8:
120 O.writeU64(AddrOffset);
121 break;
125 // Write out all zeros for the AddrInfoOffsets.
126 O.alignTo(4);
127 const off_t AddrInfoOffsetsOffset = O.tell();
128 for (size_t i = 0, n = Funcs.size(); i < n; ++i)
129 O.writeU32(0);
131 // Write out the file table
132 O.alignTo(4);
133 assert(!Files.empty());
134 assert(Files[0].Dir == 0);
135 assert(Files[0].Base == 0);
136 size_t NumFiles = Files.size();
137 if (NumFiles > UINT32_MAX)
138 return createStringError(std::errc::invalid_argument, "too many files");
139 O.writeU32(static_cast<uint32_t>(NumFiles));
140 for (auto File : Files) {
141 O.writeU32(File.Dir);
142 O.writeU32(File.Base);
145 // Write out the sting table.
146 const off_t StrtabOffset = O.tell();
147 StrTab.write(O.get_stream());
148 const off_t StrtabSize = O.tell() - StrtabOffset;
149 std::vector<uint32_t> AddrInfoOffsets;
151 // Write out the address infos for each function info.
152 for (const auto &FuncInfo : Funcs) {
153 if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
154 AddrInfoOffsets.push_back(OffsetOrErr.get());
155 else
156 return OffsetOrErr.takeError();
158 // Fixup the string table offset and size in the header
159 O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
160 O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
162 // Fixup all address info offsets
163 uint64_t Offset = 0;
164 for (auto AddrInfoOffset : AddrInfoOffsets) {
165 O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
166 Offset += 4;
168 return ErrorSuccess();
// A std::remove_if look-alike driven by a binary predicate. The predicate
// receives a "previous" element and the current one; when it returns true the
// current element is dropped. Kept elements are compacted towards the front
// and the new logical end is returned.
//
// The "previous" element is the immediate predecessor while no element has
// been dropped yet. Once the first element is dropped, that dropped element
// serves as "previous" until the next element is kept; from then on
// "previous" is always the most recently kept element.
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt == LastIt)
    return FirstIt;

  // Phase 1: walk adjacent pairs until the predicate asks for a removal.
  ForwardIt Reference = FirstIt;
  ForwardIt WritePos = FirstIt;
  ++WritePos;
  while (WritePos != LastIt) {
    const bool Drop = Pred(*Reference, *WritePos);
    ++Reference;
    if (Drop)
      break;
    ++WritePos;
  }
  if (WritePos == LastIt)
    return WritePos; // Nothing to remove.

  // Phase 2: Reference and WritePos both designate the dropped element.
  // Shift every surviving element down into the gap.
  ForwardIt ReadPos = WritePos;
  for (++ReadPos; ReadPos != LastIt; ++ReadPos) {
    if (Pred(*Reference, *ReadPos))
      continue;
    Reference = WritePos;
    *WritePos = std::move(*ReadPos);
    ++WritePos;
  }
  return WritePos;
}
191 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
192 std::lock_guard<std::mutex> Guard(Mutex);
193 if (Finalized)
194 return createStringError(std::errc::invalid_argument, "already finalized");
195 Finalized = true;
197 // Sort function infos so we can emit sorted functions.
198 llvm::sort(Funcs);
200 // Don't let the string table indexes change by finalizing in order.
201 StrTab.finalizeInOrder();
203 // Remove duplicates function infos that have both entries from debug info
204 // (DWARF or Breakpad) and entries from the SymbolTable.
206 // Also handle overlapping function. Usually there shouldn't be any, but they
207 // can and do happen in some rare cases.
209 // (a) (b) (c)
210 // ^ ^ ^ ^
211 // |X |Y |X ^ |X
212 // | | | |Y | ^
213 // | | | v v |Y
214 // v v v v
216 // In (a) and (b), Y is ignored and X will be reported for the full range.
217 // In (c), both functions will be included in the result and lookups for an
218 // address in the intersection will return Y because of binary search.
220 // Note that in case of (b), we cannot include Y in the result because then
221 // we wouldn't find any function for range (end of Y, end of X)
222 // with binary search
223 auto NumBefore = Funcs.size();
224 Funcs.erase(
225 removeIfBinary(Funcs.begin(), Funcs.end(),
226 [&](const auto &Prev, const auto &Curr) {
227 // Empty ranges won't intersect, but we still need to
228 // catch the case where we have multiple symbols at the
229 // same address and coalesce them.
230 const bool ranges_equal = Prev.Range == Curr.Range;
231 if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
232 // Overlapping ranges or empty identical ranges.
233 if (ranges_equal) {
234 // Same address range. Check if one is from debug
235 // info and the other is from a symbol table. If
236 // so, then keep the one with debug info. Our
237 // sorting guarantees that entries with matching
238 // address ranges that have debug info are last in
239 // the sort.
240 if (Prev == Curr) {
241 // FunctionInfo entries match exactly (range,
242 // lines, inlines)
244 // We used to output a warning here, but this was
245 // so frequent on some binaries, in particular
246 // when those were built with GCC, that it slowed
247 // down processing extremely.
248 return true;
249 } else {
250 if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
251 // Same address range, one with no debug info
252 // (symbol) and the next with debug info. Keep
253 // the latter.
254 return true;
255 } else {
256 if (!Quiet) {
257 OS << "warning: same address range contains "
258 "different debug "
259 << "info. Removing:\n"
260 << Prev << "\nIn favor of this one:\n"
261 << Curr << "\n";
263 return true;
266 } else {
267 if (!Quiet) { // print warnings about overlaps
268 OS << "warning: function ranges overlap:\n"
269 << Prev << "\n"
270 << Curr << "\n";
273 } else if (Prev.Range.size() == 0 &&
274 Curr.Range.contains(Prev.Range.Start)) {
275 if (!Quiet) {
276 OS << "warning: removing symbol:\n"
277 << Prev << "\nKeeping:\n"
278 << Curr << "\n";
280 return true;
283 return false;
285 Funcs.end());
287 // If our last function info entry doesn't have a size and if we have valid
288 // text ranges, we should set the size of the last entry since any search for
289 // a high address might match our last entry. By fixing up this size, we can
290 // help ensure we don't cause lookups to always return the last symbol that
291 // has no size when doing lookups.
292 if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
293 if (auto Range =
294 ValidTextRanges->getRangeThatContains(Funcs.back().Range.Start)) {
295 Funcs.back().Range.End = Range->End;
298 OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
299 << Funcs.size() << " total\n";
300 return Error::success();
303 uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
304 if (S.empty())
305 return 0;
307 // The hash can be calculated outside the lock.
308 CachedHashStringRef CHStr(S);
309 std::lock_guard<std::mutex> Guard(Mutex);
310 if (Copy) {
311 // We need to provide backing storage for the string if requested
312 // since StringTableBuilder stores references to strings. Any string
313 // that comes from a section in an object file doesn't need to be
314 // copied, but any string created by code will need to be copied.
315 // This allows GsymCreator to be really fast when parsing DWARF and
316 // other object files as most strings don't need to be copied.
317 if (!StrTab.contains(CHStr))
318 CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
319 CHStr.hash()};
321 return StrTab.add(CHStr);
324 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
325 std::lock_guard<std::mutex> Guard(Mutex);
326 Ranges.insert(FI.Range);
327 Funcs.emplace_back(std::move(FI));
330 void GsymCreator::forEachFunctionInfo(
331 std::function<bool(FunctionInfo &)> const &Callback) {
332 std::lock_guard<std::mutex> Guard(Mutex);
333 for (auto &FI : Funcs) {
334 if (!Callback(FI))
335 break;
339 void GsymCreator::forEachFunctionInfo(
340 std::function<bool(const FunctionInfo &)> const &Callback) const {
341 std::lock_guard<std::mutex> Guard(Mutex);
342 for (const auto &FI : Funcs) {
343 if (!Callback(FI))
344 break;
348 size_t GsymCreator::getNumFunctionInfos() const {
349 std::lock_guard<std::mutex> Guard(Mutex);
350 return Funcs.size();
353 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
354 if (ValidTextRanges)
355 return ValidTextRanges->contains(Addr);
356 return true; // No valid text ranges has been set, so accept all ranges.
359 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
360 std::lock_guard<std::mutex> Guard(Mutex);
361 return Ranges.contains(Addr);