1 //===- GsymCreator.cpp ----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <functional>
#include <mutex>
#include <system_error>
#include <vector>

using namespace llvm;
using namespace gsym;

23 GsymCreator::GsymCreator(bool Quiet
)
24 : StrTab(StringTableBuilder::ELF
), Quiet(Quiet
) {
25 insertFile(StringRef());
28 uint32_t GsymCreator::insertFile(StringRef Path
, llvm::sys::path::Style Style
) {
29 llvm::StringRef directory
= llvm::sys::path::parent_path(Path
, Style
);
30 llvm::StringRef filename
= llvm::sys::path::filename(Path
, Style
);
31 // We must insert the strings first, then call the FileEntry constructor.
32 // If we inline the insertString() function call into the constructor, the
33 // call order is undefined due to parameter lists not having any ordering
35 const uint32_t Dir
= insertString(directory
);
36 const uint32_t Base
= insertString(filename
);
37 FileEntry
FE(Dir
, Base
);
39 std::lock_guard
<std::mutex
> Guard(Mutex
);
40 const auto NextIndex
= Files
.size();
41 // Find FE in hash map and insert if not present.
42 auto R
= FileEntryToIndex
.insert(std::make_pair(FE
, NextIndex
));
44 Files
.emplace_back(FE
);
45 return R
.first
->second
;
48 llvm::Error
GsymCreator::save(StringRef Path
,
49 llvm::support::endianness ByteOrder
) const {
51 raw_fd_ostream
OutStrm(Path
, EC
);
53 return llvm::errorCodeToError(EC
);
54 FileWriter
O(OutStrm
, ByteOrder
);
58 llvm::Error
GsymCreator::encode(FileWriter
&O
) const {
59 std::lock_guard
<std::mutex
> Guard(Mutex
);
61 return createStringError(std::errc::invalid_argument
,
62 "no functions to encode");
64 return createStringError(std::errc::invalid_argument
,
65 "GsymCreator wasn't finalized prior to encoding");
67 if (Funcs
.size() > UINT32_MAX
)
68 return createStringError(std::errc::invalid_argument
,
69 "too many FunctionInfos");
71 const uint64_t MinAddr
=
72 BaseAddress
? *BaseAddress
: Funcs
.front().startAddress();
73 const uint64_t MaxAddr
= Funcs
.back().startAddress();
74 const uint64_t AddrDelta
= MaxAddr
- MinAddr
;
76 Hdr
.Magic
= GSYM_MAGIC
;
77 Hdr
.Version
= GSYM_VERSION
;
79 Hdr
.UUIDSize
= static_cast<uint8_t>(UUID
.size());
80 Hdr
.BaseAddress
= MinAddr
;
81 Hdr
.NumAddresses
= static_cast<uint32_t>(Funcs
.size());
82 Hdr
.StrtabOffset
= 0; // We will fix this up later.
83 Hdr
.StrtabSize
= 0; // We will fix this up later.
84 memset(Hdr
.UUID
, 0, sizeof(Hdr
.UUID
));
85 if (UUID
.size() > sizeof(Hdr
.UUID
))
86 return createStringError(std::errc::invalid_argument
,
87 "invalid UUID size %u", (uint32_t)UUID
.size());
88 // Set the address offset size correctly in the GSYM header.
89 if (AddrDelta
<= UINT8_MAX
)
91 else if (AddrDelta
<= UINT16_MAX
)
93 else if (AddrDelta
<= UINT32_MAX
)
97 // Copy the UUID value if we have one.
99 memcpy(Hdr
.UUID
, UUID
.data(), UUID
.size());
100 // Write out the header.
101 llvm::Error Err
= Hdr
.encode(O
);
105 // Write out the address offsets.
106 O
.alignTo(Hdr
.AddrOffSize
);
107 for (const auto &FuncInfo
: Funcs
) {
108 uint64_t AddrOffset
= FuncInfo
.startAddress() - Hdr
.BaseAddress
;
109 switch (Hdr
.AddrOffSize
) {
111 O
.writeU8(static_cast<uint8_t>(AddrOffset
));
114 O
.writeU16(static_cast<uint16_t>(AddrOffset
));
117 O
.writeU32(static_cast<uint32_t>(AddrOffset
));
120 O
.writeU64(AddrOffset
);
125 // Write out all zeros for the AddrInfoOffsets.
127 const off_t AddrInfoOffsetsOffset
= O
.tell();
128 for (size_t i
= 0, n
= Funcs
.size(); i
< n
; ++i
)
131 // Write out the file table
133 assert(!Files
.empty());
134 assert(Files
[0].Dir
== 0);
135 assert(Files
[0].Base
== 0);
136 size_t NumFiles
= Files
.size();
137 if (NumFiles
> UINT32_MAX
)
138 return createStringError(std::errc::invalid_argument
, "too many files");
139 O
.writeU32(static_cast<uint32_t>(NumFiles
));
140 for (auto File
: Files
) {
141 O
.writeU32(File
.Dir
);
142 O
.writeU32(File
.Base
);
145 // Write out the sting table.
146 const off_t StrtabOffset
= O
.tell();
147 StrTab
.write(O
.get_stream());
148 const off_t StrtabSize
= O
.tell() - StrtabOffset
;
149 std::vector
<uint32_t> AddrInfoOffsets
;
151 // Write out the address infos for each function info.
152 for (const auto &FuncInfo
: Funcs
) {
153 if (Expected
<uint64_t> OffsetOrErr
= FuncInfo
.encode(O
))
154 AddrInfoOffsets
.push_back(OffsetOrErr
.get());
156 return OffsetOrErr
.takeError();
158 // Fixup the string table offset and size in the header
159 O
.fixup32((uint32_t)StrtabOffset
, offsetof(Header
, StrtabOffset
));
160 O
.fixup32((uint32_t)StrtabSize
, offsetof(Header
, StrtabSize
));
162 // Fixup all address info offsets
164 for (auto AddrInfoOffset
: AddrInfoOffsets
) {
165 O
.fixup32(AddrInfoOffset
, AddrInfoOffsetsOffset
+ Offset
);
168 return ErrorSuccess();
// Similar to std::remove_if, but the predicate is binary and it is passed both
// the previous and the current element.
//
// Elements for which Pred(previous, current) returns true are dropped and the
// remaining elements are moved towards the front of the range. The returned
// iterator is the new logical end; the caller is expected to erase
// [returned iterator, LastIt) from the container.
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt != LastIt) {
    auto PrevIt = FirstIt++;
    // Skip the leading run of elements that are all kept; nothing needs to be
    // moved until the first element to drop has been found.
    FirstIt = std::find_if(FirstIt, LastIt, [&](const auto &Curr) {
      return Pred(*PrevIt++, Curr);
    });
    if (FirstIt != LastIt)
      for (ForwardIt CurrIt = FirstIt; ++CurrIt != LastIt;)
        if (!Pred(*PrevIt, *CurrIt)) {
          // Keep *CurrIt: it becomes the new "previous" element and is moved
          // into the next free slot.
          PrevIt = FirstIt;
          *FirstIt++ = std::move(*CurrIt);
        }
  }
  return FirstIt;
}

191 llvm::Error
GsymCreator::finalize(llvm::raw_ostream
&OS
) {
192 std::lock_guard
<std::mutex
> Guard(Mutex
);
194 return createStringError(std::errc::invalid_argument
, "already finalized");
197 // Sort function infos so we can emit sorted functions.
200 // Don't let the string table indexes change by finalizing in order.
201 StrTab
.finalizeInOrder();
203 // Remove duplicates function infos that have both entries from debug info
204 // (DWARF or Breakpad) and entries from the SymbolTable.
206 // Also handle overlapping function. Usually there shouldn't be any, but they
207 // can and do happen in some rare cases.
216 // In (a) and (b), Y is ignored and X will be reported for the full range.
217 // In (c), both functions will be included in the result and lookups for an
218 // address in the intersection will return Y because of binary search.
220 // Note that in case of (b), we cannot include Y in the result because then
221 // we wouldn't find any function for range (end of Y, end of X)
222 // with binary search
223 auto NumBefore
= Funcs
.size();
225 removeIfBinary(Funcs
.begin(), Funcs
.end(),
226 [&](const auto &Prev
, const auto &Curr
) {
227 // Empty ranges won't intersect, but we still need to
228 // catch the case where we have multiple symbols at the
229 // same address and coalesce them.
230 const bool ranges_equal
= Prev
.Range
== Curr
.Range
;
231 if (ranges_equal
|| Prev
.Range
.intersects(Curr
.Range
)) {
232 // Overlapping ranges or empty identical ranges.
234 // Same address range. Check if one is from debug
235 // info and the other is from a symbol table. If
236 // so, then keep the one with debug info. Our
237 // sorting guarantees that entries with matching
238 // address ranges that have debug info are last in
241 // FunctionInfo entries match exactly (range,
244 // We used to output a warning here, but this was
245 // so frequent on some binaries, in particular
246 // when those were built with GCC, that it slowed
247 // down processing extremely.
250 if (!Prev
.hasRichInfo() && Curr
.hasRichInfo()) {
251 // Same address range, one with no debug info
252 // (symbol) and the next with debug info. Keep
257 OS
<< "warning: same address range contains "
259 << "info. Removing:\n"
260 << Prev
<< "\nIn favor of this one:\n"
267 if (!Quiet
) { // print warnings about overlaps
268 OS
<< "warning: function ranges overlap:\n"
273 } else if (Prev
.Range
.size() == 0 &&
274 Curr
.Range
.contains(Prev
.Range
.Start
)) {
276 OS
<< "warning: removing symbol:\n"
277 << Prev
<< "\nKeeping:\n"
287 // If our last function info entry doesn't have a size and if we have valid
288 // text ranges, we should set the size of the last entry since any search for
289 // a high address might match our last entry. By fixing up this size, we can
290 // help ensure we don't cause lookups to always return the last symbol that
291 // has no size when doing lookups.
292 if (!Funcs
.empty() && Funcs
.back().Range
.size() == 0 && ValidTextRanges
) {
294 ValidTextRanges
->getRangeThatContains(Funcs
.back().Range
.Start
)) {
295 Funcs
.back().Range
.End
= Range
->End
;
298 OS
<< "Pruned " << NumBefore
- Funcs
.size() << " functions, ended with "
299 << Funcs
.size() << " total\n";
300 return Error::success();
303 uint32_t GsymCreator::insertString(StringRef S
, bool Copy
) {
307 // The hash can be calculated outside the lock.
308 CachedHashStringRef
CHStr(S
);
309 std::lock_guard
<std::mutex
> Guard(Mutex
);
311 // We need to provide backing storage for the string if requested
312 // since StringTableBuilder stores references to strings. Any string
313 // that comes from a section in an object file doesn't need to be
314 // copied, but any string created by code will need to be copied.
315 // This allows GsymCreator to be really fast when parsing DWARF and
316 // other object files as most strings don't need to be copied.
317 if (!StrTab
.contains(CHStr
))
318 CHStr
= CachedHashStringRef
{StringStorage
.insert(S
).first
->getKey(),
321 return StrTab
.add(CHStr
);
324 void GsymCreator::addFunctionInfo(FunctionInfo
&&FI
) {
325 std::lock_guard
<std::mutex
> Guard(Mutex
);
326 Ranges
.insert(FI
.Range
);
327 Funcs
.emplace_back(std::move(FI
));
330 void GsymCreator::forEachFunctionInfo(
331 std::function
<bool(FunctionInfo
&)> const &Callback
) {
332 std::lock_guard
<std::mutex
> Guard(Mutex
);
333 for (auto &FI
: Funcs
) {
339 void GsymCreator::forEachFunctionInfo(
340 std::function
<bool(const FunctionInfo
&)> const &Callback
) const {
341 std::lock_guard
<std::mutex
> Guard(Mutex
);
342 for (const auto &FI
: Funcs
) {
348 size_t GsymCreator::getNumFunctionInfos() const {
349 std::lock_guard
<std::mutex
> Guard(Mutex
);
353 bool GsymCreator::IsValidTextAddress(uint64_t Addr
) const {
355 return ValidTextRanges
->contains(Addr
);
356 return true; // No valid text ranges has been set, so accept all ranges.
359 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr
) const {
360 std::lock_guard
<std::mutex
> Guard(Mutex
);
361 return Ranges
.contains(Addr
);