1 //===- GsymReader.cpp -----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/DebugInfo/GSYM/GsymReader.h"
16 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
17 #include "llvm/DebugInfo/GSYM/LineTable.h"
18 #include "llvm/Support/BinaryStreamReader.h"
19 #include "llvm/Support/DataExtractor.h"
20 #include "llvm/Support/MemoryBuffer.h"
25 GsymReader::GsymReader(std::unique_ptr
<MemoryBuffer
> Buffer
)
26 : MemBuffer(std::move(Buffer
)), Endian(llvm::endianness::native
) {}
28 GsymReader::GsymReader(GsymReader
&&RHS
) = default;
30 GsymReader::~GsymReader() = default;
32 llvm::Expected
<GsymReader
> GsymReader::openFile(StringRef Filename
) {
33 // Open the input file and return an appropriate error if needed.
34 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> BuffOrErr
=
35 MemoryBuffer::getFileOrSTDIN(Filename
);
36 auto Err
= BuffOrErr
.getError();
38 return llvm::errorCodeToError(Err
);
39 return create(BuffOrErr
.get());
42 llvm::Expected
<GsymReader
> GsymReader::copyBuffer(StringRef Bytes
) {
43 auto MemBuffer
= MemoryBuffer::getMemBufferCopy(Bytes
, "GSYM bytes");
44 return create(MemBuffer
);
47 llvm::Expected
<llvm::gsym::GsymReader
>
48 GsymReader::create(std::unique_ptr
<MemoryBuffer
> &MemBuffer
) {
50 return createStringError(std::errc::invalid_argument
,
51 "invalid memory buffer");
52 GsymReader
GR(std::move(MemBuffer
));
53 llvm::Error Err
= GR
.parse();
55 return std::move(Err
);
61 BinaryStreamReader
FileData(MemBuffer
->getBuffer(), llvm::endianness::native
);
62 // Check for the magic bytes. This file format is designed to be mmap'ed
63 // into a process and accessed as read only. This is done for performance
64 // and efficiency for symbolicating and parsing GSYM data.
65 if (FileData
.readObject(Hdr
))
66 return createStringError(std::errc::invalid_argument
,
67 "not enough data for a GSYM header");
69 const auto HostByteOrder
= llvm::endianness::native
;
72 Endian
= HostByteOrder
;
75 // This is a GSYM file, but not native endianness.
76 Endian
= sys::IsBigEndianHost
? llvm::endianness::little
77 : llvm::endianness::big
;
78 Swap
.reset(new SwappedData
);
81 return createStringError(std::errc::invalid_argument
,
85 bool DataIsLittleEndian
= HostByteOrder
!= llvm::endianness::little
;
86 // Read a correctly byte swapped header if we need to.
88 DataExtractor
Data(MemBuffer
->getBuffer(), DataIsLittleEndian
, 4);
89 if (auto ExpectedHdr
= Header::decode(Data
))
90 Swap
->Hdr
= ExpectedHdr
.get();
92 return ExpectedHdr
.takeError();
96 // Detect errors in the header and report any that are found. If we make it
97 // past this without errors, we know we have a good magic value, a supported
98 // version number, verified address offset size and a valid UUID size.
99 if (Error Err
= Hdr
->checkForError())
103 // This is the native endianness case that is most common and optimized for
104 // efficient lookups. Here we just grab pointers to the native data and
105 // use ArrayRef objects to allow efficient read only access.
107 // Read the address offsets.
108 if (FileData
.padToAlignment(Hdr
->AddrOffSize
) ||
109 FileData
.readArray(AddrOffsets
,
110 Hdr
->NumAddresses
* Hdr
->AddrOffSize
))
111 return createStringError(std::errc::invalid_argument
,
112 "failed to read address table");
114 // Read the address info offsets.
115 if (FileData
.padToAlignment(4) ||
116 FileData
.readArray(AddrInfoOffsets
, Hdr
->NumAddresses
))
117 return createStringError(std::errc::invalid_argument
,
118 "failed to read address info offsets table");
120 // Read the file table.
121 uint32_t NumFiles
= 0;
122 if (FileData
.readInteger(NumFiles
) || FileData
.readArray(Files
, NumFiles
))
123 return createStringError(std::errc::invalid_argument
,
124 "failed to read file table");
126 // Get the string table.
127 FileData
.setOffset(Hdr
->StrtabOffset
);
128 if (FileData
.readFixedString(StrTab
.Data
, Hdr
->StrtabSize
))
129 return createStringError(std::errc::invalid_argument
,
130 "failed to read string table");
132 // This is the non native endianness case that is not common and not
133 // optimized for lookups. Here we decode the important tables into local
134 // storage and then set the ArrayRef objects to point to these swapped
135 // copies of the read only data so lookups can be as efficient as possible.
136 DataExtractor
Data(MemBuffer
->getBuffer(), DataIsLittleEndian
, 4);
138 // Read the address offsets.
139 uint64_t Offset
= alignTo(sizeof(Header
), Hdr
->AddrOffSize
);
140 Swap
->AddrOffsets
.resize(Hdr
->NumAddresses
* Hdr
->AddrOffSize
);
141 switch (Hdr
->AddrOffSize
) {
143 if (!Data
.getU8(&Offset
, Swap
->AddrOffsets
.data(), Hdr
->NumAddresses
))
144 return createStringError(std::errc::invalid_argument
,
145 "failed to read address table");
148 if (!Data
.getU16(&Offset
,
149 reinterpret_cast<uint16_t *>(Swap
->AddrOffsets
.data()),
151 return createStringError(std::errc::invalid_argument
,
152 "failed to read address table");
155 if (!Data
.getU32(&Offset
,
156 reinterpret_cast<uint32_t *>(Swap
->AddrOffsets
.data()),
158 return createStringError(std::errc::invalid_argument
,
159 "failed to read address table");
162 if (!Data
.getU64(&Offset
,
163 reinterpret_cast<uint64_t *>(Swap
->AddrOffsets
.data()),
165 return createStringError(std::errc::invalid_argument
,
166 "failed to read address table");
168 AddrOffsets
= ArrayRef
<uint8_t>(Swap
->AddrOffsets
);
170 // Read the address info offsets.
171 Offset
= alignTo(Offset
, 4);
172 Swap
->AddrInfoOffsets
.resize(Hdr
->NumAddresses
);
173 if (Data
.getU32(&Offset
, Swap
->AddrInfoOffsets
.data(), Hdr
->NumAddresses
))
174 AddrInfoOffsets
= ArrayRef
<uint32_t>(Swap
->AddrInfoOffsets
);
176 return createStringError(std::errc::invalid_argument
,
177 "failed to read address table");
178 // Read the file table.
179 const uint32_t NumFiles
= Data
.getU32(&Offset
);
181 Swap
->Files
.resize(NumFiles
);
182 if (Data
.getU32(&Offset
, &Swap
->Files
[0].Dir
, NumFiles
*2))
183 Files
= ArrayRef
<FileEntry
>(Swap
->Files
);
185 return createStringError(std::errc::invalid_argument
,
186 "failed to read file table");
188 // Get the string table.
189 StrTab
.Data
= MemBuffer
->getBuffer().substr(Hdr
->StrtabOffset
,
191 if (StrTab
.Data
.empty())
192 return createStringError(std::errc::invalid_argument
,
193 "failed to read string table");
195 return Error::success();
199 const Header
&GsymReader::getHeader() const {
200 // The only way to get a GsymReader is from GsymReader::openFile(...) or
201 // GsymReader::copyBuffer() and the header must be valid and initialized to
202 // a valid pointer value, so the assert below should not trigger.
207 std::optional
<uint64_t> GsymReader::getAddress(size_t Index
) const {
208 switch (Hdr
->AddrOffSize
) {
209 case 1: return addressForIndex
<uint8_t>(Index
);
210 case 2: return addressForIndex
<uint16_t>(Index
);
211 case 4: return addressForIndex
<uint32_t>(Index
);
212 case 8: return addressForIndex
<uint64_t>(Index
);
217 std::optional
<uint64_t> GsymReader::getAddressInfoOffset(size_t Index
) const {
218 const auto NumAddrInfoOffsets
= AddrInfoOffsets
.size();
219 if (Index
< NumAddrInfoOffsets
)
220 return AddrInfoOffsets
[Index
];
225 GsymReader::getAddressIndex(const uint64_t Addr
) const {
226 if (Addr
>= Hdr
->BaseAddress
) {
227 const uint64_t AddrOffset
= Addr
- Hdr
->BaseAddress
;
228 std::optional
<uint64_t> AddrOffsetIndex
;
229 switch (Hdr
->AddrOffSize
) {
231 AddrOffsetIndex
= getAddressOffsetIndex
<uint8_t>(AddrOffset
);
234 AddrOffsetIndex
= getAddressOffsetIndex
<uint16_t>(AddrOffset
);
237 AddrOffsetIndex
= getAddressOffsetIndex
<uint32_t>(AddrOffset
);
240 AddrOffsetIndex
= getAddressOffsetIndex
<uint64_t>(AddrOffset
);
243 return createStringError(std::errc::invalid_argument
,
244 "unsupported address offset size %u",
248 return *AddrOffsetIndex
;
250 return createStringError(std::errc::invalid_argument
,
251 "address 0x%" PRIx64
" is not in GSYM", Addr
);
255 llvm::Expected
<DataExtractor
>
256 GsymReader::getFunctionInfoDataForAddress(uint64_t Addr
,
257 uint64_t &FuncStartAddr
) const {
258 Expected
<uint64_t> ExpectedAddrIdx
= getAddressIndex(Addr
);
259 if (!ExpectedAddrIdx
)
260 return ExpectedAddrIdx
.takeError();
261 const uint64_t FirstAddrIdx
= *ExpectedAddrIdx
;
262 // The AddrIdx is the first index of the function info entries that match
263 // \a Addr. We need to iterate over all function info objects that start with
264 // the same address until we find a range that contains \a Addr.
265 std::optional
<uint64_t> FirstFuncStartAddr
;
266 const size_t NumAddresses
= getNumAddresses();
267 for (uint64_t AddrIdx
= FirstAddrIdx
; AddrIdx
< NumAddresses
; ++AddrIdx
) {
268 auto ExpextedData
= getFunctionInfoDataAtIndex(AddrIdx
, FuncStartAddr
);
269 // If there was an error, return the error.
273 // Remember the first function start address if it hasn't already been set.
274 // If it is already valid, check to see if it matches the first function
275 // start address and only continue if it matches.
276 if (FirstFuncStartAddr
.has_value()) {
277 if (*FirstFuncStartAddr
!= FuncStartAddr
)
278 break; // Done with consecutive function entries with same address.
280 FirstFuncStartAddr
= FuncStartAddr
;
282 // Make sure the current function address ranges contains \a Addr.
283 // Some symbols on Darwin don't have valid sizes, so if we run into a
284 // symbol with zero size, then we have found a match for our address.
286 // The first thing the encoding of a FunctionInfo object is the function
289 uint32_t FuncSize
= ExpextedData
->getU32(&Offset
);
291 AddressRange(FuncStartAddr
, FuncStartAddr
+ FuncSize
).contains(Addr
))
294 return createStringError(std::errc::invalid_argument
,
295 "address 0x%" PRIx64
" is not in GSYM", Addr
);
298 llvm::Expected
<DataExtractor
>
299 GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx
,
300 uint64_t &FuncStartAddr
) const {
301 if (AddrIdx
>= getNumAddresses())
302 return createStringError(std::errc::invalid_argument
,
303 "invalid address index %" PRIu64
, AddrIdx
);
304 const uint32_t AddrInfoOffset
= AddrInfoOffsets
[AddrIdx
];
305 assert((Endian
== endianness::big
|| Endian
== endianness::little
) &&
306 "Endian must be either big or little");
307 StringRef Bytes
= MemBuffer
->getBuffer().substr(AddrInfoOffset
);
309 return createStringError(std::errc::invalid_argument
,
310 "invalid address info offset 0x%" PRIx32
,
312 std::optional
<uint64_t> OptFuncStartAddr
= getAddress(AddrIdx
);
313 if (!OptFuncStartAddr
)
314 return createStringError(std::errc::invalid_argument
,
315 "failed to extract address[%" PRIu64
"]", AddrIdx
);
316 FuncStartAddr
= *OptFuncStartAddr
;
317 return DataExtractor(Bytes
, Endian
== llvm::endianness::little
, 4);
320 llvm::Expected
<FunctionInfo
> GsymReader::getFunctionInfo(uint64_t Addr
) const {
321 uint64_t FuncStartAddr
= 0;
322 if (auto ExpectedData
= getFunctionInfoDataForAddress(Addr
, FuncStartAddr
))
323 return FunctionInfo::decode(*ExpectedData
, FuncStartAddr
);
325 return ExpectedData
.takeError();
328 llvm::Expected
<FunctionInfo
>
329 GsymReader::getFunctionInfoAtIndex(uint64_t Idx
) const {
330 uint64_t FuncStartAddr
= 0;
331 if (auto ExpectedData
= getFunctionInfoDataAtIndex(Idx
, FuncStartAddr
))
332 return FunctionInfo::decode(*ExpectedData
, FuncStartAddr
);
334 return ExpectedData
.takeError();
337 llvm::Expected
<LookupResult
>
338 GsymReader::lookup(uint64_t Addr
,
339 std::optional
<DataExtractor
> *MergedFunctionsData
) const {
340 uint64_t FuncStartAddr
= 0;
341 if (auto ExpectedData
= getFunctionInfoDataForAddress(Addr
, FuncStartAddr
))
342 return FunctionInfo::lookup(*ExpectedData
, *this, FuncStartAddr
, Addr
,
343 MergedFunctionsData
);
345 return ExpectedData
.takeError();
348 llvm::Expected
<std::vector
<LookupResult
>>
349 GsymReader::lookupAll(uint64_t Addr
) const {
350 std::vector
<LookupResult
> Results
;
351 std::optional
<DataExtractor
> MergedFunctionsData
;
353 // First perform a lookup to get the primary function info result.
354 auto MainResult
= lookup(Addr
, &MergedFunctionsData
);
356 return MainResult
.takeError();
358 // Add the main result as the first entry.
359 Results
.push_back(std::move(*MainResult
));
361 // Now process any merged functions data that was found during the lookup.
362 if (MergedFunctionsData
) {
363 // Get data extractors for each merged function.
364 auto ExpectedMergedFuncExtractors
=
365 MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData
);
366 if (!ExpectedMergedFuncExtractors
)
367 return ExpectedMergedFuncExtractors
.takeError();
369 // Process each merged function data.
370 for (DataExtractor
&MergedData
: *ExpectedMergedFuncExtractors
) {
371 if (auto FI
= FunctionInfo::lookup(MergedData
, *this,
372 MainResult
->FuncRange
.start(), Addr
)) {
373 Results
.push_back(std::move(*FI
));
375 return FI
.takeError();
383 void GsymReader::dump(raw_ostream
&OS
) {
384 const auto &Header
= getHeader();
385 // Dump the GSYM header.
386 OS
<< Header
<< "\n";
387 // Dump the address table.
388 OS
<< "Address Table:\n";
389 OS
<< "INDEX OFFSET";
391 switch (Hdr
->AddrOffSize
) {
392 case 1: OS
<< "8 "; break;
393 case 2: OS
<< "16"; break;
394 case 4: OS
<< "32"; break;
395 case 8: OS
<< "64"; break;
396 default: OS
<< "??"; break;
398 OS
<< " (ADDRESS)\n";
399 OS
<< "====== =============================== \n";
400 for (uint32_t I
= 0; I
< Header
.NumAddresses
; ++I
) {
401 OS
<< format("[%4u] ", I
);
402 switch (Hdr
->AddrOffSize
) {
403 case 1: OS
<< HEX8(getAddrOffsets
<uint8_t>()[I
]); break;
404 case 2: OS
<< HEX16(getAddrOffsets
<uint16_t>()[I
]); break;
405 case 4: OS
<< HEX32(getAddrOffsets
<uint32_t>()[I
]); break;
406 case 8: OS
<< HEX32(getAddrOffsets
<uint64_t>()[I
]); break;
409 OS
<< " (" << HEX64(*getAddress(I
)) << ")\n";
411 // Dump the address info offsets table.
412 OS
<< "\nAddress Info Offsets:\n";
413 OS
<< "INDEX Offset\n";
414 OS
<< "====== ==========\n";
415 for (uint32_t I
= 0; I
< Header
.NumAddresses
; ++I
)
416 OS
<< format("[%4u] ", I
) << HEX32(AddrInfoOffsets
[I
]) << "\n";
417 // Dump the file table.
419 OS
<< "INDEX DIRECTORY BASENAME PATH\n";
420 OS
<< "====== ========== ========== ==============================\n";
421 for (uint32_t I
= 0; I
< Files
.size(); ++I
) {
422 OS
<< format("[%4u] ", I
) << HEX32(Files
[I
].Dir
) << ' '
423 << HEX32(Files
[I
].Base
) << ' ';
424 dump(OS
, getFile(I
));
427 OS
<< "\n" << StrTab
<< "\n";
429 for (uint32_t I
= 0; I
< Header
.NumAddresses
; ++I
) {
430 OS
<< "FunctionInfo @ " << HEX32(AddrInfoOffsets
[I
]) << ": ";
431 if (auto FI
= getFunctionInfoAtIndex(I
))
434 logAllUnhandledErrors(FI
.takeError(), OS
, "FunctionInfo:");
438 void GsymReader::dump(raw_ostream
&OS
, const FunctionInfo
&FI
,
441 OS
<< FI
.Range
<< " \"" << getString(FI
.Name
) << "\"\n";
443 dump(OS
, *FI
.OptLineTable
, Indent
);
445 dump(OS
, *FI
.Inline
, Indent
);
448 dump(OS
, *FI
.CallSites
, Indent
);
450 if (FI
.MergedFunctions
) {
451 assert(Indent
== 0 && "MergedFunctionsInfo should only exist at top level");
452 dump(OS
, *FI
.MergedFunctions
);
456 void GsymReader::dump(raw_ostream
&OS
, const MergedFunctionsInfo
&MFI
) {
457 for (uint32_t inx
= 0; inx
< MFI
.MergedFunctions
.size(); inx
++) {
458 OS
<< "++ Merged FunctionInfos[" << inx
<< "]:\n";
459 dump(OS
, MFI
.MergedFunctions
[inx
], 4);
463 void GsymReader::dump(raw_ostream
&OS
, const CallSiteInfo
&CSI
) {
464 OS
<< HEX16(CSI
.ReturnOffset
);
467 auto addFlag
= [&](const char *Flag
) {
473 if (CSI
.Flags
== CallSiteInfo::Flags::None
)
476 if (CSI
.Flags
& CallSiteInfo::Flags::InternalCall
)
477 addFlag("InternalCall");
479 if (CSI
.Flags
& CallSiteInfo::Flags::ExternalCall
)
480 addFlag("ExternalCall");
482 OS
<< " Flags[" << Flags
<< "]";
484 if (!CSI
.MatchRegex
.empty()) {
485 OS
<< " MatchRegex[";
486 for (uint32_t i
= 0; i
< CSI
.MatchRegex
.size(); ++i
) {
489 OS
<< getString(CSI
.MatchRegex
[i
]);
495 void GsymReader::dump(raw_ostream
&OS
, const CallSiteInfoCollection
&CSIC
,
498 OS
<< "CallSites (by relative return offset):\n";
499 for (const auto &CS
: CSIC
.CallSites
) {
507 void GsymReader::dump(raw_ostream
&OS
, const LineTable
<
, uint32_t Indent
) {
509 OS
<< "LineTable:\n";
512 OS
<< " " << HEX64(LE
.Addr
) << ' ';
514 dump(OS
, getFile(LE
.File
));
515 OS
<< ':' << LE
.Line
<< '\n';
519 void GsymReader::dump(raw_ostream
&OS
, const InlineInfo
&II
, uint32_t Indent
) {
521 OS
<< "InlineInfo:\n";
524 OS
<< II
.Ranges
<< ' ' << getString(II
.Name
);
525 if (II
.CallFile
!= 0) {
526 if (auto File
= getFile(II
.CallFile
)) {
527 OS
<< " called from ";
529 OS
<< ':' << II
.CallLine
;
533 for (const auto &ChildII
: II
.Children
)
534 dump(OS
, ChildII
, Indent
+ 2);
537 void GsymReader::dump(raw_ostream
&OS
, std::optional
<FileEntry
> FE
) {
539 // IF we have the file from index 0, then don't print anything
540 if (FE
->Dir
== 0 && FE
->Base
== 0)
542 StringRef Dir
= getString(FE
->Dir
);
543 StringRef Base
= getString(FE
->Base
);
546 if (Dir
.contains('\\') && !Dir
.contains('/'))
554 if (!Dir
.empty() || !Base
.empty())
557 OS
<< "<invalid-file>";