1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains support for reading profiling data for instrumentation
10 // based PGO and coverage.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/LineIterator.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/OnDiskHashTable.h"
26 #include "llvm/Support/SwapByteOrder.h"
38 class InstrProfReader
;
40 /// A file format agnostic iterator over profiling data.
41 class InstrProfIterator
: public std::iterator
<std::input_iterator_tag
,
42 NamedInstrProfRecord
> {
43 InstrProfReader
*Reader
= nullptr;
49 InstrProfIterator() = default;
50 InstrProfIterator(InstrProfReader
*Reader
) : Reader(Reader
) { Increment(); }
52 InstrProfIterator
&operator++() { Increment(); return *this; }
53 bool operator==(const InstrProfIterator
&RHS
) { return Reader
== RHS
.Reader
; }
54 bool operator!=(const InstrProfIterator
&RHS
) { return Reader
!= RHS
.Reader
; }
55 value_type
&operator*() { return Record
; }
56 value_type
*operator->() { return &Record
; }
59 /// Base class and interface for reading profiling data of any known instrprof
60 /// format. Provides an iterator over NamedInstrProfRecords.
61 class InstrProfReader
{
62 instrprof_error LastError
= instrprof_error::success
;
65 InstrProfReader() = default;
66 virtual ~InstrProfReader() = default;
68 /// Read the header. Required before reading first record.
69 virtual Error
readHeader() = 0;
71 /// Read a single record.
72 virtual Error
readNextRecord(NamedInstrProfRecord
&Record
) = 0;
74 /// Iterator over profile data.
75 InstrProfIterator
begin() { return InstrProfIterator(this); }
76 InstrProfIterator
end() { return InstrProfIterator(); }
78 virtual bool isIRLevelProfile() const = 0;
80 /// Return the PGO symtab. There are three different readers:
81 /// Raw, Text, and Indexed profile readers. The first two types
82 /// of readers are used only by llvm-profdata tool, while the indexed
83 /// profile reader is also used by llvm-cov tool and the compiler (
84 /// backend or frontend). Since creating PGO symtab can create
85 /// significant runtime and memory overhead (as it touches data
86 /// for the whole program), InstrProfSymtab for the indexed profile
87 /// reader should be created on demand and it is recommended to be
88 /// only used for dumping purpose with llvm-profdata, not with the
90 virtual InstrProfSymtab
&getSymtab() = 0;
93 std::unique_ptr
<InstrProfSymtab
> Symtab
;
95 /// Set the current error and return same.
96 Error
error(instrprof_error Err
) {
98 if (Err
== instrprof_error::success
)
99 return Error::success();
100 return make_error
<InstrProfError
>(Err
);
103 Error
error(Error
&&E
) { return error(InstrProfError::take(std::move(E
))); }
105 /// Clear the current error and return a successful one.
106 Error
success() { return error(instrprof_error::success
); }
109 /// Return true if the reader has finished reading the profile data.
110 bool isEOF() { return LastError
== instrprof_error::eof
; }
112 /// Return true if the reader encountered an error reading profiling data.
113 bool hasError() { return LastError
!= instrprof_error::success
&& !isEOF(); }
115 /// Get the current error.
118 return make_error
<InstrProfError
>(LastError
);
119 return Error::success();
122 /// Factory method to create an appropriately typed reader for the given
124 static Expected
<std::unique_ptr
<InstrProfReader
>> create(const Twine
&Path
);
126 static Expected
<std::unique_ptr
<InstrProfReader
>>
127 create(std::unique_ptr
<MemoryBuffer
> Buffer
);
130 /// Reader for the simple text based instrprof format.
132 /// This format is a simple text format that's suitable for test data. Records
133 /// are separated by one or more blank lines, and record fields are separated by
136 /// Each record consists of a function name, a function hash, a number of
137 /// counters, and then each counter value, in that order.
138 class TextInstrProfReader
: public InstrProfReader
{
140 /// The profile data file contents.
141 std::unique_ptr
<MemoryBuffer
> DataBuffer
;
142 /// Iterator over the profile data.
144 bool IsIRLevelProfile
= false;
146 Error
readValueProfileData(InstrProfRecord
&Record
);
149 TextInstrProfReader(std::unique_ptr
<MemoryBuffer
> DataBuffer_
)
150 : DataBuffer(std::move(DataBuffer_
)), Line(*DataBuffer
, true, '#') {}
151 TextInstrProfReader(const TextInstrProfReader
&) = delete;
152 TextInstrProfReader
&operator=(const TextInstrProfReader
&) = delete;
154 /// Return true if the given buffer is in text instrprof format.
155 static bool hasFormat(const MemoryBuffer
&Buffer
);
157 bool isIRLevelProfile() const override
{ return IsIRLevelProfile
; }
160 Error
readHeader() override
;
162 /// Read a single record.
163 Error
readNextRecord(NamedInstrProfRecord
&Record
) override
;
165 InstrProfSymtab
&getSymtab() override
{
166 assert(Symtab
.get());
167 return *Symtab
.get();
171 /// Reader for the raw instrprof binary format from runtime.
173 /// This format is a raw memory dump of the instrumentation-based profiling data
174 /// from the runtime. It has no index.
176 /// Templated on the unsigned type whose size matches pointers on the platform
177 /// that wrote the profile.
178 template <class IntPtrT
>
179 class RawInstrProfReader
: public InstrProfReader
{
181 /// The profile data file contents.
182 std::unique_ptr
<MemoryBuffer
> DataBuffer
;
183 bool ShouldSwapBytes
;
184 // The value of the version field of the raw profile data header. The lower 56
185 // bits specifies the format version and the most significant 8 bits specify
186 // the variant types of the profile.
188 uint64_t CountersDelta
;
190 const RawInstrProf::ProfileData
<IntPtrT
> *Data
;
191 const RawInstrProf::ProfileData
<IntPtrT
> *DataEnd
;
192 const uint64_t *CountersStart
;
193 const char *NamesStart
;
195 // After value profile is all read, this pointer points to
196 // the header of next profile data (if exists)
197 const uint8_t *ValueDataStart
;
198 uint32_t ValueKindLast
;
199 uint32_t CurValueDataSize
;
202 RawInstrProfReader(std::unique_ptr
<MemoryBuffer
> DataBuffer
)
203 : DataBuffer(std::move(DataBuffer
)) {}
204 RawInstrProfReader(const RawInstrProfReader
&) = delete;
205 RawInstrProfReader
&operator=(const RawInstrProfReader
&) = delete;
207 static bool hasFormat(const MemoryBuffer
&DataBuffer
);
208 Error
readHeader() override
;
209 Error
readNextRecord(NamedInstrProfRecord
&Record
) override
;
211 bool isIRLevelProfile() const override
{
212 return (Version
& VARIANT_MASK_IR_PROF
) != 0;
215 InstrProfSymtab
&getSymtab() override
{
216 assert(Symtab
.get());
217 return *Symtab
.get();
221 Error
createSymtab(InstrProfSymtab
&Symtab
);
222 Error
readNextHeader(const char *CurrentPos
);
223 Error
readHeader(const RawInstrProf::Header
&Header
);
225 template <class IntT
> IntT
swap(IntT Int
) const {
226 return ShouldSwapBytes
? sys::getSwappedBytes(Int
) : Int
;
229 support::endianness
getDataEndianness() const {
230 support::endianness HostEndian
= getHostEndianness();
231 if (!ShouldSwapBytes
)
233 if (HostEndian
== support::little
)
236 return support::little
;
/// Number of bytes needed to round \p SizeInBytes up to the next multiple of
/// sizeof(uint64_t). Returns 0 when \p SizeInBytes is already a multiple.
inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
  const uint64_t WordSize = sizeof(uint64_t);
  const uint64_t BytesPastBoundary = SizeInBytes % WordSize;
  // Masking with 7 folds the already-aligned case (WordSize - 0 == 8) to 0.
  return 7 & (WordSize - BytesPastBoundary);
}
243 Error
readName(NamedInstrProfRecord
&Record
);
244 Error
readFuncHash(NamedInstrProfRecord
&Record
);
245 Error
readRawCounts(InstrProfRecord
&Record
);
246 Error
readValueProfilingData(InstrProfRecord
&Record
);
247 bool atEnd() const { return Data
== DataEnd
; }
251 ValueDataStart
+= CurValueDataSize
;
254 const char *getNextHeaderPos() const {
256 return (const char *)ValueDataStart
;
259 const uint64_t *getCounter(IntPtrT CounterPtr
) const {
260 ptrdiff_t Offset
= (swap(CounterPtr
) - CountersDelta
) / sizeof(uint64_t);
261 return CountersStart
+ Offset
;
264 StringRef
getName(uint64_t NameRef
) const {
265 return Symtab
->getFuncName(swap(NameRef
));
269 using RawInstrProfReader32
= RawInstrProfReader
<uint32_t>;
270 using RawInstrProfReader64
= RawInstrProfReader
<uint64_t>;
namespace IndexedInstrProf {

/// Opaque declaration of the hash-type enumeration; only its underlying type
/// (uint32_t) is fixed here, the enumerators are defined elsewhere.
enum class HashT : uint32_t;

} // end namespace IndexedInstrProf
278 /// Trait for lookups into the on-disk hash table for the binary instrprof
280 class InstrProfLookupTrait
{
281 std::vector
<NamedInstrProfRecord
> DataBuffer
;
282 IndexedInstrProf::HashT HashType
;
283 unsigned FormatVersion
;
284 // Endianness of the input value profile data.
285 // It should be LE by default, but can be changed
286 // for testing purpose.
287 support::endianness ValueProfDataEndianness
= support::little
;
290 InstrProfLookupTrait(IndexedInstrProf::HashT HashType
, unsigned FormatVersion
)
291 : HashType(HashType
), FormatVersion(FormatVersion
) {}
293 using data_type
= ArrayRef
<NamedInstrProfRecord
>;
295 using internal_key_type
= StringRef
;
296 using external_key_type
= StringRef
;
297 using hash_value_type
= uint64_t;
298 using offset_type
= uint64_t;
300 static bool EqualKey(StringRef A
, StringRef B
) { return A
== B
; }
301 static StringRef
GetInternalKey(StringRef K
) { return K
; }
302 static StringRef
GetExternalKey(StringRef K
) { return K
; }
304 hash_value_type
ComputeHash(StringRef K
);
306 static std::pair
<offset_type
, offset_type
>
307 ReadKeyDataLength(const unsigned char *&D
) {
308 using namespace support
;
310 offset_type KeyLen
= endian::readNext
<offset_type
, little
, unaligned
>(D
);
311 offset_type DataLen
= endian::readNext
<offset_type
, little
, unaligned
>(D
);
312 return std::make_pair(KeyLen
, DataLen
);
315 StringRef
ReadKey(const unsigned char *D
, offset_type N
) {
316 return StringRef((const char *)D
, N
);
319 bool readValueProfilingData(const unsigned char *&D
,
320 const unsigned char *const End
);
321 data_type
ReadData(StringRef K
, const unsigned char *D
, offset_type N
);
323 // Used for testing purpose only.
324 void setValueProfDataEndianness(support::endianness Endianness
) {
325 ValueProfDataEndianness
= Endianness
;
329 struct InstrProfReaderIndexBase
{
330 virtual ~InstrProfReaderIndexBase() = default;
332 // Read all the profile records with the same key pointed to the current
334 virtual Error
getRecords(ArrayRef
<NamedInstrProfRecord
> &Data
) = 0;
336 // Read all the profile records with the key equal to FuncName
337 virtual Error
getRecords(StringRef FuncName
,
338 ArrayRef
<NamedInstrProfRecord
> &Data
) = 0;
339 virtual void advanceToNextKey() = 0;
340 virtual bool atEnd() const = 0;
341 virtual void setValueProfDataEndianness(support::endianness Endianness
) = 0;
342 virtual uint64_t getVersion() const = 0;
343 virtual bool isIRLevelProfile() const = 0;
344 virtual Error
populateSymtab(InstrProfSymtab
&) = 0;
347 using OnDiskHashTableImplV3
=
348 OnDiskIterableChainedHashTable
<InstrProfLookupTrait
>;
350 template <typename HashTableImpl
>
351 class InstrProfReaderItaniumRemapper
;
353 template <typename HashTableImpl
>
354 class InstrProfReaderIndex
: public InstrProfReaderIndexBase
{
356 std::unique_ptr
<HashTableImpl
> HashTable
;
357 typename
HashTableImpl::data_iterator RecordIterator
;
358 uint64_t FormatVersion
;
360 friend class InstrProfReaderItaniumRemapper
<HashTableImpl
>;
363 InstrProfReaderIndex(const unsigned char *Buckets
,
364 const unsigned char *const Payload
,
365 const unsigned char *const Base
,
366 IndexedInstrProf::HashT HashType
, uint64_t Version
);
367 ~InstrProfReaderIndex() override
= default;
369 Error
getRecords(ArrayRef
<NamedInstrProfRecord
> &Data
) override
;
370 Error
getRecords(StringRef FuncName
,
371 ArrayRef
<NamedInstrProfRecord
> &Data
) override
;
372 void advanceToNextKey() override
{ RecordIterator
++; }
374 bool atEnd() const override
{
375 return RecordIterator
== HashTable
->data_end();
378 void setValueProfDataEndianness(support::endianness Endianness
) override
{
379 HashTable
->getInfoObj().setValueProfDataEndianness(Endianness
);
382 uint64_t getVersion() const override
{ return GET_VERSION(FormatVersion
); }
384 bool isIRLevelProfile() const override
{
385 return (FormatVersion
& VARIANT_MASK_IR_PROF
) != 0;
388 Error
populateSymtab(InstrProfSymtab
&Symtab
) override
{
389 return Symtab
.create(HashTable
->keys());
393 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
394 class InstrProfReaderRemapper
{
396 virtual ~InstrProfReaderRemapper() {}
397 virtual Error
populateRemappings() { return Error::success(); }
398 virtual Error
getRecords(StringRef FuncName
,
399 ArrayRef
<NamedInstrProfRecord
> &Data
) = 0;
402 /// Reader for the indexed binary instrprof format.
403 class IndexedInstrProfReader
: public InstrProfReader
{
405 /// The profile data file contents.
406 std::unique_ptr
<MemoryBuffer
> DataBuffer
;
407 /// The profile remapping file contents.
408 std::unique_ptr
<MemoryBuffer
> RemappingBuffer
;
409 /// The index into the profile data.
410 std::unique_ptr
<InstrProfReaderIndexBase
> Index
;
411 /// The remapper used to fuzzily match function names to names in the profile.
412 std::unique_ptr
<InstrProfReaderRemapper
> Remapper
;
413 /// Profile summary data.
414 std::unique_ptr
<ProfileSummary
> Summary
;
415 // Index to the current record in the record array.
416 unsigned RecordIndex
;
418 // Read the profile summary. Return a pointer pointing to one byte past the
419 // end of the summary data if it exists or the input \c Cur.
420 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version
,
421 const unsigned char *Cur
);
424 IndexedInstrProfReader(
425 std::unique_ptr
<MemoryBuffer
> DataBuffer
,
426 std::unique_ptr
<MemoryBuffer
> RemappingBuffer
= nullptr)
427 : DataBuffer(std::move(DataBuffer
)),
428 RemappingBuffer(std::move(RemappingBuffer
)), RecordIndex(0) {}
429 IndexedInstrProfReader(const IndexedInstrProfReader
&) = delete;
430 IndexedInstrProfReader
&operator=(const IndexedInstrProfReader
&) = delete;
432 /// Return the profile version.
433 uint64_t getVersion() const { return Index
->getVersion(); }
434 bool isIRLevelProfile() const override
{ return Index
->isIRLevelProfile(); }
436 /// Return true if the given buffer is in an indexed instrprof format.
437 static bool hasFormat(const MemoryBuffer
&DataBuffer
);
439 /// Read the file header.
440 Error
readHeader() override
;
441 /// Read a single record.
442 Error
readNextRecord(NamedInstrProfRecord
&Record
) override
;
444 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
445 Expected
<InstrProfRecord
> getInstrProfRecord(StringRef FuncName
,
448 /// Fill Counts with the profile data for the given function name.
449 Error
getFunctionCounts(StringRef FuncName
, uint64_t FuncHash
,
450 std::vector
<uint64_t> &Counts
);
452 /// Return the maximum of all known function counts.
453 uint64_t getMaximumFunctionCount() { return Summary
->getMaxFunctionCount(); }
455 /// Factory method to create an indexed reader.
456 static Expected
<std::unique_ptr
<IndexedInstrProfReader
>>
457 create(const Twine
&Path
, const Twine
&RemappingPath
= "");
459 static Expected
<std::unique_ptr
<IndexedInstrProfReader
>>
460 create(std::unique_ptr
<MemoryBuffer
> Buffer
,
461 std::unique_ptr
<MemoryBuffer
> RemappingBuffer
= nullptr);
463 // Used for testing purpose only.
464 void setValueProfDataEndianness(support::endianness Endianness
) {
465 Index
->setValueProfDataEndianness(Endianness
);
468 // See description in the base class. This interface is designed
469 // to be used by llvm-profdata (for dumping). Avoid using this when
470 // the client is the compiler.
471 InstrProfSymtab
&getSymtab() override
;
472 ProfileSummary
&getSummary() { return *(Summary
.get()); }
475 } // end namespace llvm
477 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H