1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains support for reading profiling data for instrumentation
10 // based PGO and coverage.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/LineIterator.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/OnDiskHashTable.h"
26 #include "llvm/Support/SwapByteOrder.h"
38 class InstrProfReader
;
40 /// A file format agnostic iterator over profiling data.
41 class InstrProfIterator
: public std::iterator
<std::input_iterator_tag
,
42 NamedInstrProfRecord
> {
43 InstrProfReader
*Reader
= nullptr;
49 InstrProfIterator() = default;
50 InstrProfIterator(InstrProfReader
*Reader
) : Reader(Reader
) { Increment(); }
52 InstrProfIterator
&operator++() { Increment(); return *this; }
53 bool operator==(const InstrProfIterator
&RHS
) { return Reader
== RHS
.Reader
; }
54 bool operator!=(const InstrProfIterator
&RHS
) { return Reader
!= RHS
.Reader
; }
55 value_type
&operator*() { return Record
; }
56 value_type
*operator->() { return &Record
; }
59 /// Base class and interface for reading profiling data of any known instrprof
60 /// format. Provides an iterator over NamedInstrProfRecords.
61 class InstrProfReader
{
62 instrprof_error LastError
= instrprof_error::success
;
65 InstrProfReader() = default;
66 virtual ~InstrProfReader() = default;
68 /// Read the header. Required before reading first record.
69 virtual Error
readHeader() = 0;
71 /// Read a single record.
72 virtual Error
readNextRecord(NamedInstrProfRecord
&Record
) = 0;
74 /// Iterator over profile data.
75 InstrProfIterator
begin() { return InstrProfIterator(this); }
76 InstrProfIterator
end() { return InstrProfIterator(); }
78 virtual bool isIRLevelProfile() const = 0;
80 virtual bool hasCSIRLevelProfile() const = 0;
82 /// Return the PGO symtab. There are three different readers:
83 /// Raw, Text, and Indexed profile readers. The first two types
84 /// of readers are used only by llvm-profdata tool, while the indexed
85 /// profile reader is also used by llvm-cov tool and the compiler (
86 /// backend or frontend). Since creating PGO symtab can create
87 /// significant runtime and memory overhead (as it touches data
88 /// for the whole program), InstrProfSymtab for the indexed profile
89 /// reader should be created on demand and it is recommended to be
90 /// only used for dumping purpose with llvm-proftool, not with the
92 virtual InstrProfSymtab
&getSymtab() = 0;
94 /// Compute the sum of counts and return in Sum.
95 void accumulateCounts(CountSumOrPercent
&Sum
, bool IsCS
);
98 std::unique_ptr
<InstrProfSymtab
> Symtab
;
100 /// Set the current error and return same.
101 Error
error(instrprof_error Err
) {
103 if (Err
== instrprof_error::success
)
104 return Error::success();
105 return make_error
<InstrProfError
>(Err
);
108 Error
error(Error
&&E
) { return error(InstrProfError::take(std::move(E
))); }
110 /// Clear the current error and return a successful one.
111 Error
success() { return error(instrprof_error::success
); }
114 /// Return true if the reader has finished reading the profile data.
115 bool isEOF() { return LastError
== instrprof_error::eof
; }
117 /// Return true if the reader encountered an error reading profiling data.
118 bool hasError() { return LastError
!= instrprof_error::success
&& !isEOF(); }
120 /// Get the current error.
123 return make_error
<InstrProfError
>(LastError
);
124 return Error::success();
127 /// Factory method to create an appropriately typed reader for the given
129 static Expected
<std::unique_ptr
<InstrProfReader
>> create(const Twine
&Path
);
131 static Expected
<std::unique_ptr
<InstrProfReader
>>
132 create(std::unique_ptr
<MemoryBuffer
> Buffer
);
135 /// Reader for the simple text based instrprof format.
137 /// This format is a simple text format that's suitable for test data. Records
138 /// are separated by one or more blank lines, and record fields are separated by
141 /// Each record consists of a function name, a function hash, a number of
142 /// counters, and then each counter value, in that order.
143 class TextInstrProfReader
: public InstrProfReader
{
145 /// The profile data file contents.
146 std::unique_ptr
<MemoryBuffer
> DataBuffer
;
147 /// Iterator over the profile data.
149 bool IsIRLevelProfile
= false;
150 bool HasCSIRLevelProfile
= false;
152 Error
readValueProfileData(InstrProfRecord
&Record
);
155 TextInstrProfReader(std::unique_ptr
<MemoryBuffer
> DataBuffer_
)
156 : DataBuffer(std::move(DataBuffer_
)), Line(*DataBuffer
, true, '#') {}
157 TextInstrProfReader(const TextInstrProfReader
&) = delete;
158 TextInstrProfReader
&operator=(const TextInstrProfReader
&) = delete;
160 /// Return true if the given buffer is in text instrprof format.
161 static bool hasFormat(const MemoryBuffer
&Buffer
);
163 bool isIRLevelProfile() const override
{ return IsIRLevelProfile
; }
165 bool hasCSIRLevelProfile() const override
{ return HasCSIRLevelProfile
; }
168 Error
readHeader() override
;
170 /// Read a single record.
171 Error
readNextRecord(NamedInstrProfRecord
&Record
) override
;
173 InstrProfSymtab
&getSymtab() override
{
174 assert(Symtab
.get());
175 return *Symtab
.get();
179 /// Reader for the raw instrprof binary format from runtime.
181 /// This format is a raw memory dump of the instrumentation-baed profiling data
182 /// from the runtime. It has no index.
184 /// Templated on the unsigned type whose size matches pointers on the platform
185 /// that wrote the profile.
186 template <class IntPtrT
>
187 class RawInstrProfReader
: public InstrProfReader
{
189 /// The profile data file contents.
190 std::unique_ptr
<MemoryBuffer
> DataBuffer
;
191 bool ShouldSwapBytes
;
192 // The value of the version field of the raw profile data header. The lower 56
193 // bits specifies the format version and the most significant 8 bits specify
194 // the variant types of the profile.
196 uint64_t CountersDelta
;
198 const RawInstrProf::ProfileData
<IntPtrT
> *Data
;
199 const RawInstrProf::ProfileData
<IntPtrT
> *DataEnd
;
200 const uint64_t *CountersStart
;
201 const char *NamesStart
;
203 // After value profile is all read, this pointer points to
204 // the header of next profile data (if exists)
205 const uint8_t *ValueDataStart
;
206 uint32_t ValueKindLast
;
207 uint32_t CurValueDataSize
;
210 RawInstrProfReader(std::unique_ptr
<MemoryBuffer
> DataBuffer
)
211 : DataBuffer(std::move(DataBuffer
)) {}
212 RawInstrProfReader(const RawInstrProfReader
&) = delete;
213 RawInstrProfReader
&operator=(const RawInstrProfReader
&) = delete;
215 static bool hasFormat(const MemoryBuffer
&DataBuffer
);
216 Error
readHeader() override
;
217 Error
readNextRecord(NamedInstrProfRecord
&Record
) override
;
219 bool isIRLevelProfile() const override
{
220 return (Version
& VARIANT_MASK_IR_PROF
) != 0;
223 bool hasCSIRLevelProfile() const override
{
224 return (Version
& VARIANT_MASK_CSIR_PROF
) != 0;
227 InstrProfSymtab
&getSymtab() override
{
228 assert(Symtab
.get());
229 return *Symtab
.get();
233 Error
createSymtab(InstrProfSymtab
&Symtab
);
234 Error
readNextHeader(const char *CurrentPos
);
235 Error
readHeader(const RawInstrProf::Header
&Header
);
237 template <class IntT
> IntT
swap(IntT Int
) const {
238 return ShouldSwapBytes
? sys::getSwappedBytes(Int
) : Int
;
241 support::endianness
getDataEndianness() const {
242 support::endianness HostEndian
= getHostEndianness();
243 if (!ShouldSwapBytes
)
245 if (HostEndian
== support::little
)
248 return support::little
;
251 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes
) {
252 return 7 & (sizeof(uint64_t) - SizeInBytes
% sizeof(uint64_t));
255 Error
readName(NamedInstrProfRecord
&Record
);
256 Error
readFuncHash(NamedInstrProfRecord
&Record
);
257 Error
readRawCounts(InstrProfRecord
&Record
);
258 Error
readValueProfilingData(InstrProfRecord
&Record
);
259 bool atEnd() const { return Data
== DataEnd
; }
263 ValueDataStart
+= CurValueDataSize
;
266 const char *getNextHeaderPos() const {
268 return (const char *)ValueDataStart
;
271 /// Get the offset of \p CounterPtr from the start of the counters section of
272 /// the profile. The offset has units of "number of counters", i.e. increasing
273 /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
274 ptrdiff_t getCounterOffset(IntPtrT CounterPtr
) const {
275 return (swap(CounterPtr
) - CountersDelta
) / sizeof(uint64_t);
278 const uint64_t *getCounter(ptrdiff_t Offset
) const {
279 return CountersStart
+ Offset
;
282 StringRef
getName(uint64_t NameRef
) const {
283 return Symtab
->getFuncName(swap(NameRef
));
287 using RawInstrProfReader32
= RawInstrProfReader
<uint32_t>;
288 using RawInstrProfReader64
= RawInstrProfReader
<uint64_t>;
namespace IndexedInstrProf {

/// Opaque declaration of the hash-type enumeration (32-bit underlying type);
/// the enumerators are defined elsewhere.
enum class HashT : uint32_t;

} // end namespace IndexedInstrProf
296 /// Trait for lookups into the on-disk hash table for the binary instrprof
298 class InstrProfLookupTrait
{
299 std::vector
<NamedInstrProfRecord
> DataBuffer
;
300 IndexedInstrProf::HashT HashType
;
301 unsigned FormatVersion
;
302 // Endianness of the input value profile data.
303 // It should be LE by default, but can be changed
304 // for testing purpose.
305 support::endianness ValueProfDataEndianness
= support::little
;
308 InstrProfLookupTrait(IndexedInstrProf::HashT HashType
, unsigned FormatVersion
)
309 : HashType(HashType
), FormatVersion(FormatVersion
) {}
311 using data_type
= ArrayRef
<NamedInstrProfRecord
>;
313 using internal_key_type
= StringRef
;
314 using external_key_type
= StringRef
;
315 using hash_value_type
= uint64_t;
316 using offset_type
= uint64_t;
318 static bool EqualKey(StringRef A
, StringRef B
) { return A
== B
; }
319 static StringRef
GetInternalKey(StringRef K
) { return K
; }
320 static StringRef
GetExternalKey(StringRef K
) { return K
; }
322 hash_value_type
ComputeHash(StringRef K
);
324 static std::pair
<offset_type
, offset_type
>
325 ReadKeyDataLength(const unsigned char *&D
) {
326 using namespace support
;
328 offset_type KeyLen
= endian::readNext
<offset_type
, little
, unaligned
>(D
);
329 offset_type DataLen
= endian::readNext
<offset_type
, little
, unaligned
>(D
);
330 return std::make_pair(KeyLen
, DataLen
);
333 StringRef
ReadKey(const unsigned char *D
, offset_type N
) {
334 return StringRef((const char *)D
, N
);
337 bool readValueProfilingData(const unsigned char *&D
,
338 const unsigned char *const End
);
339 data_type
ReadData(StringRef K
, const unsigned char *D
, offset_type N
);
341 // Used for testing purpose only.
342 void setValueProfDataEndianness(support::endianness Endianness
) {
343 ValueProfDataEndianness
= Endianness
;
347 struct InstrProfReaderIndexBase
{
348 virtual ~InstrProfReaderIndexBase() = default;
350 // Read all the profile records with the same key pointed to the current
352 virtual Error
getRecords(ArrayRef
<NamedInstrProfRecord
> &Data
) = 0;
354 // Read all the profile records with the key equal to FuncName
355 virtual Error
getRecords(StringRef FuncName
,
356 ArrayRef
<NamedInstrProfRecord
> &Data
) = 0;
357 virtual void advanceToNextKey() = 0;
358 virtual bool atEnd() const = 0;
359 virtual void setValueProfDataEndianness(support::endianness Endianness
) = 0;
360 virtual uint64_t getVersion() const = 0;
361 virtual bool isIRLevelProfile() const = 0;
362 virtual bool hasCSIRLevelProfile() const = 0;
363 virtual Error
populateSymtab(InstrProfSymtab
&) = 0;
366 using OnDiskHashTableImplV3
=
367 OnDiskIterableChainedHashTable
<InstrProfLookupTrait
>;
/// Forward declaration; InstrProfReaderIndex names this template as a friend.
template <typename HashTableImpl>
class InstrProfReaderItaniumRemapper;
372 template <typename HashTableImpl
>
373 class InstrProfReaderIndex
: public InstrProfReaderIndexBase
{
375 std::unique_ptr
<HashTableImpl
> HashTable
;
376 typename
HashTableImpl::data_iterator RecordIterator
;
377 uint64_t FormatVersion
;
379 friend class InstrProfReaderItaniumRemapper
<HashTableImpl
>;
382 InstrProfReaderIndex(const unsigned char *Buckets
,
383 const unsigned char *const Payload
,
384 const unsigned char *const Base
,
385 IndexedInstrProf::HashT HashType
, uint64_t Version
);
386 ~InstrProfReaderIndex() override
= default;
388 Error
getRecords(ArrayRef
<NamedInstrProfRecord
> &Data
) override
;
389 Error
getRecords(StringRef FuncName
,
390 ArrayRef
<NamedInstrProfRecord
> &Data
) override
;
391 void advanceToNextKey() override
{ RecordIterator
++; }
393 bool atEnd() const override
{
394 return RecordIterator
== HashTable
->data_end();
397 void setValueProfDataEndianness(support::endianness Endianness
) override
{
398 HashTable
->getInfoObj().setValueProfDataEndianness(Endianness
);
401 uint64_t getVersion() const override
{ return GET_VERSION(FormatVersion
); }
403 bool isIRLevelProfile() const override
{
404 return (FormatVersion
& VARIANT_MASK_IR_PROF
) != 0;
407 bool hasCSIRLevelProfile() const override
{
408 return (FormatVersion
& VARIANT_MASK_CSIR_PROF
) != 0;
411 Error
populateSymtab(InstrProfSymtab
&Symtab
) override
{
412 return Symtab
.create(HashTable
->keys());
416 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
417 class InstrProfReaderRemapper
{
419 virtual ~InstrProfReaderRemapper() {}
420 virtual Error
populateRemappings() { return Error::success(); }
421 virtual Error
getRecords(StringRef FuncName
,
422 ArrayRef
<NamedInstrProfRecord
> &Data
) = 0;
425 /// Reader for the indexed binary instrprof format.
426 class IndexedInstrProfReader
: public InstrProfReader
{
428 /// The profile data file contents.
429 std::unique_ptr
<MemoryBuffer
> DataBuffer
;
430 /// The profile remapping file contents.
431 std::unique_ptr
<MemoryBuffer
> RemappingBuffer
;
432 /// The index into the profile data.
433 std::unique_ptr
<InstrProfReaderIndexBase
> Index
;
434 /// The profile remapping file contents.
435 std::unique_ptr
<InstrProfReaderRemapper
> Remapper
;
436 /// Profile summary data.
437 std::unique_ptr
<ProfileSummary
> Summary
;
438 /// Context sensitive profile summary data.
439 std::unique_ptr
<ProfileSummary
> CS_Summary
;
440 // Index to the current record in the record array.
441 unsigned RecordIndex
;
443 // Read the profile summary. Return a pointer pointing to one byte past the
444 // end of the summary data if it exists or the input \c Cur.
445 // \c UseCS indicates whether to use the context-sensitive profile summary.
446 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version
,
447 const unsigned char *Cur
, bool UseCS
);
450 IndexedInstrProfReader(
451 std::unique_ptr
<MemoryBuffer
> DataBuffer
,
452 std::unique_ptr
<MemoryBuffer
> RemappingBuffer
= nullptr)
453 : DataBuffer(std::move(DataBuffer
)),
454 RemappingBuffer(std::move(RemappingBuffer
)), RecordIndex(0) {}
455 IndexedInstrProfReader(const IndexedInstrProfReader
&) = delete;
456 IndexedInstrProfReader
&operator=(const IndexedInstrProfReader
&) = delete;
458 /// Return the profile version.
459 uint64_t getVersion() const { return Index
->getVersion(); }
460 bool isIRLevelProfile() const override
{ return Index
->isIRLevelProfile(); }
461 bool hasCSIRLevelProfile() const override
{
462 return Index
->hasCSIRLevelProfile();
465 /// Return true if the given buffer is in an indexed instrprof format.
466 static bool hasFormat(const MemoryBuffer
&DataBuffer
);
468 /// Read the file header.
469 Error
readHeader() override
;
470 /// Read a single record.
471 Error
readNextRecord(NamedInstrProfRecord
&Record
) override
;
473 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
474 Expected
<InstrProfRecord
> getInstrProfRecord(StringRef FuncName
,
477 /// Fill Counts with the profile data for the given function name.
478 Error
getFunctionCounts(StringRef FuncName
, uint64_t FuncHash
,
479 std::vector
<uint64_t> &Counts
);
481 /// Return the maximum of all known function counts.
482 /// \c UseCS indicates whether to use the context-sensitive count.
483 uint64_t getMaximumFunctionCount(bool UseCS
) {
485 assert(CS_Summary
&& "No context sensitive profile summary");
486 return CS_Summary
->getMaxFunctionCount();
488 assert(Summary
&& "No profile summary");
489 return Summary
->getMaxFunctionCount();
493 /// Factory method to create an indexed reader.
494 static Expected
<std::unique_ptr
<IndexedInstrProfReader
>>
495 create(const Twine
&Path
, const Twine
&RemappingPath
= "");
497 static Expected
<std::unique_ptr
<IndexedInstrProfReader
>>
498 create(std::unique_ptr
<MemoryBuffer
> Buffer
,
499 std::unique_ptr
<MemoryBuffer
> RemappingBuffer
= nullptr);
501 // Used for testing purpose only.
502 void setValueProfDataEndianness(support::endianness Endianness
) {
503 Index
->setValueProfDataEndianness(Endianness
);
506 // See description in the base class. This interface is designed
507 // to be used by llvm-profdata (for dumping). Avoid using this when
508 // the client is the compiler.
509 InstrProfSymtab
&getSymtab() override
;
511 /// Return the profile summary.
512 /// \c UseCS indicates whether to use the context-sensitive summary.
513 ProfileSummary
&getSummary(bool UseCS
) {
515 assert(CS_Summary
&& "No context sensitive summary");
516 return *(CS_Summary
.get());
518 assert(Summary
&& "No profile summary");
519 return *(Summary
.get());
524 } // end namespace llvm
526 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H