1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This family of functions reads profile data written by perf record,
10 // aggregates it and then writes it back to an output file.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Passes/BinaryPasses.h"
18 #include "bolt/Profile/BoltAddressTranslation.h"
19 #include "bolt/Profile/Heatmap.h"
20 #include "bolt/Profile/YAMLProfileWriter.h"
21 #include "bolt/Utils/CommandLineOpts.h"
22 #include "bolt/Utils/Utils.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/ScopeExit.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Compiler.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/Errc.h"
29 #include "llvm/Support/FileSystem.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Program.h"
32 #include "llvm/Support/Regex.h"
33 #include "llvm/Support/Timer.h"
34 #include "llvm/Support/raw_ostream.h"
37 #include <unordered_map>
40 #define DEBUG_TYPE "aggregator"
// Command-line options controlling perf data aggregation. Every option whose
// category is visible here is registered under AggregatorCategory.
// -nl: aggregate plain samples instead of LBR records.
48 BasicAggregation("nl",
49 cl::desc("aggregate basic samples (without LBR info)"),
50 cl::cat(AggregatorCategory
));
// -itrace: string option; DataAggregator::start() substitutes its value into
// the `--itrace=` argument of `perf script`.
52 static cl::opt
<std::string
>
53 ITraceAggregation("itrace",
54 cl::desc("Generate LBR info with perf itrace argument"),
55 cl::cat(AggregatorCategory
));
58 FilterMemProfile("filter-mem-profile",
59 cl::desc("if processing a memory profile, filter out stack or heap accesses "
60 "that won't be useful for BOLT to reduce profile file size"),
62 cl::cat(AggregatorCategory
));
// PID filter option (referenced as opts::FilterPID in filterBinaryMMapInfo()).
64 static cl::opt
<unsigned long long>
66 cl::desc("only use samples from process with specified PID"),
69 cl::cat(AggregatorCategory
));
72 IgnoreBuildID("ignore-build-id",
73 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
75 cl::cat(AggregatorCategory
));
// Defaults to true; preprocessProfile() forces it to false when
// BC.IsLinuxKernel is set.
77 static cl::opt
<bool> IgnoreInterruptLBR(
78 "ignore-interrupt-lbr",
79 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
80 cl::init(true), cl::cat(AggregatorCategory
));
82 static cl::opt
<unsigned long long>
83 MaxSamples("max-samples",
85 cl::desc("maximum number of samples to read from LBR profile"),
88 cl::cat(AggregatorCategory
));
// Options declared extern: their definitions are not in the visible part of
// this file.
90 extern cl::opt
<opts::ProfileFormatKind
> ProfileFormat
;
91 extern cl::opt
<bool> ProfileUsePseudoProbes
;
92 extern cl::opt
<std::string
> SaveProfile
;
// -pa: the visible declaration carries no `static`, unlike most of its
// neighbours.
94 cl::opt
<bool> ReadPreAggregated(
95 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
96 cl::cat(AggregatorCategory
));
// -time-aggr: passed as the last NamedRegionTimer argument in
// writeAutoFDOData().
99 TimeAggregator("time-aggr",
100 cl::desc("time BOLT aggregator"),
103 cl::cat(AggregatorCategory
));
106 UseEventPC("use-event-pc",
107 cl::desc("use event PC in combination with LBR sampling"),
108 cl::cat(AggregatorCategory
));
// -autofdo: when set, preprocessProfile() calls writeAutoFDOData().
110 static cl::opt
<bool> WriteAutoFDOData(
111 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
112 cl::cat(AggregatorCategory
));
// Timer group name and description handed to NamedRegionTimer (see
// writeAutoFDOData()).
118 const char TimerGroupName
[] = "aggregator";
119 const char TimerGroupDesc
[] = "Aggregator";
// Builds a list of SectionNameAndRange entries from the sections of \p BC.
// Each entry is a {name, address, end address} triple. The loop tests
// Section.isText() and Section.getSize() == 0 before building an entry
// (presumably to skip non-text and empty sections -- the statements guarded by
// those tests are not visible here). The comparator at the end orders entries
// by ascending BeginAddress.
121 std::vector
<SectionNameAndRange
> getTextSections(const BinaryContext
*BC
) {
122 std::vector
<SectionNameAndRange
> sections
;
123 for (BinarySection
&Section
: BC
->sections()) {
124 if (!Section
.isText())
126 if (Section
.getSize() == 0)
129 {Section
.getName(), Section
.getAddress(), Section
.getEndAddress()});
// Strict-weak ordering on the section start address.
132 [](const SectionNameAndRange
&A
, const SectionNameAndRange
&B
) {
133 return A
.BeginAddress
< B
.BeginAddress
;
// Namespace-scope declaration of the constexpr static member
// DataAggregator::KernelBaseAddr (no initializer here; its value is not
// visible in this part of the file).
139 constexpr uint64_t DataAggregator::KernelBaseAddr
;
// Destructor: calls deleteTempFiles() to remove the files recorded in
// TempFiles.
141 DataAggregator::~DataAggregator() { deleteTempFiles(); }
// Removes \p FileName with sys::fs::remove(). A failure is not propagated:
// the error message is printed to errs().
144 void deleteTempFile(const std::string
&FileName
) {
145 if (std::error_code Errc
= sys::fs::remove(FileName
.c_str()))
146 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
147 << " with error " << Errc
.message() << "\n";
// Calls deleteTempFile() on every path recorded in TempFiles
// (launchPerfProcess() appends the stdout/stderr capture files there).
151 void DataAggregator::deleteTempFiles() {
152 for (std::string
&FileName
: TempFiles
)
153 deleteTempFile(FileName
);
// Searches the directories listed in the PATH environment variable for an
// executable named "perf". If none is found, a message is printed to outs()
// (what follows the message is not visible here); otherwise the resolved path
// is stored in PerfPath.
157 void DataAggregator::findPerfExecutable() {
158 std::optional
<std::string
> PerfExecutable
=
159 sys::Process::FindInEnvPath("PATH", "perf");
160 if (!PerfExecutable
) {
161 outs() << "PERF2BOLT: No perf executable found!\n";
164 PerfPath
= *PerfExecutable
;
// Announces the aggregation job for Filename and launches the `perf script`
// jobs whose output is parsed later:
//   * main events: `-F pid,event,ip` with -nl, `-F pid,ip,brstack --itrace=...`
//     when -itrace is non-empty, `-F pid,ip,brstack` otherwise;
//   * mem events, mmap ("process") events and task events.
// The pre-aggregated check comes first so that perf is not launched for -pa
// input.
167 void DataAggregator::start() {
168 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename
<< "\n";
170 // Don't launch perf for pre-aggregated files
171 if (opts::ReadPreAggregated
)
174 findPerfExecutable();
176 if (opts::BasicAggregation
) {
177 launchPerfProcess("events without LBR",
179 "script -F pid,event,ip",
181 } else if (!opts::ITraceAggregation
.empty()) {
// The -itrace value is spliced into the perf script command line.
182 std::string ItracePerfScriptArgs
= llvm::formatv(
183 "script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation
);
184 launchPerfProcess("branch events with itrace", MainEventsPPI
,
185 ItracePerfScriptArgs
.c_str(),
188 launchPerfProcess("branch events",
190 "script -F pid,ip,brstack",
194 // Note: we launch script for mem events regardless of the option, as the
195 // command fails fairly fast if mem events were not collected.
196 launchPerfProcess("mem events",
198 "script -F pid,event,addr,ip",
201 launchPerfProcess("process events", MMapEventsPPI
,
202 "script --show-mmap-events --no-itrace",
205 launchPerfProcess("task events", TaskEventsPPI
,
206 "script --show-task-events --no-itrace",
// Cleans up the perf jobs started by start(). The -pa check comes first
// (no perf jobs are launched for pre-aggregated input). Each of the four jobs
// is then passed to sys::Wait with a timeout argument of 1; any error text is
// written to Error.
210 void DataAggregator::abort() {
211 if (opts::ReadPreAggregated
)
216 // Kill subprocesses in case they are not finished
217 sys::Wait(TaskEventsPPI
.PI
, 1, &Error
);
218 sys::Wait(MMapEventsPPI
.PI
, 1, &Error
);
219 sys::Wait(MainEventsPPI
.PI
, 1, &Error
);
220 sys::Wait(MemEventsPPI
.PI
, 1, &Error
);
// Spawns one perf job.
//
// \p Name       human-readable job name, used in log output only.
// \p PPI        receives the stdout/stderr capture paths and the process
//               info (or return code) of the job.
// \p ArgsString space-separated perf arguments; split on ' ' into argv.
// \p Wait       selects between the two launch calls at the end of the
//               function (the condition itself is not visible here).
//
// argv is: PerfPath, the split ArgsString, then `-f -i <Filename>`. Two
// temporary files are created to capture the child's stdout and stderr; both
// are appended to TempFiles so they can be deleted later.
227 void DataAggregator::launchPerfProcess(StringRef Name
, PerfProcessInfo
&PPI
,
228 const char *ArgsString
, bool Wait
) {
229 SmallVector
<StringRef
, 4> Argv
;
231 outs() << "PERF2BOLT: spawning perf job to read " << Name
<< '\n';
232 Argv
.push_back(PerfPath
.data());
234 StringRef(ArgsString
).split(Argv
, ' ');
235 Argv
.push_back("-f");
236 Argv
.push_back("-i");
237 Argv
.push_back(Filename
.c_str());
// Capture file for the child's stdout.
239 if (std::error_code Errc
=
240 sys::fs::createTemporaryFile("perf.script", "out", PPI
.StdoutPath
)) {
241 errs() << "PERF2BOLT: failed to create temporary file " << PPI
.StdoutPath
242 << " with error " << Errc
.message() << "\n";
245 TempFiles
.push_back(PPI
.StdoutPath
.data());
// Capture file for the child's stderr.
247 if (std::error_code Errc
=
248 sys::fs::createTemporaryFile("perf.script", "err", PPI
.StderrPath
)) {
249 errs() << "PERF2BOLT: failed to create temporary file " << PPI
.StderrPath
250 << " with error " << Errc
.message() << "\n";
253 TempFiles
.push_back(PPI
.StderrPath
.data());
// stdin is left untouched; stdout and stderr go to the temporary files.
255 std::optional
<StringRef
> Redirects
[] = {
256 std::nullopt
, // Stdin
257 StringRef(PPI
.StdoutPath
.data()), // Stdout
258 StringRef(PPI
.StderrPath
.data())}; // Stderr
261 dbgs() << "Launching perf: ";
262 for (StringRef Arg
: Argv
)
263 dbgs() << Arg
<< " ";
264 dbgs() << " 1> " << PPI
.StdoutPath
.data() << " 2> " << PPI
.StderrPath
.data()
// Blocking launch: only the return code is stored.
269 PPI
.PI
.ReturnCode
= sys::ExecuteAndWait(PerfPath
.data(), Argv
,
270 /*envp*/ std::nullopt
, Redirects
);
// Non-blocking launch: the process info is stored for a later sys::Wait.
272 PPI
.PI
= sys::ExecuteNoWait(PerfPath
.data(), Argv
, /*envp*/ std::nullopt
,
// Runs a perf "buildid list" job and compares its output against
// \p FileBuildID, the build-id of the input binary.
//
// Outcomes visible in this function:
//   * perf returned non-zero: the captured stderr is read and a PERF-ERROR
//     is printed;
//   * the build-id is found under a different file name: a warning is printed
//     and BuildIDBinaryName is set to the name reported by perf;
//   * the build-id and file name both match: a confirmation is printed;
//   * no file name is found: an error (when hasAllBuildIDs()) or a warning
//     that the build-id cannot be checked.
276 void DataAggregator::processFileBuildID(StringRef FileBuildID
) {
277 PerfProcessInfo BuildIDProcessInfo
;
278 launchPerfProcess("buildid list",
283 if (BuildIDProcessInfo
.PI
.ReturnCode
!= 0) {
284 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
285 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo
.StderrPath
.data());
// NOTE(review): MB is dereferenced without a visible getError() check; if the
// stderr capture file cannot be opened this dereference is invalid -- confirm.
286 StringRef ErrBuf
= (*MB
)->getBuffer();
288 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo
.PI
.ReturnCode
294 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
295 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo
.StdoutPath
.data());
296 if (std::error_code EC
= MB
.getError()) {
297 errs() << "Cannot open " << BuildIDProcessInfo
.StdoutPath
.data() << ": "
298 << EC
.message() << "\n";
// Hand the perf output to the parser state used by getFileNameForBuildID().
302 FileBuf
= std::move(*MB
);
303 ParsingBuf
= FileBuf
->getBuffer();
305 std::optional
<StringRef
> FileName
= getFileNameForBuildID(FileBuildID
);
307 if (hasAllBuildIDs()) {
308 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
309 "This indicates the input binary supplied for data aggregation "
310 "is not the same recorded by perf when collecting profiling "
311 "data, or there were no samples recorded for the binary. "
312 "Use -ignore-build-id option to override.\n";
313 if (!opts::IgnoreBuildID
)
316 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
317 "data was recorded without it\n";
320 } else if (*FileName
!= llvm::sys::path::filename(BC
->getFilename())) {
321 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
322 BuildIDBinaryName
= std::string(*FileName
);
324 outs() << "PERF2BOLT: matched build-id and file name\n";
// Checks whether \p FileName looks like a perf.data file by reading the first
// sizeof(Buf) == 7 bytes at offset 0 and comparing them with "PERFILE".
// Errors from opening or reading the file are consumed rather than reported.
// With -pa the check is short-circuited (the returned value is not visible
// here).
328 bool DataAggregator::checkPerfDataMagic(StringRef FileName
) {
329 if (opts::ReadPreAggregated
)
332 Expected
<sys::fs::file_t
> FD
= sys::fs::openNativeFileForRead(FileName
);
334 consumeError(FD
.takeError());
338 char Buf
[7] = {0, 0, 0, 0, 0, 0, 0};
// Close the descriptor on every exit path from here on.
340 auto Close
= make_scope_exit([&] { sys::fs::closeFile(*FD
); });
341 Expected
<size_t> BytesRead
= sys::fs::readNativeFileSlice(
342 *FD
, MutableArrayRef(Buf
, sizeof(Buf
)), 0);
344 consumeError(BytesRead
.takeError());
351 if (strncmp(Buf
, "PERFILE", 7) == 0)
// Loads the pre-aggregated profile named by Filename (or stdin, per
// MemoryBuffer::getFileOrSTDIN) into FileBuf/ParsingBuf and parses it with
// parsePreAggregatedLBRSamples(). Failures to open or parse are reported on
// errs().
356 void DataAggregator::parsePreAggregated() {
359 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
360 MemoryBuffer::getFileOrSTDIN(Filename
);
361 if (std::error_code EC
= MB
.getError()) {
362 errs() << "PERF2BOLT-ERROR: cannot open " << Filename
<< ": "
363 << EC
.message() << "\n";
367 FileBuf
= std::move(*MB
);
368 ParsingBuf
= FileBuf
->getBuffer();
// A truthy result from the parser signals failure.
371 if (parsePreAggregatedLBRSamples()) {
372 errs() << "PERF2BOLT: failed to parse samples\n";
// Writes the aggregated profile to \p OutputFilename in the textual layout
// sketched in the comments below: three sections (fall-through traces, basic
// sample addresses, taken branches), each introduced by its entry count.
// Addresses go through filterAddress, which rebases them on
// BC->FirstAllocAddress. Returns a default-constructed std::error_code at the
// end of the function.
377 std::error_code
DataAggregator::writeAutoFDOData(StringRef OutputFilename
) {
378 outs() << "PERF2BOLT: writing data for autofdo tools...\n";
379 NamedRegionTimer
T("writeAutoFDO", "Processing branch events", TimerGroupName
,
380 TimerGroupDesc
, opts::TimeAggregator
);
383 raw_fd_ostream
OutFile(OutputFilename
, EC
, sys::fs::OpenFlags::OF_None
);
388 // number of unique traces
389 // from_1-to_1:count_1
390 // from_2-to_2:count_2
392 // from_n-to_n:count_n
393 // number of unique sample addresses
398 // number of unique LBR entries
399 // src_1->dst_1:count_1
400 // src_2->dst_2:count_2
402 // src_n->dst_n:count_n
404 const uint64_t FirstAllocAddress
= this->BC
->FirstAllocAddress
;
406 // AutoFDO addresses are relative to the first allocated loadable program
// Addresses below FirstAllocAddress take the guarded branch (its result is
// not visible here); all others are returned as an offset from it.
408 auto filterAddress
= [&FirstAllocAddress
](uint64_t Address
) -> uint64_t {
409 if (Address
< FirstAllocAddress
)
411 return Address
- FirstAllocAddress
;
// Section 1: fall-through traces, counted as InternCount + ExternCount.
414 OutFile
<< FallthroughLBRs
.size() << "\n";
415 for (const auto &[Trace
, Info
] : FallthroughLBRs
) {
416 OutFile
<< formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace
.From
),
417 filterAddress(Trace
.To
),
418 Info
.InternCount
+ Info
.ExternCount
);
// Section 2: sampled addresses with their hit counts.
421 OutFile
<< BasicSamples
.size() << "\n";
422 for (const auto [PC
, HitCount
] : BasicSamples
)
423 OutFile
<< formatv("{0:x-}:{1}\n", filterAddress(PC
), HitCount
);
// Section 3: taken branches.
425 OutFile
<< BranchLBRs
.size() << "\n";
426 for (const auto &[Trace
, Info
] : BranchLBRs
) {
427 OutFile
<< formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace
.From
),
428 filterAddress(Trace
.To
), Info
.TakenCount
);
431 outs() << "PERF2BOLT: wrote " << FallthroughLBRs
.size() << " unique traces, "
432 << BasicSamples
.size() << " sample addresses and " << BranchLBRs
.size()
433 << " unique branches to " << OutputFilename
<< "\n";
435 return std::error_code();
// Applies the PID filter option (opts::FilterPID) to BinaryMMapInfo. When a
// mapping for that PID exists, BinaryMMapInfo is reduced to that single entry.
// Otherwise an error naming the requested PID and the binary is printed,
// followed by the list of PIDs for which a profile is available.
438 void DataAggregator::filterBinaryMMapInfo() {
439 if (opts::FilterPID
) {
440 auto MMapInfoIter
= BinaryMMapInfo
.find(opts::FilterPID
);
441 if (MMapInfoIter
!= BinaryMMapInfo
.end()) {
// Copy the entry out before clear() invalidates the iterator.
442 MMapInfo MMap
= MMapInfoIter
->second
;
443 BinaryMMapInfo
.clear();
444 BinaryMMapInfo
.insert(std::make_pair(MMap
.PID
, MMap
));
446 if (errs().has_colors())
447 errs().changeColor(raw_ostream::RED
);
448 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
449 << opts::FilterPID
<< "\""
450 << " for binary \"" << BC
->getFilename() << "\".";
451 assert(!BinaryMMapInfo
.empty() && "No memory map for matching binary");
452 errs() << " Profile for the following process is available:\n";
// NOTE(review): the PID list is written to outs() while the surrounding
// message goes to errs(), so the two may be separated when the streams are
// redirected differently -- confirm whether errs() was intended.
453 for (std::pair
<const uint64_t, MMapInfo
> &MMI
: BinaryMMapInfo
)
454 outs() << " " << MMI
.second
.PID
455 << (MMI
.second
.Forked
? " (forked)\n" : "\n");
457 if (errs().has_colors())
// Waits for the perf job \p Process to finish and makes its stdout the current
// parsing buffer.
//
// \p Name     job name used in log messages.
// \p Process  the job to wait for; its StdoutPath/StderrPath are read.
// \p Callback invoked with (return code, captured stderr) when perf exits
//             with a non-zero code.
// \returns perf's return code (both visible return statements return
//          PI.ReturnCode).
465 int DataAggregator::prepareToParse(StringRef Name
, PerfProcessInfo
&Process
,
466 PerfProcessErrorCallbackTy Callback
) {
468 outs() << "PERF2BOLT: waiting for perf " << Name
469 << " collection to finish...\n";
// std::nullopt timeout: block until the child exits.
470 sys::ProcessInfo PI
= sys::Wait(Process
.PI
, std::nullopt
, &Error
);
472 if (!Error
.empty()) {
473 errs() << "PERF-ERROR: " << PerfPath
<< ": " << Error
<< "\n";
478 if (PI
.ReturnCode
!= 0) {
479 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> ErrorMB
=
480 MemoryBuffer::getFileOrSTDIN(Process
.StderrPath
.data());
// NOTE(review): ErrorMB is dereferenced without a visible getError() check --
// confirm the stderr capture file is guaranteed to be readable here.
481 StringRef ErrBuf
= (*ErrorMB
)->getBuffer();
484 Callback(PI
.ReturnCode
, ErrBuf
);
485 return PI
.ReturnCode
;
488 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
489 MemoryBuffer::getFileOrSTDIN(Process
.StdoutPath
.data());
490 if (std::error_code EC
= MB
.getError()) {
491 errs() << "Cannot open " << Process
.StdoutPath
.data() << ": "
492 << EC
.message() << "\n";
// The captured stdout becomes the buffer the parse*() routines consume.
497 FileBuf
= std::move(*MB
);
498 ParsingBuf
= FileBuf
->getBuffer();
501 return PI
.ReturnCode
;
// Drives parsing of the perf output (or of a pre-aggregated file).
//
// Visible sequence:
//   1. -pa: parsePreAggregated() and return success.
//   2. Check the binary's build-id against perf data when one can be read.
//   3. Parse mmap events, then task events, then apply the PID filter.
//   4. Parse the main events: heat map in HeatmapMode, otherwise branch or
//      basic samples depending on -nl.
//   5. With -autofdo, write AutoFDO data.
//   6. Parse mem events; a non-zero result from prepareToParse() for that job
//      returns success without parsing them.
// Parse failures are reported on errs(); every visible return is
// Error::success().
504 Error
DataAggregator::preprocessProfile(BinaryContext
&BC
) {
507 if (opts::ReadPreAggregated
) {
508 parsePreAggregated();
509 return Error::success();
512 if (std::optional
<StringRef
> FileBuildID
= BC
.getFileBuildID()) {
513 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID
<< "\n";
514 processFileBuildID(*FileBuildID
);
516 errs() << "BOLT-WARNING: build-id will not be checked because we could "
517 "not read one from input binary\n";
// Default handler for a failed perf job: print the return code and the
// captured stderr.
520 auto ErrorCallback
= [](int ReturnCode
, StringRef ErrBuf
) {
521 errs() << "PERF-ERROR: return code " << ReturnCode
<< "\n" << ErrBuf
;
// Handler for the mem events job: stderr matching the "no ADDR attribute"
// pattern is not forwarded to ErrorCallback.
525 auto MemEventsErrorCallback
= [&](int ReturnCode
, StringRef ErrBuf
) {
526 Regex
NoData("Samples for '.*' event do not have ADDR attribute set. "
527 "Cannot print 'addr' field.");
528 if (!NoData
.match(ErrBuf
))
529 ErrorCallback(ReturnCode
, ErrBuf
);
532 if (BC
.IsLinuxKernel
) {
533 // Current MMap parsing logic does not work with linux kernel.
534 // MMap entries for linux kernel uses PERF_RECORD_MMAP
535 // format instead of typical PERF_RECORD_MMAP2 format.
536 // Since linux kernel address mapping is absolute (same as
537 // in the ELF file), we avoid parsing MMap in linux kernel mode.
538 // While generating optimized linux kernel binary, we may need
539 // to parse MMap entries.
541 // In linux kernel mode, we analyze and optimize
542 // all linux kernel binary instructions, irrespective
543 // of whether they are due to system calls or due to
544 // interrupts. Therefore, we cannot ignore interrupt
545 // in Linux kernel mode.
546 opts::IgnoreInterruptLBR
= false;
548 prepareToParse("mmap events", MMapEventsPPI
, ErrorCallback
);
549 if (parseMMapEvents())
550 errs() << "PERF2BOLT: failed to parse mmap events\n";
553 prepareToParse("task events", TaskEventsPPI
, ErrorCallback
);
554 if (parseTaskEvents())
555 errs() << "PERF2BOLT: failed to parse task events\n";
557 filterBinaryMMapInfo();
558 prepareToParse("events", MainEventsPPI
, ErrorCallback
);
560 if (opts::HeatmapMode
) {
561 if (std::error_code EC
= printLBRHeatMap()) {
562 errs() << "ERROR: failed to print heat map: " << EC
.message() << '\n';
// Exactly one of the two parsers runs, selected by -nl.
568 if ((!opts::BasicAggregation
&& parseBranchEvents()) ||
569 (opts::BasicAggregation
&& parseBasicEvents()))
570 errs() << "PERF2BOLT: failed to parse samples\n";
572 // We can finish early if the goal is just to generate data for autofdo
573 if (opts::WriteAutoFDOData
) {
574 if (std::error_code EC
= writeAutoFDOData(opts::OutputFilename
))
575 errs() << "Error writing autofdo data to file: " << EC
.message() << "\n";
581 // Special handling for memory events
582 if (prepareToParse("mem events", MemEventsPPI
, MemEventsErrorCallback
))
583 return Error::success();
585 if (const std::error_code EC
= parseMemEvents())
586 errs() << "PERF2BOLT: failed to parse memory events: " << EC
.message()
591 return Error::success();
// Attaches the aggregated branch data to every function in \p BC via
// convertBranchData(), then writes output files as requested:
//   * with opts::AggregateOnly and the fdata profile format,
//     writeAggregatedFile() to opts::OutputFilename;
//   * with the YAML profile format, writeBATYAML() to opts::OutputFilename;
//   * with a non-empty -SaveProfile value, writeBATYAML() to that path.
// Write failures go to report_error(). The visible return is
// Error::success().
594 Error
DataAggregator::readProfile(BinaryContext
&BC
) {
597 for (auto &BFI
: BC
.getBinaryFunctions()) {
598 BinaryFunction
&Function
= BFI
.second
;
599 convertBranchData(Function
);
602 if (opts::AggregateOnly
) {
603 if (opts::ProfileFormat
== opts::ProfileFormatKind::PF_Fdata
)
604 if (std::error_code EC
= writeAggregatedFile(opts::OutputFilename
))
605 report_error("cannot create output data file", EC
);
607 // BAT YAML is handled by DataAggregator since normal YAML output requires
608 // CFG which is not available in BAT mode.
610 if (opts::ProfileFormat
== opts::ProfileFormatKind::PF_YAML
)
611 if (std::error_code EC
= writeBATYAML(BC
, opts::OutputFilename
))
612 report_error("cannot create output data file", EC
);
613 if (!opts::SaveProfile
.empty())
614 if (std::error_code EC
= writeBATYAML(BC
, opts::SaveProfile
))
615 report_error("cannot create output data file", EC
);
619 return Error::success();
// Returns \p Function's hasProfileAvailable() flag unchanged.
622 bool DataAggregator::mayHaveProfileData(const BinaryFunction
&Function
) {
623 return Function
.hasProfileAvailable();
// Turns the parsed events into per-function profile data.
//
// Dispatches to processPreAggregated(), processBasicEvents() or
// processBranchEvents() depending on -pa / -nl. It then marks every function
// in \p BC that has branch data or sample data as profiled (PF_SAMPLE with
// -nl, PF_LBR otherwise), stable-sorts the collected branch and memory-event
// records, and clears intermediate containers.
626 void DataAggregator::processProfile(BinaryContext
&BC
) {
627 if (opts::ReadPreAggregated
)
628 processPreAggregated();
629 else if (opts::BasicAggregation
)
630 processBasicEvents();
632 processBranchEvents();
636 // Mark all functions with registered events as having a valid profile.
637 const auto Flags
= opts::BasicAggregation
? BinaryFunction::PF_SAMPLE
638 : BinaryFunction::PF_LBR
;
639 for (auto &BFI
: BC
.getBinaryFunctions()) {
640 BinaryFunction
&BF
= BFI
.second
;
641 if (getBranchData(BF
) || getFuncSampleData(BF
.getNames()))
642 BF
.markProfiled(Flags
);
// Sort the per-function records with a stable sort.
645 for (auto &FuncBranches
: NamesToBranches
)
646 llvm::stable_sort(FuncBranches
.second
.Data
);
648 for (auto &MemEvents
: NamesToMemEvents
)
649 llvm::stable_sort(MemEvents
.second
.Data
);
651 // Release intermediate storage.
653 clear(FallthroughLBRs
);
654 clear(AggregatedLBRs
);
// Looks up the function covering \p Address. BC->containsAddress() is checked
// first; if that passes, the lookup is delegated to the BinaryContext with
// CheckPastEnd disabled and UseMaxSize enabled.
660 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address
) const {
661 if (!BC
->containsAddress(Address
))
664 return BC
->getBinaryFunctionContainingAddress(Address
, /*CheckPastEnd=*/false,
665 /*UseMaxSize=*/true);
// Asks BAT for the parent address of \p Func (fetchParentAddress on Func's
// address). When that address is non-zero, returns the function containing
// it. The result for a zero address is not visible here.
669 DataAggregator::getBATParentFunction(const BinaryFunction
&Func
) const {
671 if (const uint64_t HotAddr
= BAT
->fetchParentAddress(Func
.getAddress()))
672 return getBinaryFunctionContainingAddress(HotAddr
);
// Picks the name under which \p Func is recorded in the profile.
// One path returns Func.getOneName() directly (its condition is not visible
// here). Otherwise the function's names are scanned and a name containing at
// least two '/' characters is preferred; if none qualifies, getOneName() is
// returned.
676 StringRef
DataAggregator::getLocationName(const BinaryFunction
&Func
,
679 return Func
.getOneName();
681 const BinaryFunction
*OrigFunc
= &Func
;
682 // If it is a local function, prefer the name containing the file name where
683 // the local function was declared
684 for (StringRef AlternativeName
: OrigFunc
->getNames()) {
685 size_t FileNameIdx
= AlternativeName
.find('/');
686 // Confirm the alternative name has the pattern Symbol/FileName/1 before
// Both a first '/' and a second '/' after it must be present.
688 if (FileNameIdx
== StringRef::npos
||
689 AlternativeName
.find('/', FileNameIdx
+ 1) == StringRef::npos
)
691 return AlternativeName
;
693 return OrigFunc
->getOneName();
// Records Count samples at \p Address for \p OrigFunc.
// If getBATParentFunction() returns a parent, the sample is attributed to the
// parent. The entry in NamesToSamples is keyed by the function's getOneName()
// and created on first use with the name from getLocationName(). \p Address
// is converted to a function-relative offset, optionally translated through
// BAT, and then passed to bumpCount().
696 bool DataAggregator::doSample(BinaryFunction
&OrigFunc
, uint64_t Address
,
698 BinaryFunction
*ParentFunc
= getBATParentFunction(OrigFunc
);
699 BinaryFunction
&Func
= ParentFunc
? *ParentFunc
: OrigFunc
;
701 NumColdSamples
+= Count
;
703 auto I
= NamesToSamples
.find(Func
.getOneName());
704 if (I
== NamesToSamples
.end()) {
706 StringRef LocName
= getLocationName(Func
, BAT
);
707 std::tie(I
, Success
) = NamesToSamples
.insert(
708 std::make_pair(Func
.getOneName(),
709 FuncSampleData(LocName
, FuncSampleData::ContainerTy())));
// From here on Address is an offset from the function start.
712 Address
-= Func
.getAddress();
714 Address
= BAT
->translate(Func
.getAddress(), Address
, /*IsBranchSrc=*/false);
716 I
->second
.bumpCount(Address
, Count
);
// Registers a branch from offset \p From to offset \p To inside \p Func.
// The function's FuncBranchData is fetched with getBranchData(); the visible
// fallback creates it in NamesToBranches under getOneName(), names it with
// getLocationName() and attaches it with setBranchData(). The branch count is
// then bumped by \p Count with the given misprediction count.
720 bool DataAggregator::doIntraBranch(BinaryFunction
&Func
, uint64_t From
,
721 uint64_t To
, uint64_t Count
,
723 FuncBranchData
*AggrData
= getBranchData(Func
);
725 AggrData
= &NamesToBranches
[Func
.getOneName()];
726 AggrData
->Name
= getLocationName(Func
, BAT
);
727 setBranchData(Func
, AggrData
);
730 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
731 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func
, From
, To
));
732 AggrData
->bumpBranchCount(From
, To
, Count
, Mispreds
);
// Registers a control transfer between two (possibly different) functions.
// \p FromFunc and \p ToFunc are pointers, and both aggregate-data pointers
// start out null. For each side the location name is resolved, the
// FuncBranchData is fetched or created (same pattern as doIntraBranch()), and
// recordExit()/recordEntry() is called. Finally the source side gets a
// bumpCallCount() and the destination side a bumpEntryCount(); a Location is
// flagged valid only when the corresponding name is non-empty.
736 bool DataAggregator::doInterBranch(BinaryFunction
*FromFunc
,
737 BinaryFunction
*ToFunc
, uint64_t From
,
738 uint64_t To
, uint64_t Count
,
740 FuncBranchData
*FromAggrData
= nullptr;
741 FuncBranchData
*ToAggrData
= nullptr;
// Source side.
745 SrcFunc
= getLocationName(*FromFunc
, BAT
);
746 FromAggrData
= getBranchData(*FromFunc
);
748 FromAggrData
= &NamesToBranches
[FromFunc
->getOneName()];
749 FromAggrData
->Name
= SrcFunc
;
750 setBranchData(*FromFunc
, FromAggrData
);
753 recordExit(*FromFunc
, From
, Mispreds
, Count
);
// Destination side.
756 DstFunc
= getLocationName(*ToFunc
, BAT
);
757 ToAggrData
= getBranchData(*ToFunc
);
759 ToAggrData
= &NamesToBranches
[ToFunc
->getOneName()];
760 ToAggrData
->Name
= DstFunc
;
761 setBranchData(*ToFunc
, ToAggrData
);
764 recordEntry(*ToFunc
, To
, Mispreds
, Count
);
768 FromAggrData
->bumpCallCount(From
, Location(!DstFunc
.empty(), DstFunc
, To
),
771 ToAggrData
->bumpEntryCount(Location(!SrcFunc
.empty(), SrcFunc
, From
), To
,
// Registers one branch record given absolute addresses \p From and \p To.
//
// handleAddress() resolves an address to its function and rewrites the
// address in place into a function-relative offset. It also runs checkReturn
// on the instruction at that offset, which sets IsReturn; a BAT translation
// step and a parent-function lookup are visible as well. If neither endpoint
// resolves to a function, the early-exit branch is taken. A transfer within
// one function with To != 0 is recorded as an intra-function branch; every
// other case goes to doInterBranch().
776 bool DataAggregator::doBranch(uint64_t From
, uint64_t To
, uint64_t Count
,
778 bool IsReturn
= false;
779 auto handleAddress
= [&](uint64_t &Addr
, bool IsFrom
) -> BinaryFunction
* {
780 if (BinaryFunction
*Func
= getBinaryFunctionContainingAddress(Addr
)) {
// Addr becomes an offset from the start of Func.
781 Addr
-= Func
->getAddress();
783 auto checkReturn
= [&](auto MaybeInst
) {
784 IsReturn
= MaybeInst
&& BC
->MIB
->isReturn(*MaybeInst
);
// Use the already-built instruction if there is one, otherwise disassemble
// at the offset.
786 if (Func
->hasInstructions())
787 checkReturn(Func
->getInstructionAtOffset(Addr
));
789 checkReturn(Func
->disassembleInstructionAtOffset(Addr
));
793 Addr
= BAT
->translate(Func
->getAddress(), Addr
, IsFrom
);
795 if (BinaryFunction
*ParentFunc
= getBATParentFunction(*Func
)) {
798 NumColdSamples
+= Count
;
806 BinaryFunction
*FromFunc
= handleAddress(From
, /*IsFrom=*/true);
810 BinaryFunction
*ToFunc
= handleAddress(To
, /*IsFrom=*/false);
811 if (!FromFunc
&& !ToFunc
)
814 // Treat recursive control transfers as inter-branches.
// To == 0 is the function's own start offset, so it falls through to
// doInterBranch().
815 if (FromFunc
== ToFunc
&& To
!= 0) {
816 recordBranch(*FromFunc
, From
, To
, Count
, Mispreds
);
817 return doIntraBranch(*FromFunc
, From
, To
, Count
, Mispreds
);
820 return doInterBranch(FromFunc
, ToFunc
, From
, To
, Count
, Mispreds
);
823 bool DataAggregator::doTrace(const LBREntry
&First
, const LBREntry
&Second
,
825 BinaryFunction
*FromFunc
= getBinaryFunctionContainingAddress(First
.To
);
826 BinaryFunction
*ToFunc
= getBinaryFunctionContainingAddress(Second
.From
);
827 if (!FromFunc
|| !ToFunc
) {
829 dbgs() << "Out of range trace starting in " << FromFunc
->getPrintName()
830 << formatv(" @ {0:x}", First
.To
- FromFunc
->getAddress())
831 << " and ending in " << ToFunc
->getPrintName()
832 << formatv(" @ {0:x}\n", Second
.From
- ToFunc
->getAddress());
834 NumLongRangeTraces
+= Count
;
837 if (FromFunc
!= ToFunc
) {
838 NumInvalidTraces
+= Count
;
840 dbgs() << "Invalid trace starting in " << FromFunc
->getPrintName()
841 << formatv(" @ {0:x}", First
.To
- FromFunc
->getAddress())
842 << " and ending in " << ToFunc
->getPrintName()
843 << formatv(" @ {0:x}\n", Second
.From
- ToFunc
->getAddress());
848 std::optional
<BoltAddressTranslation::FallthroughListTy
> FTs
=
849 BAT
? BAT
->getFallthroughsInTrace(FromFunc
->getAddress(), First
.To
,
851 : getFallthroughsInTrace(*FromFunc
, First
, Second
, Count
);
854 dbgs() << "Invalid trace starting in " << FromFunc
->getPrintName()
855 << " @ " << Twine::utohexstr(First
.To
- FromFunc
->getAddress())
856 << " and ending in " << ToFunc
->getPrintName() << " @ "
857 << ToFunc
->getPrintName() << " @ "
858 << Twine::utohexstr(Second
.From
- ToFunc
->getAddress()) << '\n');
859 NumInvalidTraces
+= Count
;
863 LLVM_DEBUG(dbgs() << "Processing " << FTs
->size() << " fallthroughs for "
864 << FromFunc
->getPrintName() << ":"
865 << Twine::utohexstr(First
.To
) << " to "
866 << Twine::utohexstr(Second
.From
) << ".\n");
867 BinaryFunction
*ParentFunc
= getBATParentFunction(*FromFunc
);
868 for (auto [From
, To
] : *FTs
) {
870 From
= BAT
->translate(FromFunc
->getAddress(), From
, /*IsBranchSrc=*/true);
871 To
= BAT
->translate(FromFunc
->getAddress(), To
, /*IsBranchSrc=*/false);
873 doIntraBranch(ParentFunc
? *ParentFunc
: *FromFunc
, From
, To
, Count
, false);
// Computes the fall-through edges covered by the trace between
// \p FirstLBR.To and \p SecondLBR.From inside \p BF, using BF's CFG (asserted
// via hasCFG()).
//
// Each collected element is a pair (offset in the source block, offset of the
// next block in layout order). The visible checks reject traces whose
// endpoints do not map to basic blocks, and detect consecutive layout blocks
// that are not CFG successors ("no fall-through for the trace"). The final
// loop fetches the branch info for each collected edge (the update applied to
// it is not visible here).
879 std::optional
<SmallVector
<std::pair
<uint64_t, uint64_t>, 16>>
880 DataAggregator::getFallthroughsInTrace(BinaryFunction
&BF
,
881 const LBREntry
&FirstLBR
,
882 const LBREntry
&SecondLBR
,
883 uint64_t Count
) const {
884 SmallVector
<std::pair
<uint64_t, uint64_t>, 16> Branches
;
886 BinaryContext
&BC
= BF
.getBinaryContext();
891 assert(BF
.hasCFG() && "can only record traces in CFG state");
893 // Offsets of the trace within this function.
894 const uint64_t From
= FirstLBR
.To
- BF
.getAddress();
895 const uint64_t To
= SecondLBR
.From
- BF
.getAddress();
900 const BinaryBasicBlock
*FromBB
= BF
.getBasicBlockContainingOffset(From
);
901 const BinaryBasicBlock
*ToBB
= BF
.getBasicBlockContainingOffset(To
);
903 if (!FromBB
|| !ToBB
)
906 // Adjust FromBB if the first LBR is a return from the last instruction in
907 // the previous block (that instruction should be a call).
// Applies only when the trace starts exactly at a block boundary, the
// incoming branch came from outside BF, and the block is neither an entry
// point nor a landing pad.
908 if (From
== FromBB
->getOffset() && !BF
.containsAddress(FirstLBR
.From
) &&
909 !FromBB
->isEntryPoint() && !FromBB
->isLandingPad()) {
910 const BinaryBasicBlock
*PrevBB
=
911 BF
.getLayout().getBlock(FromBB
->getIndex() - 1);
912 if (PrevBB
->getSuccessor(FromBB
->getLabel())) {
913 const MCInst
*Instr
= PrevBB
->getLastNonPseudoInstr();
914 if (Instr
&& BC
.MIB
->isCall(*Instr
))
917 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
920 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR
<< '\n');
924 // Fill out information for fall-through edges. The From and To could be
925 // within the same basic block, e.g. when two call instructions are in the
926 // same block. In this case we skip the processing.
930 // Process blocks in the original layout order.
931 BinaryBasicBlock
*BB
= BF
.getLayout().getBlock(FromBB
->getIndex());
932 assert(BB
== FromBB
&& "index mismatch");
934 BinaryBasicBlock
*NextBB
= BF
.getLayout().getBlock(BB
->getIndex() + 1);
935 assert((NextBB
&& NextBB
->getOffset() > BB
->getOffset()) && "bad layout");
937 // Check for bad LBRs.
938 if (!BB
->getSuccessor(NextBB
->getLabel())) {
939 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
940 << " " << FirstLBR
<< '\n'
941 << " " << SecondLBR
<< '\n');
// The edge source is the recorded offset of the block's last non-pseudo
// instruction on one path, or the block's own offset on the other.
945 const MCInst
*Instr
= BB
->getLastNonPseudoInstr();
948 Offset
= BC
.MIB
->getOffsetWithDefault(*Instr
, 0);
950 Offset
= BB
->getOffset();
952 Branches
.emplace_back(Offset
, NextBB
->getOffset());
957 // Record fall-through jumps
958 for (const auto &[FromOffset
, ToOffset
] : Branches
) {
959 BinaryBasicBlock
*FromBB
= BF
.getBasicBlockContainingOffset(FromOffset
);
960 BinaryBasicBlock
*ToBB
= BF
.getBasicBlockAtOffset(ToOffset
);
961 assert(FromBB
&& ToBB
);
962 BinaryBasicBlock::BinaryBranchInfo
&BI
= FromBB
->getBranchInfo(*ToBB
);
// Accounts for \p Count entries into \p BF at offset \p To.
// Offsets beyond BF.getSize() take the guarded early branch. A function
// without a profile has its ExecutionCount reset to 0 first. One path adds
// \p Count to BF.ExecutionCount and selects BF.front() as the entry block;
// another looks up the block starting at \p To and tests isEntryPoint(). The
// selected block's execution count is then increased by \p Count.
// Note: the method is declared const but updates \p BF through the non-const
// reference.
969 bool DataAggregator::recordEntry(BinaryFunction
&BF
, uint64_t To
, bool Mispred
,
970 uint64_t Count
) const {
971 if (To
> BF
.getSize())
974 if (!BF
.hasProfile())
975 BF
.ExecutionCount
= 0;
977 BinaryBasicBlock
*EntryBB
= nullptr;
979 BF
.ExecutionCount
+= Count
;
981 EntryBB
= &BF
.front();
982 } else if (BinaryBasicBlock
*BB
= BF
.getBasicBlockAtOffset(To
)) {
983 if (BB
->isEntryPoint())
988 EntryBB
->setExecutionCount(EntryBB
->getKnownExecutionCount() + Count
);
// Accounts for an exit from \p BF at offset \p From.
// Non-simple functions and offsets beyond BF.getSize() take the guarded early
// branch. A function without a profile has its ExecutionCount reset to 0.
// (The remainder of the function is not visible here.)
993 bool DataAggregator::recordExit(BinaryFunction
&BF
, uint64_t From
, bool Mispred
,
994 uint64_t Count
) const {
995 if (!BF
.isSimple() || From
> BF
.getSize())
998 if (!BF
.hasProfile())
999 BF
.ExecutionCount
= 0;
// Parses one LBR entry of the form `FROM/TO/M/...` from ParsingBuf.
//
// FROM and TO are read up to '/' and converted with getAsInteger(0, ...)
// (radix auto-detected from the prefix). The third field must be exactly one
// of 'P', 'M' or '-'; Mispred is set only for 'M'. A '-' triggers a one-time
// warning that the misprediction bit is missing. The remainder, up to the
// field separator, must be at least 5 characters long.
//
// \returns the parsed entry, or llvm::errc::io_error (after reportError) on
//          malformed input.
1004 ErrorOr
<LBREntry
> DataAggregator::parseLBREntry() {
1006 ErrorOr
<StringRef
> FromStrRes
= parseString('/');
1007 if (std::error_code EC
= FromStrRes
.getError())
1009 StringRef OffsetStr
= FromStrRes
.get();
1010 if (OffsetStr
.getAsInteger(0, Res
.From
)) {
1011 reportError("expected hexadecimal number with From address");
1012 Diag
<< "Found: " << OffsetStr
<< "\n";
1013 return make_error_code(llvm::errc::io_error
);
1016 ErrorOr
<StringRef
> ToStrRes
= parseString('/');
1017 if (std::error_code EC
= ToStrRes
.getError())
1019 OffsetStr
= ToStrRes
.get();
1020 if (OffsetStr
.getAsInteger(0, Res
.To
)) {
1021 reportError("expected hexadecimal number with To address");
1022 Diag
<< "Found: " << OffsetStr
<< "\n";
1023 return make_error_code(llvm::errc::io_error
);
1026 ErrorOr
<StringRef
> MispredStrRes
= parseString('/');
1027 if (std::error_code EC
= MispredStrRes
.getError())
1029 StringRef MispredStr
= MispredStrRes
.get();
1030 if (MispredStr
.size() != 1 ||
1031 (MispredStr
[0] != 'P' && MispredStr
[0] != 'M' && MispredStr
[0] != '-')) {
1032 reportError("expected single char for mispred bit");
1033 Diag
<< "Found: " << MispredStr
<< "\n";
1034 return make_error_code(llvm::errc::io_error
);
1036 Res
.Mispred
= MispredStr
[0] == 'M';
// Function-local static: the warning is emitted at most once per process.
1038 static bool MispredWarning
= true;
1039 if (MispredStr
[0] == '-' && MispredWarning
) {
1040 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1041 MispredWarning
= false;
1044 ErrorOr
<StringRef
> Rest
= parseString(FieldSeparator
, true);
1045 if (std::error_code EC
= Rest
.getError())
1047 if (Rest
.get().size() < 5) {
1048 reportError("expected rest of LBR entry");
1049 Diag
<< "Found: " << Rest
.get() << "\n";
1050 return make_error_code(llvm::errc::io_error
);
1055 bool DataAggregator::checkAndConsumeFS() {
1056 if (ParsingBuf
[0] != FieldSeparator
)
1059 ParsingBuf
= ParsingBuf
.drop_front(1);
// Discards input up to and including the next '\n'. If no newline remains,
// ParsingBuf is reset to an empty StringRef.
1064 void DataAggregator::consumeRestOfLine() {
1065 size_t LineEnd
= ParsingBuf
.find_first_of('\n');
1066 if (LineEnd
== StringRef::npos
) {
1067 ParsingBuf
= StringRef();
// +1 drops the newline itself as well.
1072 ParsingBuf
= ParsingBuf
.drop_front(LineEnd
+ 1);
1077 bool DataAggregator::checkNewLine() {
1078 return ParsingBuf
[0] == '\n';
// Parses one line of `perf script -F pid,ip,brstack` output into a
// PerfBranchSample.
//
// Layout consumed: leading separators, PID, separators, PC (hex), then LBR
// entries until end of line. Samples whose PID is not in BinaryMMapInfo are
// dropped with errc::no_such_process unless BC->IsLinuxKernel is set. Each
// LBR entry is tested with ignoreKernelInterrupt(), adjusted with adjustLBR()
// when the binary has no fixed load address, and appended to Res.LBR.
1081 ErrorOr
<DataAggregator::PerfBranchSample
> DataAggregator::parseBranchSample() {
1082 PerfBranchSample Res
;
1084 while (checkAndConsumeFS()) {
1087 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1088 if (std::error_code EC
= PIDRes
.getError())
1090 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1091 if (!BC
->IsLinuxKernel
&& MMapInfoIter
== BinaryMMapInfo
.end()) {
1092 consumeRestOfLine();
1093 return make_error_code(errc::no_such_process
);
1096 while (checkAndConsumeFS()) {
1099 ErrorOr
<uint64_t> PCRes
= parseHexField(FieldSeparator
, true);
1100 if (std::error_code EC
= PCRes
.getError())
1102 Res
.PC
= PCRes
.get();
1104 if (checkAndConsumeNewLine())
1107 while (!checkAndConsumeNewLine()) {
1108 checkAndConsumeFS();
1110 ErrorOr
<LBREntry
> LBRRes
= parseLBREntry();
1111 if (std::error_code EC
= LBRRes
.getError())
1113 LBREntry LBR
= LBRRes
.get();
1114 if (ignoreKernelInterrupt(LBR
))
// NOTE(review): in Linux-kernel mode MMapInfoIter may equal end() (the lookup
// failure is tolerated above); presumably HasFixedLoadAddress is always true
// in that mode so this dereference is never reached -- confirm.
1116 if (!BC
->HasFixedLoadAddress
)
1117 adjustLBR(LBR
, MMapInfoIter
->second
);
1118 Res
.LBR
.push_back(LBR
);
// Parses one line of `perf script -F pid,event,ip` output into a
// PerfBasicSample {event name, address}.
//
// Layout consumed: separators, PID, separators, event name, separators,
// address (hex), end of line. A sample whose PID is not in BinaryMMapInfo is
// skipped by returning an empty sample {StringRef(), 0}. Anything other than
// a newline after the address is reported as errc::io_error. The address is
// adjusted with adjustAddress() when the binary has no fixed load address.
1124 ErrorOr
<DataAggregator::PerfBasicSample
> DataAggregator::parseBasicSample() {
1125 while (checkAndConsumeFS()) {
1128 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1129 if (std::error_code EC
= PIDRes
.getError())
1132 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1133 if (MMapInfoIter
== BinaryMMapInfo
.end()) {
1134 consumeRestOfLine();
1135 return PerfBasicSample
{StringRef(), 0};
1138 while (checkAndConsumeFS()) {
1141 ErrorOr
<StringRef
> Event
= parseString(FieldSeparator
);
1142 if (std::error_code EC
= Event
.getError())
1145 while (checkAndConsumeFS()) {
1148 ErrorOr
<uint64_t> AddrRes
= parseHexField(FieldSeparator
, true);
1149 if (std::error_code EC
= AddrRes
.getError())
1152 if (!checkAndConsumeNewLine()) {
1153 reportError("expected end of line");
1154 return make_error_code(llvm::errc::io_error
);
1157 uint64_t Address
= *AddrRes
;
1158 if (!BC
->HasFixedLoadAddress
)
1159 adjustAddress(Address
, MMapInfoIter
->second
);
1161 return PerfBasicSample
{Event
.get(), Address
};
// Parses one memory sample line: PID, event name, accessed address (hex) and
// PC (hex), each preceded by any number of field separators, followed by end
// of line. The result is PerfMemSample{PC, Address}, where Address has been
// passed through adjustAddress() when the binary has no fixed load address.
// The rest of the line is discarded when the PID has no entry in
// BinaryMMapInfo, when the event name does not contain "mem-loads", or when
// the PC field fails to parse.
// NOTE(review): what is returned on those three paths (embedded lines
// 1177-1178, 1188-1189, 1204-1205) is not visible in this listing; Res{0, 0}
// is declared at the top, presumably for that purpose -- confirm.
1164 ErrorOr
<DataAggregator::PerfMemSample
> DataAggregator::parseMemSample() {
1165 PerfMemSample Res
{0, 0};
1167 while (checkAndConsumeFS()) {
1170 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1171 if (std::error_code EC
= PIDRes
.getError())
1174 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1175 if (MMapInfoIter
== BinaryMMapInfo
.end()) {
1176 consumeRestOfLine();
1180 while (checkAndConsumeFS()) {
1183 ErrorOr
<StringRef
> Event
= parseString(FieldSeparator
);
1184 if (std::error_code EC
= Event
.getError())
1186 if (!Event
.get().contains("mem-loads")) {
1187 consumeRestOfLine();
1191 while (checkAndConsumeFS()) {
1194 ErrorOr
<uint64_t> AddrRes
= parseHexField(FieldSeparator
);
1195 if (std::error_code EC
= AddrRes
.getError())
1198 while (checkAndConsumeFS()) {
1201 ErrorOr
<uint64_t> PCRes
= parseHexField(FieldSeparator
, true);
1202 if (std::error_code EC
= PCRes
.getError()) {
1203 consumeRestOfLine();
1207 if (!checkAndConsumeNewLine()) {
1208 reportError("expected end of line");
1209 return make_error_code(llvm::errc::io_error
);
1212 uint64_t Address
= *AddrRes
;
1213 if (!BC
->HasFixedLoadAddress
)
1214 adjustAddress(Address
, MMapInfoIter
->second
);
1216 return PerfMemSample
{PCRes
.get(), Address
};
// Parses the next field as either a bare hex offset or a "<build-id>:<offset>"
// pair. The token up to the next ' ' or '\n' is inspected first: if there is
// no such delimiter, or the token has no ':', the field is read as a plain hex
// value and wrapped in Location(value). Otherwise the text before ':' is read
// as the build ID, the hex value after it as the offset, and
// Location(true, BuildID, Offset) is returned.
// NOTE(review): the statements executed when a sub-parser fails (embedded
// lines 1223, 1236, 1239) are missing from this listing.
1219 ErrorOr
<Location
> DataAggregator::parseLocationOrOffset() {
// Helper: read one hex field and turn it into an offset-only Location.
1220 auto parseOffset
= [this]() -> ErrorOr
<Location
> {
1221 ErrorOr
<uint64_t> Res
= parseHexField(FieldSeparator
);
1222 if (std::error_code EC
= Res
.getError())
1224 return Location(Res
.get());
1227 size_t Sep
= ParsingBuf
.find_first_of(" \n");
1228 if (Sep
== StringRef::npos
)
1229 return parseOffset();
1230 StringRef LookAhead
= ParsingBuf
.substr(0, Sep
);
1231 if (!LookAhead
.contains(':'))
1232 return parseOffset();
1234 ErrorOr
<StringRef
> BuildID
= parseString(':');
1235 if (std::error_code EC
= BuildID
.getError())
1237 ErrorOr
<uint64_t> Offset
= parseHexField(FieldSeparator
);
1238 if (std::error_code EC
= Offset
.getError())
1240 return Location(true, BuildID
.get(), Offset
.get());
// Parses one line of a pre-aggregated profile:
//   <type> <from> <to> <count> [<mispreds>]
// where <type> is "B" (BRANCH), "F" (FT) or "f" (FT_EXTERNAL_ORIGIN) and
// <from>/<to> are read by parseLocationOrOffset(). The mispredict count is
// read only for BRANCH entries and stays 0 otherwise. Any other type string
// reports "expected B, F or f"; a missing newline at the end reports "expected
// end of line"; both return errc::io_error.
// NOTE(review): the statements executed when a field parser fails and the
// tail of the returned initializer list (embedded line 1299) are missing from
// this listing.
1243 ErrorOr
<DataAggregator::AggregatedLBREntry
>
1244 DataAggregator::parseAggregatedLBREntry() {
1245 while (checkAndConsumeFS()) {
1248 ErrorOr
<StringRef
> TypeOrErr
= parseString(FieldSeparator
);
1249 if (std::error_code EC
= TypeOrErr
.getError())
1251 auto Type
= AggregatedLBREntry::BRANCH
;
1252 if (TypeOrErr
.get() == "B") {
1253 Type
= AggregatedLBREntry::BRANCH
;
1254 } else if (TypeOrErr
.get() == "F") {
1255 Type
= AggregatedLBREntry::FT
;
1256 } else if (TypeOrErr
.get() == "f") {
1257 Type
= AggregatedLBREntry::FT_EXTERNAL_ORIGIN
;
1259 reportError("expected B, F or f");
1260 return make_error_code(llvm::errc::io_error
);
1263 while (checkAndConsumeFS()) {
1265 ErrorOr
<Location
> From
= parseLocationOrOffset();
1266 if (std::error_code EC
= From
.getError())
1269 while (checkAndConsumeFS()) {
1271 ErrorOr
<Location
> To
= parseLocationOrOffset();
1272 if (std::error_code EC
= To
.getError())
1275 while (checkAndConsumeFS()) {
// The second argument of parseNumberField is true for the non-BRANCH types,
// i.e. exactly when the count is the last field read by this function.
1277 ErrorOr
<int64_t> Frequency
=
1278 parseNumberField(FieldSeparator
, Type
!= AggregatedLBREntry::BRANCH
);
1279 if (std::error_code EC
= Frequency
.getError())
1282 uint64_t Mispreds
= 0;
1283 if (Type
== AggregatedLBREntry::BRANCH
) {
1284 while (checkAndConsumeFS()) {
1286 ErrorOr
<int64_t> MispredsOrErr
= parseNumberField(FieldSeparator
, true);
1287 if (std::error_code EC
= MispredsOrErr
.getError())
1289 Mispreds
= static_cast<uint64_t>(MispredsOrErr
.get());
1292 if (!checkAndConsumeNewLine()) {
1293 reportError("expected end of line");
1294 return make_error_code(llvm::errc::io_error
);
1297 return AggregatedLBREntry
{From
.get(), To
.get(),
1298 static_cast<uint64_t>(Frequency
.get()), Mispreds
,
// Returns true when opts::IgnoreInterruptLBR is set and at least one end of
// the LBR entry (From or To) is at or above KernelBaseAddr. LBR is only read.
1302 bool DataAggregator::ignoreKernelInterrupt(LBREntry
&LBR
) const {
1303 return opts::IgnoreInterruptLBR
&&
1304 (LBR
.From
>= KernelBaseAddr
|| LBR
.To
>= KernelBaseAddr
);
// Builds a Heatmap from the perf samples and prints it.
//  - With opts::BasicAggregation the PC of every basic sample is registered.
//  - Otherwise fall-through traces between consecutive LBR entries are counted
//    in FallthroughLBRs and later registered as address ranges weighted by
//    InternCount; in addition the To of the first entry and the From of the
//    last entry of each sample are registered as single addresses.
//  - For a Linux kernel binary the heatmap range is forced to
//    [KernelBaseAddr, 0xffffffffffffffff].
// The heatmap goes to opts::OutputFilename; the CDF and section-hotness tables
// go to the same name with ".csv" / "-section-hotness.csv" appended, unless
// the output name is "-", in which case the name is used unchanged.
// Returns a default-constructed std::error_code on the visible success path.
// NOTE(review): loop headers, error returns and the handling after the
// HEATMAP-ERROR messages sit on lines that are missing from this listing.
// NOTE(review): parseBasicSample() reports an unknown PID as an empty sample
// with address 0 rather than errc::no_such_process; confirm such samples are
// not meant to be registered and counted here.
1307 std::error_code
DataAggregator::printLBRHeatMap() {
1308 outs() << "PERF2BOLT: parse branch events...\n";
1309 NamedRegionTimer
T("parseBranch", "Parsing branch events", TimerGroupName
,
1310 TimerGroupDesc
, opts::TimeAggregator
);
1312 if (BC
->IsLinuxKernel
) {
1313 opts::HeatmapMaxAddress
= 0xffffffffffffffff;
1314 opts::HeatmapMinAddress
= KernelBaseAddr
;
1316 Heatmap
HM(opts::HeatmapBlock
, opts::HeatmapMinAddress
,
1317 opts::HeatmapMaxAddress
, getTextSections(BC
));
1318 uint64_t NumTotalSamples
= 0;
1320 if (opts::BasicAggregation
) {
1322 ErrorOr
<PerfBasicSample
> SampleRes
= parseBasicSample();
1323 if (std::error_code EC
= SampleRes
.getError()) {
1324 if (EC
== errc::no_such_process
)
1328 PerfBasicSample
&Sample
= SampleRes
.get();
1329 HM
.registerAddress(Sample
.PC
);
1332 outs() << "HEATMAP: read " << NumTotalSamples
<< " basic samples\n";
1335 ErrorOr
<PerfBranchSample
> SampleRes
= parseBranchSample();
1336 if (std::error_code EC
= SampleRes
.getError()) {
1337 if (EC
== errc::no_such_process
)
1342 PerfBranchSample
&Sample
= SampleRes
.get();
1344 // LBRs are stored in reverse execution order. NextLBR refers to the next
1345 // executed branch record.
1346 const LBREntry
*NextLBR
= nullptr;
1347 for (const LBREntry
&LBR
: Sample
.LBR
) {
1349 // Record fall-through trace.
1350 const uint64_t TraceFrom
= LBR
.To
;
1351 const uint64_t TraceTo
= NextLBR
->From
;
1352 ++FallthroughLBRs
[Trace(TraceFrom
, TraceTo
)].InternCount
;
1356 if (!Sample
.LBR
.empty()) {
1357 HM
.registerAddress(Sample
.LBR
.front().To
);
1358 HM
.registerAddress(Sample
.LBR
.back().From
);
1360 NumTotalSamples
+= Sample
.LBR
.size();
1362 outs() << "HEATMAP: read " << NumTotalSamples
<< " LBR samples\n";
1363 outs() << "HEATMAP: " << FallthroughLBRs
.size() << " unique traces\n";
1366 if (!NumTotalSamples
) {
1367 if (opts::BasicAggregation
) {
// Terminate the diagnostic with a newline so it does not run into whatever is
// printed next on the terminal.
1368 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1369 "Cannot build heatmap.\n";
1371 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1372 "Cannot build heatmap. Use -nl for building heatmap from "
1378 outs() << "HEATMAP: building heat map...\n";
1380 for (const auto &LBR
: FallthroughLBRs
) {
1381 const Trace
&Trace
= LBR
.first
;
1382 const FTInfo
&Info
= LBR
.second
;
1383 HM
.registerAddressRange(Trace
.From
, Trace
.To
, Info
.InternCount
);
1386 if (HM
.getNumInvalidRanges())
1387 outs() << "HEATMAP: invalid traces: " << HM
.getNumInvalidRanges() << '\n';
1390 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1394 HM
.print(opts::OutputFilename
);
1395 if (opts::OutputFilename
== "-")
1396 HM
.printCDF(opts::OutputFilename
);
1398 HM
.printCDF(opts::OutputFilename
+ ".csv");
1399 if (opts::OutputFilename
== "-")
1400 HM
.printSectionHotness(opts::OutputFilename
);
1402 HM
.printSectionHotness(opts::OutputFilename
+ "-section-hotness.csv");
1404 return std::error_code();
// Aggregates the LBR entries of one branch sample. For every entry the
// visible code
//  (a) forms a fall-through trace from LBR.To to NextPC; when both ends lie in
//      the same BinaryFunction its FTInfo is looked up in FallthroughLBRs,
//      otherwise a debug message is emitted ("Invalid trace ..." when both
//      ends are in known functions, "Out of range trace ..." otherwise, the
//      latter also bumping NumLongRangeTraces);
//  (b) maps LBR.From / LBR.To to 0 when they fall outside every known function
//      and adds LBR.Mispred to BranchLBRs[Trace(From, To)].MispredCount.
// NextPC starts as Sample.PC when opts::UseEventPC is set and as 0 otherwise.
// NOTE(review): the counter increments, the action taken by the Skylake
// workaround, the update of NextPC and the return statement sit on lines that
// are missing from this listing (gaps in the embedded numbering such as 1415,
// 1423-1424, 1433-1436, 1466-1467, 1469, 1471+); NumTraces is presumably the
// value returned -- confirm against the full file.
1407 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample
&Sample
,
1408 bool NeedsSkylakeFix
) {
1409 uint64_t NumTraces
{0};
1410 // LBRs are stored in reverse execution order. NextPC refers to the next
1411 // recorded executed PC.
1412 uint64_t NextPC
= opts::UseEventPC
? Sample
.PC
: 0;
1413 uint32_t NumEntry
= 0;
1414 for (const LBREntry
&LBR
: Sample
.LBR
) {
1416 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1417 // sometimes records entry 32 as an exact copy of entry 31. This will cause
1418 // us to likely record an invalid trace and generate a stale function for
1419 // BAT mode (non BAT disassembles the function and is able to ignore this
1420 // trace at aggregation time). Drop first 2 entries (last two, in
1421 // chronological order)
1422 if (NeedsSkylakeFix
&& NumEntry
<= 2)
1425 // Record fall-through trace.
1426 const uint64_t TraceFrom
= LBR
.To
;
1427 const uint64_t TraceTo
= NextPC
;
1428 const BinaryFunction
*TraceBF
=
1429 getBinaryFunctionContainingAddress(TraceFrom
);
1430 if (TraceBF
&& TraceBF
->containsAddress(TraceTo
)) {
1431 FTInfo
&Info
= FallthroughLBRs
[Trace(TraceFrom
, TraceTo
)];
1432 if (TraceBF
->containsAddress(LBR
.From
))
1437 const BinaryFunction
*ToFunc
=
1438 getBinaryFunctionContainingAddress(TraceTo
);
1439 if (TraceBF
&& ToFunc
) {
1441 dbgs() << "Invalid trace starting in " << TraceBF
->getPrintName()
1442 << formatv(" @ {0:x}", TraceFrom
- TraceBF
->getAddress())
1443 << formatv(" and ending @ {0:x}\n", TraceTo
);
1448 dbgs() << "Out of range trace starting in "
1449 << (TraceBF
? TraceBF
->getPrintName() : "None")
1450 << formatv(" @ {0:x}",
1451 TraceFrom
- (TraceBF
? TraceBF
->getAddress() : 0))
1452 << " and ending in "
1453 << (ToFunc
? ToFunc
->getPrintName() : "None")
1454 << formatv(" @ {0:x}\n",
1455 TraceTo
- (ToFunc
? ToFunc
->getAddress() : 0));
1457 ++NumLongRangeTraces
;
// Branch endpoints outside every known function are collapsed to address 0.
1464 uint64_t From
= getBinaryFunctionContainingAddress(LBR
.From
) ? LBR
.From
: 0;
1465 uint64_t To
= getBinaryFunctionContainingAddress(LBR
.To
) ? LBR
.To
: 0;
1468 TakenBranchInfo
&Info
= BranchLBRs
[Trace(From
, To
)];
1470 Info
.MispredCount
+= LBR
.Mispred
;
// Reads branch samples while hasData() holds and fewer than opts::MaxSamples
// samples have been seen, passing each sample that carries LBR entries to
// parseLBRSample(). Afterwards every function containing an endpoint of a
// BranchLBRs trace is flagged with setHasProfileAvailable(), and a summary is
// printed: samples / LBR entries read, ignored samples (NumTotalSamples -
// NumSamples), traces mismatching the disassembly, out-of-range traces and
// samples recorded in cold fragments of split functions.
// Returns a default-constructed std::error_code on the visible success path.
// NOTE(review): the counter increments, the error returns inside the loop and
// several conditions around the summary output are on lines missing from this
// listing.
1475 std::error_code
DataAggregator::parseBranchEvents() {
1476 outs() << "PERF2BOLT: parse branch events...\n";
1477 NamedRegionTimer
T("parseBranch", "Parsing branch events", TimerGroupName
,
1478 TimerGroupDesc
, opts::TimeAggregator
);
1480 uint64_t NumTotalSamples
= 0;
1481 uint64_t NumEntries
= 0;
1482 uint64_t NumSamples
= 0;
1483 uint64_t NumSamplesNoLBR
= 0;
1484 uint64_t NumTraces
= 0;
1485 bool NeedsSkylakeFix
= false;
1487 while (hasData() && NumTotalSamples
< opts::MaxSamples
) {
1490 ErrorOr
<PerfBranchSample
> SampleRes
= parseBranchSample();
1491 if (std::error_code EC
= SampleRes
.getError()) {
1492 if (EC
== errc::no_such_process
)
1498 PerfBranchSample
&Sample
= SampleRes
.get();
1499 if (opts::WriteAutoFDOData
)
1500 ++BasicSamples
[Sample
.PC
];
1502 if (Sample
.LBR
.empty()) {
1507 NumEntries
+= Sample
.LBR
.size();
// The first 32-entry sample seen while BAT is set switches the Skylake
// workaround on; the flag is passed to parseLBRSample() for this sample and
// every later one.
1508 if (BAT
&& Sample
.LBR
.size() == 32 && !NeedsSkylakeFix
) {
1509 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1510 NeedsSkylakeFix
= true;
1513 NumTraces
+= parseLBRSample(Sample
, NeedsSkylakeFix
);
1516 for (const Trace
&Trace
: llvm::make_first_range(BranchLBRs
))
1517 for (const uint64_t Addr
: {Trace
.From
, Trace
.To
})
1518 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Addr
))
1519 BF
->setHasProfileAvailable();
// Prints Percent as "%.1f%%", choosing a red/yellow/green colour from the
// thresholds T1 and T2 when the stream supports colours. NOTE(review): the
// condition selecting red (embedded line 1524) is missing from this listing.
1521 auto printColored
= [](raw_ostream
&OS
, float Percent
, float T1
, float T2
) {
1523 if (OS
.has_colors()) {
1525 OS
.changeColor(raw_ostream::RED
);
1526 else if (Percent
> T1
)
1527 OS
.changeColor(raw_ostream::YELLOW
);
1529 OS
.changeColor(raw_ostream::GREEN
);
1531 OS
<< format("%.1f%%", Percent
);
1532 if (OS
.has_colors())
1537 outs() << "PERF2BOLT: read " << NumSamples
<< " samples and " << NumEntries
1538 << " LBR entries\n";
1539 if (NumTotalSamples
) {
1540 if (NumSamples
&& NumSamplesNoLBR
== NumSamples
) {
1541 // Note: we don't know if perf2bolt is being used to parse memory samples
1542 // at this point. In this case, it is OK to parse zero LBRs.
1543 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1544 "LBR. Record profile with perf record -j any or run perf2bolt "
1545 "in no-LBR mode with -nl (the performance improvement in -nl "
1546 "mode may be limited)\n";
1548 const uint64_t IgnoredSamples
= NumTotalSamples
- NumSamples
;
1549 const float PercentIgnored
= 100.0f
* IgnoredSamples
/ NumTotalSamples
;
1550 outs() << "PERF2BOLT: " << IgnoredSamples
<< " samples";
1551 printColored(outs(), PercentIgnored
, 20, 50);
1552 outs() << " were ignored\n";
1553 if (PercentIgnored
> 50.0f
)
1554 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1555 "were attributed to the input binary\n";
1558 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1559 << NumInvalidTraces
;
1561 if (NumTraces
> 0) {
1562 Perc
= NumInvalidTraces
* 100.0f
/ NumTraces
;
1563 printColored(outs(), Perc
, 5, 10);
1567 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1568 "binary is probably not the same binary used during profiling "
1569 "collection. The generated data may be ineffective for improving "
1572 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1573 << NumLongRangeTraces
;
// NOTE(review): the guard around the division by NumTraces below (embedded
// line 1574) is not visible in this listing.
1575 outs() << format(" (%.1f%%)", NumLongRangeTraces
* 100.0f
/ NumTraces
);
1578 if (NumColdSamples
> 0) {
1579 const float ColdSamples
= NumColdSamples
* 100.0f
/ NumTotalSamples
;
1580 outs() << "PERF2BOLT: " << NumColdSamples
1581 << format(" (%.1f%%)", ColdSamples
)
1582 << " samples recorded in cold regions of split functions.\n";
1583 if (ColdSamples
> 5.0f
)
1585 << "WARNING: The BOLT-processed binary where samples were collected "
1586 "likely used bad data or your service observed a large shift in "
1587 "profile. You may want to audit this.\n";
1590 return std::error_code();
// Replays the aggregated LBR data into the profile.
//  - For each FallthroughLBRs entry two degenerate LBREntry records are built
//    (From == To == trace start, and From == To == trace end) and handed to
//    doTrace() once with InternCount and once with ExternCount, each only when
//    the count is non-zero.
//  - For each BranchLBRs entry doBranch() is called with the taken and
//    mispredicted counts.
// NOTE(review): embedded line 1606, which precedes the ExternCount doTrace()
// call inside the braces, is missing from this listing.
1593 void DataAggregator::processBranchEvents() {
1594 outs() << "PERF2BOLT: processing branch events...\n";
1595 NamedRegionTimer
T("processBranch", "Processing branch events",
1596 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1598 for (const auto &AggrLBR
: FallthroughLBRs
) {
1599 const Trace
&Loc
= AggrLBR
.first
;
1600 const FTInfo
&Info
= AggrLBR
.second
;
1601 LBREntry First
{Loc
.From
, Loc
.From
, false};
1602 LBREntry Second
{Loc
.To
, Loc
.To
, false};
1603 if (Info
.InternCount
)
1604 doTrace(First
, Second
, Info
.InternCount
);
1605 if (Info
.ExternCount
) {
1607 doTrace(First
, Second
, Info
.ExternCount
);
1611 for (const auto &AggrLBR
: BranchLBRs
) {
1612 const Trace
&Loc
= AggrLBR
.first
;
1613 const TakenBranchInfo
&Info
= AggrLBR
.second
;
1614 doBranch(Loc
.From
, Loc
.To
, Info
.TakenCount
, Info
.MispredCount
);
// Reads basic (non-LBR) samples via parseBasicSample(). For each sample the
// function containing the PC (if any) is flagged with
// setHasProfileAvailable(), the hit counter BasicSamples[PC] is incremented
// and the event name is added to EventNames.
// Returns a default-constructed std::error_code on the visible success path.
// NOTE(review): the loop header, the error return and embedded lines
// 1626-1629 are missing from this listing.
1618 std::error_code
DataAggregator::parseBasicEvents() {
1619 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1620 NamedRegionTimer
T("parseBasic", "Parsing basic events", TimerGroupName
,
1621 TimerGroupDesc
, opts::TimeAggregator
);
1623 ErrorOr
<PerfBasicSample
> Sample
= parseBasicSample();
1624 if (std::error_code EC
= Sample
.getError())
1630 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Sample
->PC
))
1631 BF
->setHasProfileAvailable();
1633 ++BasicSamples
[Sample
->PC
];
1634 EventNames
.insert(Sample
->EventName
);
1637 return std::error_code();
// Walks BasicSamples (PC -> hit count): every hit count is added to
// NumSamples, the function containing the PC is looked up, and the hits are
// either added to OutOfRangeSamples or forwarded to doSample(). A summary with
// the number of samples and the out-of-range share (colour-coded when the
// stream supports colours) is printed afterwards.
// NOTE(review): the condition that separates the out-of-range path from the
// doSample() path (embedded lines 1651, 1653-1655), the red threshold
// (1667) and the condition guarding the final warning are missing from this
// listing.
1640 void DataAggregator::processBasicEvents() {
1641 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1642 NamedRegionTimer
T("processBasic", "Processing basic events", TimerGroupName
,
1643 TimerGroupDesc
, opts::TimeAggregator
);
1644 uint64_t OutOfRangeSamples
= 0;
1645 uint64_t NumSamples
= 0;
1646 for (auto &Sample
: BasicSamples
) {
1647 const uint64_t PC
= Sample
.first
;
1648 const uint64_t HitCount
= Sample
.second
;
1649 NumSamples
+= HitCount
;
1650 BinaryFunction
*Func
= getBinaryFunctionContainingAddress(PC
);
1652 OutOfRangeSamples
+= HitCount
;
1656 doSample(*Func
, PC
, HitCount
);
1658 outs() << "PERF2BOLT: read " << NumSamples
<< " samples\n";
1660 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1661 << OutOfRangeSamples
;
1663 if (NumSamples
> 0) {
1665 Perc
= OutOfRangeSamples
* 100.0f
/ NumSamples
;
1666 if (outs().has_colors()) {
1668 outs().changeColor(raw_ostream::RED
);
1669 else if (Perc
> 40.0f
)
1670 outs().changeColor(raw_ostream::YELLOW
);
1672 outs().changeColor(raw_ostream::GREEN
);
1674 outs() << format("%.1f%%", Perc
);
1675 if (outs().has_colors())
1676 outs().resetColor();
1681 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1682 "binary is probably not the same binary used during profiling "
1683 "collection. The generated data may be ineffective for improving "
// Reads memory samples via parseMemSample(). The function containing each
// sample's PC (if any) is flagged with setHasProfileAvailable() and the sample
// is appended to MemSamples.
// Returns a default-constructed std::error_code on the visible success path.
// NOTE(review): the loop header and the error return (embedded lines 1691,
// 1694-1695) are missing from this listing.
1687 std::error_code
DataAggregator::parseMemEvents() {
1688 outs() << "PERF2BOLT: parsing memory events...\n";
1689 NamedRegionTimer
T("parseMemEvents", "Parsing mem events", TimerGroupName
,
1690 TimerGroupDesc
, opts::TimeAggregator
);
1692 ErrorOr
<PerfMemSample
> Sample
= parseMemSample();
1693 if (std::error_code EC
= Sample
.getError())
1696 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Sample
->PC
))
1697 BF
->setHasProfileAvailable();
1699 MemSamples
.emplace_back(std::move(Sample
.get()));
1702 return std::error_code();
// Converts the collected MemSamples into per-function memory profile data.
// For each sample the PC is resolved to a BinaryFunction and rewritten as an
// offset from the function address; the accessed address is resolved to a
// BinaryData object and, when found, rewritten as an offset from that object.
// The resulting (FuncLoc, AddrLoc) pair is recorded through
// NamesToMemEvents[FuncName].update(), after the entry has been attached to
// the function with setMemData().
// NOTE(review): the declarations of FuncName / MemName, the handling of an
// unresolved PC and the body of the opts::FilterMemProfile branch are on lines
// missing from this listing.
1705 void DataAggregator::processMemEvents() {
1706 NamedRegionTimer
T("ProcessMemEvents", "Processing mem events",
1707 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1708 for (const PerfMemSample
&Sample
: MemSamples
) {
1709 uint64_t PC
= Sample
.PC
;
1710 uint64_t Addr
= Sample
.Addr
;
1714 // Try to resolve symbol for PC
1715 BinaryFunction
*Func
= getBinaryFunctionContainingAddress(PC
);
1717 LLVM_DEBUG(if (PC
!= 0) {
1718 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC
, Addr
);
1723 FuncName
= Func
->getOneName();
1724 PC
-= Func
->getAddress();
1726 // Try to resolve symbol for memory load
1727 if (BinaryData
*BD
= BC
->getBinaryDataContainingAddress(Addr
)) {
1728 MemName
= BD
->getName();
1729 Addr
-= BD
->getAddress();
1730 } else if (opts::FilterMemProfile
) {
1731 // Filter out heap/stack accesses
// The first Location argument is true only when a symbol name was resolved.
1735 const Location
FuncLoc(!FuncName
.empty(), FuncName
, PC
);
1736 const Location
AddrLoc(!MemName
.empty(), MemName
, Addr
);
1738 FuncMemData
*MemData
= &NamesToMemEvents
[FuncName
];
1739 MemData
->Name
= FuncName
;
1740 setMemData(*Func
, MemData
);
1741 MemData
->update(FuncLoc
, AddrLoc
);
1742 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc
<< " = " << AddrLoc
<< "\n");
// Reads pre-aggregated LBR entries via parseAggregatedLBREntry(). For both the
// From and To offsets of an entry the containing function (if any) is flagged
// with setHasProfileAvailable(); the entry is then appended to AggregatedLBRs.
// Returns a default-constructed std::error_code on the visible success path.
// NOTE(review): the loop header and the error return (embedded lines 1750,
// 1753-1754) are missing from this listing.
1746 std::error_code
DataAggregator::parsePreAggregatedLBRSamples() {
1747 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1748 NamedRegionTimer
T("parseAggregated", "Parsing aggregated branch events",
1749 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1751 ErrorOr
<AggregatedLBREntry
> AggrEntry
= parseAggregatedLBREntry();
1752 if (std::error_code EC
= AggrEntry
.getError())
1755 for (const uint64_t Addr
: {AggrEntry
->From
.Offset
, AggrEntry
->To
.Offset
})
1756 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Addr
))
1757 BF
->setHasProfileAvailable();
1759 AggregatedLBRs
.emplace_back(std::move(AggrEntry
.get()));
1762 return std::error_code();
// Replays AggregatedLBRs into the profile. BRANCH entries go to doBranch()
// with their count and mispredict count. FT and FT_EXTERNAL_ORIGIN entries are
// turned into two LBREntry records and passed to doTrace(); their counts are
// summed in NumTraces. A summary follows: number of entries read, traces
// mismatching the disassembly (with a colour-coded percentage) and
// out-of-range traces.
// NOTE(review): the alternative operand of the conditional that initialises
// First.From (embedded line 1781), the `break` statements of the switch, the
// red threshold (1800) and the guards around the warning / percentage output
// are on lines missing from this listing.
1765 void DataAggregator::processPreAggregated() {
1766 outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1767 NamedRegionTimer
T("processAggregated", "Processing aggregated branch events",
1768 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1770 uint64_t NumTraces
= 0;
1771 for (const AggregatedLBREntry
&AggrEntry
: AggregatedLBRs
) {
1772 switch (AggrEntry
.EntryType
) {
1773 case AggregatedLBREntry::BRANCH
:
1774 doBranch(AggrEntry
.From
.Offset
, AggrEntry
.To
.Offset
, AggrEntry
.Count
,
1775 AggrEntry
.Mispreds
);
1777 case AggregatedLBREntry::FT
:
1778 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN
: {
1779 LBREntry First
{AggrEntry
.EntryType
== AggregatedLBREntry::FT
1780 ? AggrEntry
.From
.Offset
1782 AggrEntry
.From
.Offset
, false};
1783 LBREntry Second
{AggrEntry
.To
.Offset
, AggrEntry
.To
.Offset
, false};
1784 doTrace(First
, Second
, AggrEntry
.Count
);
1785 NumTraces
+= AggrEntry
.Count
;
1791 outs() << "PERF2BOLT: read " << AggregatedLBRs
.size()
1792 << " aggregated LBR entries\n";
1793 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1794 << NumInvalidTraces
;
1796 if (NumTraces
> 0) {
1798 Perc
= NumInvalidTraces
* 100.0f
/ NumTraces
;
1799 if (outs().has_colors()) {
1801 outs().changeColor(raw_ostream::RED
);
1802 else if (Perc
> 5.0f
)
1803 outs().changeColor(raw_ostream::YELLOW
);
1805 outs().changeColor(raw_ostream::GREEN
);
1807 outs() << format("%.1f%%", Perc
);
1808 if (outs().has_colors())
1809 outs().resetColor();
1814 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1815 "binary is probably not the same binary used during profiling "
1816 "collection. The generated data may be ineffective for improving "
1819 outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1820 << NumLongRangeTraces
;
1822 outs() << format(" (%.1f%%)", NumLongRangeTraces
* 100.0f
/ NumTraces
);
// Inspects the current line of ParsingBuf for a "PERF_RECORD_COMM exec" record
// and extracts the PID from it. The line is only looked at, never consumed, in
// the visible code.
//  - No '\n' left in the buffer: reports "expected rest of line" and yields
//    std::nullopt.
//  - Line is not a COMM exec record: yields std::nullopt silently.
//  - PID text is not a base-10 integer: reports "expected PID" and yields
//    std::nullopt.
// NOTE(review): the declaration of PID and the successful return (embedded
// lines 1843, 1849-1850) are missing from this listing.
1826 std::optional
<int32_t> DataAggregator::parseCommExecEvent() {
1827 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1828 if (LineEnd
== StringRef::npos
) {
1829 reportError("expected rest of line");
1830 Diag
<< "Found: " << ParsingBuf
<< "\n";
1831 return std::nullopt
;
1833 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1835 size_t Pos
= Line
.find("PERF_RECORD_COMM exec");
1836 if (Pos
== StringRef::npos
)
1837 return std::nullopt
;
1838 Line
= Line
.drop_front(Pos
);
1841 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>
// The PID is the text between the last ':' and the following '/'.
1842 StringRef PIDStr
= Line
.rsplit(':').second
.split('/').first
;
1844 if (PIDStr
.getAsInteger(10, PID
)) {
1845 reportError("expected PID");
1846 Diag
<< "Found: " << PIDStr
<< " in '" << Line
<< "'\n";
1847 return std::nullopt
;
// Converts a "<sec>.<usec>" time stamp into a single integer computed as
// SecTime * 1000000 + USecTime. Both parts are parsed as base-10 integers; if
// either part fails to parse the result is std::nullopt.
// NOTE(review): the fractional part is used as-is, so the result is in
// microseconds only when the input carries exactly six fractional digits --
// presumably what perf prints; confirm.
// NOTE(review): the declarations of SecTime / USecTime (embedded lines
// 1857-1858) are missing from this listing.
1854 std::optional
<uint64_t> parsePerfTime(const StringRef TimeStr
) {
1855 const StringRef SecTimeStr
= TimeStr
.split('.').first
;
1856 const StringRef USecTimeStr
= TimeStr
.split('.').second
;
1859 if (SecTimeStr
.getAsInteger(10, SecTime
) ||
1860 USecTimeStr
.getAsInteger(10, USecTime
))
1861 return std::nullopt
;
1862 return SecTime
* 1000000ULL + USecTime
;
// Parses one line as a PERF_RECORD_FORK record and extracts the child PID, the
// parent PID and, when parsePerfTime() accepts the time stamp that precedes
// the record name, the event time. Leading field separators are skipped
// first. Every path that yields std::nullopt now consumes the current line,
// matching the not-a-fork path, so that a caller looping over the input does
// not see the same malformed line again.
//  - No '\n' left in the buffer: reports "expected rest of line".
//  - Line is not a FORK record: skipped silently.
//  - Child or parent PID is not a base-10 integer: reports "expected PID".
// NOTE(review): the declaration of FI, the statement storing the parsed time
// and the successful return are on lines missing from this listing.
1866 std::optional
<DataAggregator::ForkInfo
> DataAggregator::parseForkEvent() {
1867 while (checkAndConsumeFS()) {
1870 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1871 if (LineEnd
== StringRef::npos
) {
1872 reportError("expected rest of line");
1873 Diag
<< "Found: " << ParsingBuf
<< "\n";
// No newline is left, so this empties ParsingBuf and ends the input.
consumeRestOfLine();
1874 return std::nullopt
;
1876 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1878 size_t Pos
= Line
.find("PERF_RECORD_FORK");
1879 if (Pos
== StringRef::npos
) {
1880 consumeRestOfLine();
1881 return std::nullopt
;
// The time stamp is the last separator-delimited token before the final ':'
// that precedes the record name.
1886 const StringRef TimeStr
=
1887 Line
.substr(0, Pos
).rsplit(':').first
.rsplit(FieldSeparator
).second
;
1888 if (std::optional
<uint64_t> TimeRes
= parsePerfTime(TimeStr
)) {
1892 Line
= Line
.drop_front(Pos
);
1895 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1896 const StringRef ChildPIDStr
= Line
.split('(').second
.split(':').first
;
1897 if (ChildPIDStr
.getAsInteger(10, FI
.ChildPID
)) {
1898 reportError("expected PID");
1899 Diag
<< "Found: " << ChildPIDStr
<< " in '" << Line
<< "'\n";
// Skip the malformed record so the caller's read loop keeps advancing.
consumeRestOfLine();
1900 return std::nullopt
;
1903 const StringRef ParentPIDStr
= Line
.rsplit('(').second
.split(':').first
;
1904 if (ParentPIDStr
.getAsInteger(10, FI
.ParentPID
)) {
1905 reportError("expected PID");
1906 Diag
<< "Found: " << ParentPIDStr
<< " in '" << Line
<< "'\n";
// Skip the malformed record so the caller's read loop keeps advancing.
consumeRestOfLine();
1907 return std::nullopt
;
1910 consumeRestOfLine();
// Parses one line as a PERF_RECORD_MMAP2 record and returns the mapped file's
// base name together with an MMapInfo holding the time stamp (when
// parsePerfTime() accepts it), PID, mapping address, size and file offset.
// Leading field separators are skipped first.
//  - No '\n' left in the buffer: reports "expected rest of line" and returns
//    errc::io_error.
//  - Line is not an MMAP2 record, or the file name starts with "//" or "[":
//    the line is consumed and a pair with an empty name and the (otherwise
//    untouched) ParsedInfo is returned.
//  - PID / base address / size / offset not parseable: reports the matching
//    "expected ..." message and returns errc::io_error. The address, size and
//    offset are parsed with radix auto-detection (radix argument 0).
1915 ErrorOr
<std::pair
<StringRef
, DataAggregator::MMapInfo
>>
1916 DataAggregator::parseMMapEvent() {
1917 while (checkAndConsumeFS()) {
1920 MMapInfo ParsedInfo
;
1922 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1923 if (LineEnd
== StringRef::npos
) {
1924 reportError("expected rest of line");
1925 Diag
<< "Found: " << ParsingBuf
<< "\n";
1926 return make_error_code(llvm::errc::io_error
);
1928 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1930 size_t Pos
= Line
.find("PERF_RECORD_MMAP2");
1931 if (Pos
== StringRef::npos
) {
1932 consumeRestOfLine();
1933 return std::make_pair(StringRef(), ParsedInfo
);
1937 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1939 const StringRef TimeStr
=
1940 Line
.substr(0, Pos
).rsplit(':').first
.rsplit(FieldSeparator
).second
;
1941 if (std::optional
<uint64_t> TimeRes
= parsePerfTime(TimeStr
))
1942 ParsedInfo
.Time
= *TimeRes
;
1944 Line
= Line
.drop_front(Pos
);
1947 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
// The file name is whatever follows the last field separator on the line.
1949 StringRef FileName
= Line
.rsplit(FieldSeparator
).second
;
1950 if (FileName
.starts_with("//") || FileName
.starts_with("[")) {
1951 consumeRestOfLine();
1952 return std::make_pair(StringRef(), ParsedInfo
);
1954 FileName
= sys::path::filename(FileName
);
1956 const StringRef PIDStr
= Line
.split(FieldSeparator
).second
.split('/').first
;
1957 if (PIDStr
.getAsInteger(10, ParsedInfo
.PID
)) {
1958 reportError("expected PID");
1959 Diag
<< "Found: " << PIDStr
<< " in '" << Line
<< "'\n";
1960 return make_error_code(llvm::errc::io_error
);
1963 const StringRef BaseAddressStr
= Line
.split('[').second
.split('(').first
;
1964 if (BaseAddressStr
.getAsInteger(0, ParsedInfo
.MMapAddress
)) {
1965 reportError("expected base address");
1966 Diag
<< "Found: " << BaseAddressStr
<< " in '" << Line
<< "'\n";
1967 return make_error_code(llvm::errc::io_error
);
1970 const StringRef SizeStr
= Line
.split('(').second
.split(')').first
;
1971 if (SizeStr
.getAsInteger(0, ParsedInfo
.Size
)) {
1972 reportError("expected mmaped size");
1973 Diag
<< "Found: " << SizeStr
<< " in '" << Line
<< "'\n";
1974 return make_error_code(llvm::errc::io_error
);
1977 const StringRef OffsetStr
=
1978 Line
.split('@').second
.ltrim().split(FieldSeparator
).first
;
1979 if (OffsetStr
.getAsInteger(0, ParsedInfo
.Offset
)) {
1980 reportError("expected mmaped page-aligned offset");
1981 Diag
<< "Found: " << OffsetStr
<< " in '" << Line
<< "'\n";
1982 return make_error_code(llvm::errc::io_error
);
1985 consumeRestOfLine();
1987 return std::make_pair(FileName
, ParsedInfo
);
// Collects the mmap records of the perf output and fills BinaryMMapInfo with
// the mappings that belong to the input binary.
//  1. Every record returned by parseMMapEvent() is gathered in a local
//     multimap keyed by file name; the visible checks look at PID == -1, the
//     name "(deleted)" and whether the same PID already has a mapping for that
//     file ("only the first mapping of the file for any given PID").
//  2. The name to match is the base name of BC->getFilename(); when no record
//     carries that name and BuildIDBinaryName is non-empty, BuildIDBinaryName
//     is used instead (with a warning).
//  3. For each matching mapping: with a fixed load address and a non-zero
//     MMapAddress the mapping is compared against BC->SegmentMapInfo; without
//     a fixed load address the base address is obtained from
//     BC->getBaseAddressForMapping(). The mapping is then inserted into
//     BinaryMMapInfo under its PID.
//  4. If BinaryMMapInfo ends up empty an error listing the available binary
//     names is printed.
// Returns a default-constructed std::error_code on the visible success path.
// NOTE(review): the loop header, the `continue` / error-return statements and
// what follows the final error message are on lines missing from this listing.
1990 std::error_code
DataAggregator::parseMMapEvents() {
1991 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1992 NamedRegionTimer
T("parseMMapEvents", "Parsing mmap events", TimerGroupName
,
1993 TimerGroupDesc
, opts::TimeAggregator
);
1995 std::multimap
<StringRef
, MMapInfo
> GlobalMMapInfo
;
1997 ErrorOr
<std::pair
<StringRef
, MMapInfo
>> FileMMapInfoRes
= parseMMapEvent();
1998 if (std::error_code EC
= FileMMapInfoRes
.getError())
2001 std::pair
<StringRef
, MMapInfo
> FileMMapInfo
= FileMMapInfoRes
.get();
2002 if (FileMMapInfo
.second
.PID
== -1)
2004 if (FileMMapInfo
.first
== "(deleted)")
2007 // Consider only the first mapping of the file for any given PID
2008 auto Range
= GlobalMMapInfo
.equal_range(FileMMapInfo
.first
);
2009 bool PIDExists
= llvm::any_of(make_range(Range
), [&](const auto &MI
) {
2010 return MI
.second
.PID
== FileMMapInfo
.second
.PID
;
2016 GlobalMMapInfo
.insert(FileMMapInfo
);
2020 dbgs() << "FileName -> mmap info:\n"
2021 << " Filename : PID [MMapAddr, Size, Offset]\n";
2022 for (const auto &[Name
, MMap
] : GlobalMMapInfo
)
2023 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name
, MMap
.PID
,
2024 MMap
.MMapAddress
, MMap
.Size
, MMap
.Offset
);
2027 StringRef NameToUse
= llvm::sys::path::filename(BC
->getFilename());
2028 if (GlobalMMapInfo
.count(NameToUse
) == 0 && !BuildIDBinaryName
.empty()) {
2029 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2030 << "\" for profile matching\n";
2031 NameToUse
= BuildIDBinaryName
;
2034 auto Range
= GlobalMMapInfo
.equal_range(NameToUse
);
2035 for (MMapInfo
&MMapInfo
: llvm::make_second_range(make_range(Range
))) {
2036 if (BC
->HasFixedLoadAddress
&& MMapInfo
.MMapAddress
) {
2037 // Check that the binary mapping matches one of the segments.
2038 bool MatchFound
= llvm::any_of(
2039 llvm::make_second_range(BC
->SegmentMapInfo
),
2040 [&](SegmentInfo
&SegInfo
) {
2041 // The mapping is page-aligned and hence the MMapAddress could be
2042 // different from the segment start address. We cannot know the page
2043 // size of the mapping, but we know it should not exceed the segment
2044 // alignment value. Hence we are performing an approximate check.
2045 return SegInfo
.Address
>= MMapInfo
.MMapAddress
&&
2046 SegInfo
.Address
- MMapInfo
.MMapAddress
< SegInfo
.Alignment
;
2049 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2050 << " at 0x" << Twine::utohexstr(MMapInfo
.MMapAddress
) << '\n';
2055 // Set base address for shared objects.
2056 if (!BC
->HasFixedLoadAddress
) {
2057 std::optional
<uint64_t> BaseAddress
=
2058 BC
->getBaseAddressForMapping(MMapInfo
.MMapAddress
, MMapInfo
.Offset
);
2060 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2061 "binary when memory mapped at 0x"
2062 << Twine::utohexstr(MMapInfo
.MMapAddress
)
2063 << " using file offset 0x" << Twine::utohexstr(MMapInfo
.Offset
)
2064 << ". Ignoring profile data for this mapping\n";
2067 MMapInfo
.BaseAddress
= *BaseAddress
;
2071 BinaryMMapInfo
.insert(std::make_pair(MMapInfo
.PID
, MMapInfo
));
2074 if (BinaryMMapInfo
.empty()) {
2075 if (errs().has_colors())
2076 errs().changeColor(raw_ostream::RED
);
2077 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2078 << BC
->getFilename() << "\".";
2079 if (!GlobalMMapInfo
.empty()) {
2080 errs() << " Profile for the following binary name(s) is available:\n";
// upper_bound() jumps to the next distinct key, so each name is printed once.
2081 for (auto I
= GlobalMMapInfo
.begin(), IE
= GlobalMMapInfo
.end(); I
!= IE
;
2082 I
= GlobalMMapInfo
.upper_bound(I
->first
))
2083 errs() << " " << I
->first
<< '\n';
2084 errs() << "Please rename the input binary.\n";
2086 errs() << " Failed to extract any binary name from a profile.\n";
2088 if (errs().has_colors())
2089 errs().resetColor();
2094 return std::error_code();
// Processes COMM-exec and FORK records so that BinaryMMapInfo reflects the
// process tree.
//  - A COMM exec record for a PID whose BinaryMMapInfo entry is marked Forked
//    removes that entry ("forked child that ran execve").
//  - A FORK record whose parent PID is present in BinaryMMapInfo results in a
//    copy of the parent's MMapInfo being inserted under the child PID with
//    Forked set to true.
// A summary listing the associated PIDs is printed at the end.
// Returns a default-constructed std::error_code on the visible success path.
// NOTE(review): the loop header, the handling of an empty parseForkEvent()
// result and the `continue` statements of the checks on ParentPID == ChildPID,
// Time == 0 and an unknown parent are on lines missing from this listing.
2097 std::error_code
DataAggregator::parseTaskEvents() {
2098 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2099 NamedRegionTimer
T("parseTaskEvents", "Parsing task events", TimerGroupName
,
2100 TimerGroupDesc
, opts::TimeAggregator
);
2103 if (std::optional
<int32_t> CommInfo
= parseCommExecEvent()) {
2104 // Remove forked child that ran execve
2105 auto MMapInfoIter
= BinaryMMapInfo
.find(*CommInfo
);
2106 if (MMapInfoIter
!= BinaryMMapInfo
.end() && MMapInfoIter
->second
.Forked
)
2107 BinaryMMapInfo
.erase(MMapInfoIter
);
2108 consumeRestOfLine();
2112 std::optional
<ForkInfo
> ForkInfo
= parseForkEvent();
2116 if (ForkInfo
->ParentPID
== ForkInfo
->ChildPID
)
2119 if (ForkInfo
->Time
== 0) {
2120 // Process was forked and mmaped before perf ran. In this case the child
2121 // should have its own mmap entry unless it was execve'd.
2125 auto MMapInfoIter
= BinaryMMapInfo
.find(ForkInfo
->ParentPID
);
2126 if (MMapInfoIter
== BinaryMMapInfo
.end())
// The child inherits a copy of the parent's mapping, re-keyed by its own PID.
2129 MMapInfo MMapInfo
= MMapInfoIter
->second
;
2130 MMapInfo
.PID
= ForkInfo
->ChildPID
;
2131 MMapInfo
.Forked
= true;
2132 BinaryMMapInfo
.insert(std::make_pair(MMapInfo
.PID
, MMapInfo
));
2135 outs() << "PERF2BOLT: input binary is associated with "
2136 << BinaryMMapInfo
.size() << " PID(s)\n";
2139 for (const MMapInfo
&MMI
: llvm::make_second_range(BinaryMMapInfo
))
2140 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI
.PID
,
2141 (MMI
.Forked
? " (forked)" : ""), MMI
.MMapAddress
,
2145 return std::error_code();
// Parses one "<build-id> <file-name>" line. Leading field separators are
// skipped, the build ID is read first and the name second, the rest of the
// line is consumed, and the result is returned as the pair
// (name, build ID) -- note the swapped order relative to the input.
// std::nullopt is returned when either string fails to parse, and on the
// "one of the strings is missing" path after consumeAllRemainingFS().
// NOTE(review): the condition that detects the missing-string case (embedded
// line 2160) is missing from this listing.
2148 std::optional
<std::pair
<StringRef
, StringRef
>>
2149 DataAggregator::parseNameBuildIDPair() {
2150 while (checkAndConsumeFS()) {
2153 ErrorOr
<StringRef
> BuildIDStr
= parseString(FieldSeparator
, true);
2154 if (std::error_code EC
= BuildIDStr
.getError())
2155 return std::nullopt
;
2157 // If one of the strings is missing, don't issue a parsing error, but still
2158 // do not return a value.
2159 consumeAllRemainingFS();
2161 return std::nullopt
;
2163 ErrorOr
<StringRef
> NameStr
= parseString(FieldSeparator
, true);
2164 if (std::error_code EC
= NameStr
.getError())
2165 return std::nullopt
;
2167 consumeRestOfLine();
2168 return std::make_pair(NameStr
.get(), BuildIDStr
.get());
2171 bool DataAggregator::hasAllBuildIDs() {
2172 const StringRef SavedParsingBuf
= ParsingBuf
;
2177 bool HasInvalidEntries
= false;
2179 if (!parseNameBuildIDPair()) {
2180 HasInvalidEntries
= true;
2185 ParsingBuf
= SavedParsingBuf
;
2187 return !HasInvalidEntries
;
2190 std::optional
<StringRef
>
2191 DataAggregator::getFileNameForBuildID(StringRef FileBuildID
) {
2192 const StringRef SavedParsingBuf
= ParsingBuf
;
2196 std::optional
<std::pair
<StringRef
, StringRef
>> IDPair
=
2197 parseNameBuildIDPair();
2199 consumeRestOfLine();
2203 if (IDPair
->second
.starts_with(FileBuildID
)) {
2204 FileName
= sys::path::filename(IDPair
->first
);
2209 ParsingBuf
= SavedParsingBuf
;
2211 if (!FileName
.empty())
2214 return std::nullopt
;
2218 DataAggregator::writeAggregatedFile(StringRef OutputFilename
) const {
2220 raw_fd_ostream
OutFile(OutputFilename
, EC
, sys::fs::OpenFlags::OF_None
);
2224 bool WriteMemLocs
= false;
2226 auto writeLocation
= [&OutFile
, &WriteMemLocs
](const Location
&Loc
) {
2228 OutFile
<< (Loc
.IsSymbol
? "4 " : "3 ");
2230 OutFile
<< (Loc
.IsSymbol
? "1 " : "0 ");
2231 OutFile
<< (Loc
.Name
.empty() ? "[unknown]" : getEscapedName(Loc
.Name
))
2232 << " " << Twine::utohexstr(Loc
.Offset
) << FieldSeparator
;
2235 uint64_t BranchValues
= 0;
2236 uint64_t MemValues
= 0;
2239 OutFile
<< "boltedcollection\n";
2240 if (opts::BasicAggregation
) {
2241 OutFile
<< "no_lbr";
2242 for (const StringMapEntry
<std::nullopt_t
> &Entry
: EventNames
)
2243 OutFile
<< " " << Entry
.getKey();
2246 for (const auto &KV
: NamesToSamples
) {
2247 const FuncSampleData
&FSD
= KV
.second
;
2248 for (const SampleInfo
&SI
: FSD
.Data
) {
2249 writeLocation(SI
.Loc
);
2250 OutFile
<< SI
.Hits
<< "\n";
2255 for (const auto &KV
: NamesToBranches
) {
2256 const FuncBranchData
&FBD
= KV
.second
;
2257 for (const BranchInfo
&BI
: FBD
.Data
) {
2258 writeLocation(BI
.From
);
2259 writeLocation(BI
.To
);
2260 OutFile
<< BI
.Mispreds
<< " " << BI
.Branches
<< "\n";
2263 for (const BranchInfo
&BI
: FBD
.EntryData
) {
2264 // Do not output if source is a known symbol, since this was already
2265 // accounted for in the source function
2266 if (BI
.From
.IsSymbol
)
2268 writeLocation(BI
.From
);
2269 writeLocation(BI
.To
);
2270 OutFile
<< BI
.Mispreds
<< " " << BI
.Branches
<< "\n";
2275 WriteMemLocs
= true;
2276 for (const auto &KV
: NamesToMemEvents
) {
2277 const FuncMemData
&FMD
= KV
.second
;
2278 for (const MemInfo
&MemEvent
: FMD
.Data
) {
2279 writeLocation(MemEvent
.Offset
);
2280 writeLocation(MemEvent
.Addr
);
2281 OutFile
<< MemEvent
.Count
<< "\n";
2287 outs() << "PERF2BOLT: wrote " << BranchValues
<< " objects and " << MemValues
2288 << " memory objects to " << OutputFilename
<< "\n";
2290 return std::error_code();
2293 std::error_code
DataAggregator::writeBATYAML(BinaryContext
&BC
,
2294 StringRef OutputFilename
) const {
2296 raw_fd_ostream
OutFile(OutputFilename
, EC
, sys::fs::OpenFlags::OF_None
);
2300 yaml::bolt::BinaryProfile BP
;
2302 const MCPseudoProbeDecoder
*PseudoProbeDecoder
=
2303 opts::ProfileUsePseudoProbes
? BC
.getPseudoProbeDecoder() : nullptr;
2305 // Fill out the header info.
2306 BP
.Header
.Version
= 1;
2307 BP
.Header
.FileName
= std::string(BC
.getFilename());
2308 std::optional
<StringRef
> BuildID
= BC
.getFileBuildID();
2309 BP
.Header
.Id
= BuildID
? std::string(*BuildID
) : "<unknown>";
2310 BP
.Header
.Origin
= std::string(getReaderName());
2311 // Only the input binary layout order is supported.
2312 BP
.Header
.IsDFSOrder
= false;
2313 // FIXME: Need to match hash function used to produce BAT hashes.
2314 BP
.Header
.HashFunction
= HashFunction::Default
;
2316 ListSeparator
LS(",");
2317 raw_string_ostream
EventNamesOS(BP
.Header
.EventNames
);
2318 for (const StringMapEntry
<std::nullopt_t
> &EventEntry
: EventNames
)
2319 EventNamesOS
<< LS
<< EventEntry
.first().str();
2321 BP
.Header
.Flags
= opts::BasicAggregation
? BinaryFunction::PF_SAMPLE
2322 : BinaryFunction::PF_LBR
;
2324 if (!opts::BasicAggregation
) {
2325 // Convert profile for functions not covered by BAT
2326 for (auto &BFI
: BC
.getBinaryFunctions()) {
2327 BinaryFunction
&Function
= BFI
.second
;
2328 if (!Function
.hasProfile())
2330 if (BAT
->isBATFunction(Function
.getAddress()))
2332 BP
.Functions
.emplace_back(
2333 YAMLProfileWriter::convert(Function
, /*UseDFS=*/false, BAT
));
2336 for (const auto &KV
: NamesToBranches
) {
2337 const StringRef FuncName
= KV
.first
;
2338 const FuncBranchData
&Branches
= KV
.second
;
2339 yaml::bolt::BinaryFunctionProfile YamlBF
;
2340 BinaryData
*BD
= BC
.getBinaryDataByName(FuncName
);
2342 uint64_t FuncAddress
= BD
->getAddress();
2343 if (!BAT
->isBATFunction(FuncAddress
))
2345 BinaryFunction
*BF
= BC
.getBinaryFunctionAtAddress(FuncAddress
);
2347 YamlBF
.Name
= getLocationName(*BF
, BAT
);
2348 YamlBF
.Id
= BF
->getFunctionNumber();
2349 YamlBF
.Hash
= BAT
->getBFHash(FuncAddress
);
2350 YamlBF
.ExecCount
= BF
->getKnownExecutionCount();
2351 YamlBF
.NumBasicBlocks
= BAT
->getNumBasicBlocks(FuncAddress
);
2352 const BoltAddressTranslation::BBHashMapTy
&BlockMap
=
2353 BAT
->getBBHashMap(FuncAddress
);
2354 YamlBF
.Blocks
.resize(YamlBF
.NumBasicBlocks
);
2356 for (auto &&[Entry
, YamlBB
] : llvm::zip(BlockMap
, YamlBF
.Blocks
)) {
2357 const auto &Block
= Entry
.second
;
2358 YamlBB
.Hash
= Block
.Hash
;
2359 YamlBB
.Index
= Block
.Index
;
2362 // Lookup containing basic block offset and index
2363 auto getBlock
= [&BlockMap
](uint32_t Offset
) {
2364 auto BlockIt
= BlockMap
.upper_bound(Offset
);
2365 if (LLVM_UNLIKELY(BlockIt
== BlockMap
.begin())) {
2366 errs() << "BOLT-ERROR: invalid BAT section\n";
2370 return std::pair(BlockIt
->first
, BlockIt
->second
.Index
);
2373 for (const BranchInfo
&BI
: Branches
.Data
) {
2374 using namespace yaml::bolt
;
2375 const auto &[BlockOffset
, BlockIndex
] = getBlock(BI
.From
.Offset
);
2376 BinaryBasicBlockProfile
&YamlBB
= YamlBF
.Blocks
[BlockIndex
];
2377 if (BI
.To
.IsSymbol
&& BI
.To
.Name
== BI
.From
.Name
&& BI
.To
.Offset
!= 0) {
2379 const unsigned SuccIndex
= getBlock(BI
.To
.Offset
).second
;
2380 auto &SI
= YamlBB
.Successors
.emplace_back(SuccessorInfo
{SuccIndex
});
2381 SI
.Count
= BI
.Branches
;
2382 SI
.Mispreds
= BI
.Mispreds
;
2385 const uint32_t Offset
= BI
.From
.Offset
- BlockOffset
;
2386 auto &CSI
= YamlBB
.CallSites
.emplace_back(CallSiteInfo
{Offset
});
2387 CSI
.Count
= BI
.Branches
;
2388 CSI
.Mispreds
= BI
.Mispreds
;
2389 if (const BinaryData
*BD
= BC
.getBinaryDataByName(BI
.To
.Name
))
2390 YAMLProfileWriter::setCSIDestination(BC
, CSI
, BD
->getSymbol(), BAT
,
2394 // Set entry counts, similar to DataReader::readProfile.
2395 for (const BranchInfo
&BI
: Branches
.EntryData
) {
2396 if (!BlockMap
.isInputBlock(BI
.To
.Offset
)) {
2397 if (opts::Verbosity
>= 1)
2398 errs() << "BOLT-WARNING: Unexpected EntryData in " << FuncName
2399 << " at 0x" << Twine::utohexstr(BI
.To
.Offset
) << '\n';
2402 const unsigned BlockIndex
= BlockMap
.getBBIndex(BI
.To
.Offset
);
2403 YamlBF
.Blocks
[BlockIndex
].ExecCount
+= BI
.Branches
;
2405 if (PseudoProbeDecoder
) {
2406 if ((YamlBF
.GUID
= BF
->getGUID())) {
2407 const MCPseudoProbeFuncDesc
*FuncDesc
=
2408 PseudoProbeDecoder
->getFuncDescForGUID(YamlBF
.GUID
);
2409 YamlBF
.PseudoProbeDescHash
= FuncDesc
->FuncHash
;
2411 // Fetch probes belonging to all fragments
2412 const AddressProbesMap
&ProbeMap
=
2413 PseudoProbeDecoder
->getAddress2ProbesMap();
2414 BinaryFunction::FragmentsSetTy
Fragments(BF
->Fragments
);
2415 Fragments
.insert(BF
);
2416 for (const BinaryFunction
*F
: Fragments
) {
2417 const uint64_t FuncAddr
= F
->getAddress();
2418 const auto &FragmentProbes
=
2419 llvm::make_range(ProbeMap
.lower_bound(FuncAddr
),
2420 ProbeMap
.lower_bound(FuncAddr
+ F
->getSize()));
2421 for (const auto &[OutputAddress
, Probes
] : FragmentProbes
) {
2422 const uint32_t InputOffset
= BAT
->translate(
2423 FuncAddr
, OutputAddress
- FuncAddr
, /*IsBranchSrc=*/true);
2424 const unsigned BlockIndex
= getBlock(InputOffset
).second
;
2425 for (const MCDecodedPseudoProbe
&Probe
: Probes
)
2426 YamlBF
.Blocks
[BlockIndex
].PseudoProbes
.emplace_back(
2427 yaml::bolt::PseudoProbeInfo
{Probe
.getGuid(), Probe
.getIndex(),
2432 // Drop blocks without a hash, won't be useful for stale matching.
2433 llvm::erase_if(YamlBF
.Blocks
,
2434 [](const yaml::bolt::BinaryBasicBlockProfile
&YamlBB
) {
2435 return YamlBB
.Hash
== (yaml::Hex64
)0;
2437 BP
.Functions
.emplace_back(YamlBF
);
2441 // Write the profile.
2442 yaml::Output
Out(OutFile
, nullptr, 0);
2444 return std::error_code();
2447 void DataAggregator::dump() const { DataReader::dump(); }
2449 void DataAggregator::dump(const LBREntry
&LBR
) const {
2450 Diag
<< "From: " << Twine::utohexstr(LBR
.From
)
2451 << " To: " << Twine::utohexstr(LBR
.To
) << " Mispred? " << LBR
.Mispred
2455 void DataAggregator::dump(const PerfBranchSample
&Sample
) const {
2456 Diag
<< "Sample LBR entries: " << Sample
.LBR
.size() << "\n";
2457 for (const LBREntry
&LBR
: Sample
.LBR
)
2461 void DataAggregator::dump(const PerfMemSample
&Sample
) const {
2462 Diag
<< "Sample mem entries: " << Sample
.PC
<< ": " << Sample
.Addr
<< "\n";