1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// This family of functions reads profile data written by perf record,
// aggregates it, and then writes it back to an output file.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Passes/BinaryPasses.h"
18 #include "bolt/Profile/BoltAddressTranslation.h"
19 #include "bolt/Profile/Heatmap.h"
20 #include "bolt/Profile/YAMLProfileWriter.h"
21 #include "bolt/Utils/CommandLineOpts.h"
22 #include "bolt/Utils/Utils.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/ScopeExit.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Compiler.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/Errc.h"
29 #include "llvm/Support/FileSystem.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Program.h"
32 #include "llvm/Support/Regex.h"
33 #include "llvm/Support/Timer.h"
34 #include "llvm/Support/raw_ostream.h"
37 #include <unordered_map>
40 #define DEBUG_TYPE "aggregator"
48 BasicAggregation("nl",
49 cl::desc("aggregate basic samples (without LBR info)"),
50 cl::cat(AggregatorCategory
));
52 static cl::opt
<std::string
>
53 ITraceAggregation("itrace",
54 cl::desc("Generate LBR info with perf itrace argument"),
55 cl::cat(AggregatorCategory
));
58 FilterMemProfile("filter-mem-profile",
59 cl::desc("if processing a memory profile, filter out stack or heap accesses "
60 "that won't be useful for BOLT to reduce profile file size"),
62 cl::cat(AggregatorCategory
));
64 static cl::opt
<unsigned long long>
66 cl::desc("only use samples from process with specified PID"),
69 cl::cat(AggregatorCategory
));
72 IgnoreBuildID("ignore-build-id",
73 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
75 cl::cat(AggregatorCategory
));
77 static cl::opt
<bool> IgnoreInterruptLBR(
78 "ignore-interrupt-lbr",
79 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
80 cl::init(true), cl::cat(AggregatorCategory
));
82 static cl::opt
<unsigned long long>
83 MaxSamples("max-samples",
85 cl::desc("maximum number of samples to read from LBR profile"),
88 cl::cat(AggregatorCategory
));
90 extern cl::opt
<opts::ProfileFormatKind
> ProfileFormat
;
91 extern cl::opt
<bool> ProfileWritePseudoProbes
;
92 extern cl::opt
<std::string
> SaveProfile
;
94 cl::opt
<bool> ReadPreAggregated(
95 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
96 cl::cat(AggregatorCategory
));
99 TimeAggregator("time-aggr",
100 cl::desc("time BOLT aggregator"),
103 cl::cat(AggregatorCategory
));
106 UseEventPC("use-event-pc",
107 cl::desc("use event PC in combination with LBR sampling"),
108 cl::cat(AggregatorCategory
));
110 static cl::opt
<bool> WriteAutoFDOData(
111 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
112 cl::cat(AggregatorCategory
));
118 const char TimerGroupName
[] = "aggregator";
119 const char TimerGroupDesc
[] = "Aggregator";
121 std::vector
<SectionNameAndRange
> getTextSections(const BinaryContext
*BC
) {
122 std::vector
<SectionNameAndRange
> sections
;
123 for (BinarySection
&Section
: BC
->sections()) {
124 if (!Section
.isText())
126 if (Section
.getSize() == 0)
129 {Section
.getName(), Section
.getAddress(), Section
.getEndAddress()});
132 [](const SectionNameAndRange
&A
, const SectionNameAndRange
&B
) {
133 return A
.BeginAddress
< B
.BeginAddress
;
139 constexpr uint64_t DataAggregator::KernelBaseAddr
;
141 DataAggregator::~DataAggregator() { deleteTempFiles(); }
144 void deleteTempFile(const std::string
&FileName
) {
145 if (std::error_code Errc
= sys::fs::remove(FileName
.c_str()))
146 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
147 << " with error " << Errc
.message() << "\n";
151 void DataAggregator::deleteTempFiles() {
152 for (std::string
&FileName
: TempFiles
)
153 deleteTempFile(FileName
);
157 void DataAggregator::findPerfExecutable() {
158 std::optional
<std::string
> PerfExecutable
=
159 sys::Process::FindInEnvPath("PATH", "perf");
160 if (!PerfExecutable
) {
161 outs() << "PERF2BOLT: No perf executable found!\n";
164 PerfPath
= *PerfExecutable
;
167 void DataAggregator::start() {
168 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename
<< "\n";
170 // Don't launch perf for pre-aggregated files
171 if (opts::ReadPreAggregated
)
174 findPerfExecutable();
176 if (opts::BasicAggregation
) {
177 launchPerfProcess("events without LBR",
179 "script -F pid,event,ip",
181 } else if (!opts::ITraceAggregation
.empty()) {
182 std::string ItracePerfScriptArgs
= llvm::formatv(
183 "script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation
);
184 launchPerfProcess("branch events with itrace", MainEventsPPI
,
185 ItracePerfScriptArgs
.c_str(),
188 launchPerfProcess("branch events",
190 "script -F pid,ip,brstack",
194 // Note: we launch script for mem events regardless of the option, as the
195 // command fails fairly fast if mem events were not collected.
196 launchPerfProcess("mem events",
198 "script -F pid,event,addr,ip",
201 launchPerfProcess("process events", MMapEventsPPI
,
202 "script --show-mmap-events --no-itrace",
205 launchPerfProcess("task events", TaskEventsPPI
,
206 "script --show-task-events --no-itrace",
210 void DataAggregator::abort() {
211 if (opts::ReadPreAggregated
)
216 // Kill subprocesses in case they are not finished
217 sys::Wait(TaskEventsPPI
.PI
, 1, &Error
);
218 sys::Wait(MMapEventsPPI
.PI
, 1, &Error
);
219 sys::Wait(MainEventsPPI
.PI
, 1, &Error
);
220 sys::Wait(MemEventsPPI
.PI
, 1, &Error
);
227 void DataAggregator::launchPerfProcess(StringRef Name
, PerfProcessInfo
&PPI
,
228 const char *ArgsString
, bool Wait
) {
229 SmallVector
<StringRef
, 4> Argv
;
231 outs() << "PERF2BOLT: spawning perf job to read " << Name
<< '\n';
232 Argv
.push_back(PerfPath
.data());
234 StringRef(ArgsString
).split(Argv
, ' ');
235 Argv
.push_back("-f");
236 Argv
.push_back("-i");
237 Argv
.push_back(Filename
.c_str());
239 if (std::error_code Errc
=
240 sys::fs::createTemporaryFile("perf.script", "out", PPI
.StdoutPath
)) {
241 errs() << "PERF2BOLT: failed to create temporary file " << PPI
.StdoutPath
242 << " with error " << Errc
.message() << "\n";
245 TempFiles
.push_back(PPI
.StdoutPath
.data());
247 if (std::error_code Errc
=
248 sys::fs::createTemporaryFile("perf.script", "err", PPI
.StderrPath
)) {
249 errs() << "PERF2BOLT: failed to create temporary file " << PPI
.StderrPath
250 << " with error " << Errc
.message() << "\n";
253 TempFiles
.push_back(PPI
.StderrPath
.data());
255 std::optional
<StringRef
> Redirects
[] = {
256 std::nullopt
, // Stdin
257 StringRef(PPI
.StdoutPath
.data()), // Stdout
258 StringRef(PPI
.StderrPath
.data())}; // Stderr
261 dbgs() << "Launching perf: ";
262 for (StringRef Arg
: Argv
)
263 dbgs() << Arg
<< " ";
264 dbgs() << " 1> " << PPI
.StdoutPath
.data() << " 2> " << PPI
.StderrPath
.data()
269 PPI
.PI
.ReturnCode
= sys::ExecuteAndWait(PerfPath
.data(), Argv
,
270 /*envp*/ std::nullopt
, Redirects
);
272 PPI
.PI
= sys::ExecuteNoWait(PerfPath
.data(), Argv
, /*envp*/ std::nullopt
,
276 void DataAggregator::processFileBuildID(StringRef FileBuildID
) {
277 PerfProcessInfo BuildIDProcessInfo
;
278 launchPerfProcess("buildid list",
283 if (BuildIDProcessInfo
.PI
.ReturnCode
!= 0) {
284 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
285 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo
.StderrPath
.data());
286 StringRef ErrBuf
= (*MB
)->getBuffer();
288 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo
.PI
.ReturnCode
294 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
295 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo
.StdoutPath
.data());
296 if (std::error_code EC
= MB
.getError()) {
297 errs() << "Cannot open " << BuildIDProcessInfo
.StdoutPath
.data() << ": "
298 << EC
.message() << "\n";
302 FileBuf
= std::move(*MB
);
303 ParsingBuf
= FileBuf
->getBuffer();
305 std::optional
<StringRef
> FileName
= getFileNameForBuildID(FileBuildID
);
307 if (hasAllBuildIDs()) {
308 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
309 "This indicates the input binary supplied for data aggregation "
310 "is not the same recorded by perf when collecting profiling "
311 "data, or there were no samples recorded for the binary. "
312 "Use -ignore-build-id option to override.\n";
313 if (!opts::IgnoreBuildID
)
316 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
317 "data was recorded without it\n";
320 } else if (*FileName
!= llvm::sys::path::filename(BC
->getFilename())) {
321 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
322 BuildIDBinaryName
= std::string(*FileName
);
324 outs() << "PERF2BOLT: matched build-id and file name\n";
328 bool DataAggregator::checkPerfDataMagic(StringRef FileName
) {
329 if (opts::ReadPreAggregated
)
332 Expected
<sys::fs::file_t
> FD
= sys::fs::openNativeFileForRead(FileName
);
334 consumeError(FD
.takeError());
338 char Buf
[7] = {0, 0, 0, 0, 0, 0, 0};
340 auto Close
= make_scope_exit([&] { sys::fs::closeFile(*FD
); });
341 Expected
<size_t> BytesRead
= sys::fs::readNativeFileSlice(
342 *FD
, MutableArrayRef(Buf
, sizeof(Buf
)), 0);
344 consumeError(BytesRead
.takeError());
351 if (strncmp(Buf
, "PERFILE", 7) == 0)
356 void DataAggregator::parsePreAggregated() {
359 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
360 MemoryBuffer::getFileOrSTDIN(Filename
);
361 if (std::error_code EC
= MB
.getError()) {
362 errs() << "PERF2BOLT-ERROR: cannot open " << Filename
<< ": "
363 << EC
.message() << "\n";
367 FileBuf
= std::move(*MB
);
368 ParsingBuf
= FileBuf
->getBuffer();
371 if (parsePreAggregatedLBRSamples()) {
372 errs() << "PERF2BOLT: failed to parse samples\n";
377 std::error_code
DataAggregator::writeAutoFDOData(StringRef OutputFilename
) {
378 outs() << "PERF2BOLT: writing data for autofdo tools...\n";
379 NamedRegionTimer
T("writeAutoFDO", "Processing branch events", TimerGroupName
,
380 TimerGroupDesc
, opts::TimeAggregator
);
383 raw_fd_ostream
OutFile(OutputFilename
, EC
, sys::fs::OpenFlags::OF_None
);
388 // number of unique traces
389 // from_1-to_1:count_1
390 // from_2-to_2:count_2
392 // from_n-to_n:count_n
393 // number of unique sample addresses
398 // number of unique LBR entries
399 // src_1->dst_1:count_1
400 // src_2->dst_2:count_2
402 // src_n->dst_n:count_n
404 const uint64_t FirstAllocAddress
= this->BC
->FirstAllocAddress
;
406 // AutoFDO addresses are relative to the first allocated loadable program
408 auto filterAddress
= [&FirstAllocAddress
](uint64_t Address
) -> uint64_t {
409 if (Address
< FirstAllocAddress
)
411 return Address
- FirstAllocAddress
;
414 OutFile
<< FallthroughLBRs
.size() << "\n";
415 for (const auto &[Trace
, Info
] : FallthroughLBRs
) {
416 OutFile
<< formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace
.From
),
417 filterAddress(Trace
.To
),
418 Info
.InternCount
+ Info
.ExternCount
);
421 OutFile
<< BasicSamples
.size() << "\n";
422 for (const auto [PC
, HitCount
] : BasicSamples
)
423 OutFile
<< formatv("{0:x-}:{1}\n", filterAddress(PC
), HitCount
);
425 OutFile
<< BranchLBRs
.size() << "\n";
426 for (const auto &[Trace
, Info
] : BranchLBRs
) {
427 OutFile
<< formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace
.From
),
428 filterAddress(Trace
.To
), Info
.TakenCount
);
431 outs() << "PERF2BOLT: wrote " << FallthroughLBRs
.size() << " unique traces, "
432 << BasicSamples
.size() << " sample addresses and " << BranchLBRs
.size()
433 << " unique branches to " << OutputFilename
<< "\n";
435 return std::error_code();
438 void DataAggregator::filterBinaryMMapInfo() {
439 if (opts::FilterPID
) {
440 auto MMapInfoIter
= BinaryMMapInfo
.find(opts::FilterPID
);
441 if (MMapInfoIter
!= BinaryMMapInfo
.end()) {
442 MMapInfo MMap
= MMapInfoIter
->second
;
443 BinaryMMapInfo
.clear();
444 BinaryMMapInfo
.insert(std::make_pair(MMap
.PID
, MMap
));
446 if (errs().has_colors())
447 errs().changeColor(raw_ostream::RED
);
448 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
449 << opts::FilterPID
<< "\""
450 << " for binary \"" << BC
->getFilename() << "\".";
451 assert(!BinaryMMapInfo
.empty() && "No memory map for matching binary");
452 errs() << " Profile for the following process is available:\n";
453 for (std::pair
<const uint64_t, MMapInfo
> &MMI
: BinaryMMapInfo
)
454 outs() << " " << MMI
.second
.PID
455 << (MMI
.second
.Forked
? " (forked)\n" : "\n");
457 if (errs().has_colors())
465 int DataAggregator::prepareToParse(StringRef Name
, PerfProcessInfo
&Process
,
466 PerfProcessErrorCallbackTy Callback
) {
468 outs() << "PERF2BOLT: waiting for perf " << Name
469 << " collection to finish...\n";
470 sys::ProcessInfo PI
= sys::Wait(Process
.PI
, std::nullopt
, &Error
);
472 if (!Error
.empty()) {
473 errs() << "PERF-ERROR: " << PerfPath
<< ": " << Error
<< "\n";
478 if (PI
.ReturnCode
!= 0) {
479 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> ErrorMB
=
480 MemoryBuffer::getFileOrSTDIN(Process
.StderrPath
.data());
481 StringRef ErrBuf
= (*ErrorMB
)->getBuffer();
484 Callback(PI
.ReturnCode
, ErrBuf
);
485 return PI
.ReturnCode
;
488 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
489 MemoryBuffer::getFileOrSTDIN(Process
.StdoutPath
.data());
490 if (std::error_code EC
= MB
.getError()) {
491 errs() << "Cannot open " << Process
.StdoutPath
.data() << ": "
492 << EC
.message() << "\n";
497 FileBuf
= std::move(*MB
);
498 ParsingBuf
= FileBuf
->getBuffer();
501 return PI
.ReturnCode
;
504 Error
DataAggregator::preprocessProfile(BinaryContext
&BC
) {
507 if (opts::ReadPreAggregated
) {
508 parsePreAggregated();
509 return Error::success();
512 if (std::optional
<StringRef
> FileBuildID
= BC
.getFileBuildID()) {
513 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID
<< "\n";
514 processFileBuildID(*FileBuildID
);
516 errs() << "BOLT-WARNING: build-id will not be checked because we could "
517 "not read one from input binary\n";
520 auto ErrorCallback
= [](int ReturnCode
, StringRef ErrBuf
) {
521 errs() << "PERF-ERROR: return code " << ReturnCode
<< "\n" << ErrBuf
;
525 auto MemEventsErrorCallback
= [&](int ReturnCode
, StringRef ErrBuf
) {
526 Regex
NoData("Samples for '.*' event do not have ADDR attribute set. "
527 "Cannot print 'addr' field.");
528 if (!NoData
.match(ErrBuf
))
529 ErrorCallback(ReturnCode
, ErrBuf
);
532 if (BC
.IsLinuxKernel
) {
    // Current MMap parsing logic does not work with the Linux kernel.
    // MMap entries for the Linux kernel use the PERF_RECORD_MMAP
    // format instead of the typical PERF_RECORD_MMAP2 format.
    // Since Linux kernel address mapping is absolute (same as
    // in the ELF file), we avoid parsing MMap in Linux kernel mode.
    // While generating an optimized Linux kernel binary, we may need
    // to parse MMap entries.
541 // In linux kernel mode, we analyze and optimize
542 // all linux kernel binary instructions, irrespective
543 // of whether they are due to system calls or due to
544 // interrupts. Therefore, we cannot ignore interrupt
545 // in Linux kernel mode.
546 opts::IgnoreInterruptLBR
= false;
548 prepareToParse("mmap events", MMapEventsPPI
, ErrorCallback
);
549 if (parseMMapEvents())
550 errs() << "PERF2BOLT: failed to parse mmap events\n";
553 prepareToParse("task events", TaskEventsPPI
, ErrorCallback
);
554 if (parseTaskEvents())
555 errs() << "PERF2BOLT: failed to parse task events\n";
557 filterBinaryMMapInfo();
558 prepareToParse("events", MainEventsPPI
, ErrorCallback
);
560 if (opts::HeatmapMode
) {
561 if (std::error_code EC
= printLBRHeatMap()) {
562 errs() << "ERROR: failed to print heat map: " << EC
.message() << '\n';
568 if ((!opts::BasicAggregation
&& parseBranchEvents()) ||
569 (opts::BasicAggregation
&& parseBasicEvents()))
570 errs() << "PERF2BOLT: failed to parse samples\n";
572 // We can finish early if the goal is just to generate data for autofdo
573 if (opts::WriteAutoFDOData
) {
574 if (std::error_code EC
= writeAutoFDOData(opts::OutputFilename
))
575 errs() << "Error writing autofdo data to file: " << EC
.message() << "\n";
581 // Special handling for memory events
582 if (prepareToParse("mem events", MemEventsPPI
, MemEventsErrorCallback
))
583 return Error::success();
585 if (const std::error_code EC
= parseMemEvents())
586 errs() << "PERF2BOLT: failed to parse memory events: " << EC
.message()
591 return Error::success();
594 Error
DataAggregator::readProfile(BinaryContext
&BC
) {
597 for (auto &BFI
: BC
.getBinaryFunctions()) {
598 BinaryFunction
&Function
= BFI
.second
;
599 convertBranchData(Function
);
602 if (opts::AggregateOnly
) {
603 if (opts::ProfileFormat
== opts::ProfileFormatKind::PF_Fdata
)
604 if (std::error_code EC
= writeAggregatedFile(opts::OutputFilename
))
605 report_error("cannot create output data file", EC
);
607 // BAT YAML is handled by DataAggregator since normal YAML output requires
608 // CFG which is not available in BAT mode.
610 if (opts::ProfileFormat
== opts::ProfileFormatKind::PF_YAML
)
611 if (std::error_code EC
= writeBATYAML(BC
, opts::OutputFilename
))
612 report_error("cannot create output data file", EC
);
613 if (!opts::SaveProfile
.empty())
614 if (std::error_code EC
= writeBATYAML(BC
, opts::SaveProfile
))
615 report_error("cannot create output data file", EC
);
619 return Error::success();
622 bool DataAggregator::mayHaveProfileData(const BinaryFunction
&Function
) {
623 return Function
.hasProfileAvailable();
626 void DataAggregator::processProfile(BinaryContext
&BC
) {
627 if (opts::ReadPreAggregated
)
628 processPreAggregated();
629 else if (opts::BasicAggregation
)
630 processBasicEvents();
632 processBranchEvents();
636 // Mark all functions with registered events as having a valid profile.
637 const auto Flags
= opts::BasicAggregation
? BinaryFunction::PF_SAMPLE
638 : BinaryFunction::PF_LBR
;
639 for (auto &BFI
: BC
.getBinaryFunctions()) {
640 BinaryFunction
&BF
= BFI
.second
;
641 FuncBranchData
*FBD
= getBranchData(BF
);
642 if (FBD
|| getFuncSampleData(BF
.getNames())) {
643 BF
.markProfiled(Flags
);
645 BF
.RawBranchCount
= FBD
->getNumExecutedBranches();
649 for (auto &FuncBranches
: NamesToBranches
)
650 llvm::stable_sort(FuncBranches
.second
.Data
);
652 for (auto &MemEvents
: NamesToMemEvents
)
653 llvm::stable_sort(MemEvents
.second
.Data
);
655 // Release intermediate storage.
657 clear(FallthroughLBRs
);
658 clear(AggregatedLBRs
);
664 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address
) const {
665 if (!BC
->containsAddress(Address
))
668 return BC
->getBinaryFunctionContainingAddress(Address
, /*CheckPastEnd=*/false,
669 /*UseMaxSize=*/true);
673 DataAggregator::getBATParentFunction(const BinaryFunction
&Func
) const {
675 if (const uint64_t HotAddr
= BAT
->fetchParentAddress(Func
.getAddress()))
676 return getBinaryFunctionContainingAddress(HotAddr
);
680 StringRef
DataAggregator::getLocationName(const BinaryFunction
&Func
,
683 return Func
.getOneName();
685 const BinaryFunction
*OrigFunc
= &Func
;
686 // If it is a local function, prefer the name containing the file name where
687 // the local function was declared
688 for (StringRef AlternativeName
: OrigFunc
->getNames()) {
689 size_t FileNameIdx
= AlternativeName
.find('/');
690 // Confirm the alternative name has the pattern Symbol/FileName/1 before
692 if (FileNameIdx
== StringRef::npos
||
693 AlternativeName
.find('/', FileNameIdx
+ 1) == StringRef::npos
)
695 return AlternativeName
;
697 return OrigFunc
->getOneName();
700 bool DataAggregator::doSample(BinaryFunction
&OrigFunc
, uint64_t Address
,
702 BinaryFunction
*ParentFunc
= getBATParentFunction(OrigFunc
);
703 BinaryFunction
&Func
= ParentFunc
? *ParentFunc
: OrigFunc
;
705 NumColdSamples
+= Count
;
707 auto I
= NamesToSamples
.find(Func
.getOneName());
708 if (I
== NamesToSamples
.end()) {
710 StringRef LocName
= getLocationName(Func
, BAT
);
711 std::tie(I
, Success
) = NamesToSamples
.insert(
712 std::make_pair(Func
.getOneName(),
713 FuncSampleData(LocName
, FuncSampleData::ContainerTy())));
716 Address
-= Func
.getAddress();
718 Address
= BAT
->translate(Func
.getAddress(), Address
, /*IsBranchSrc=*/false);
720 I
->second
.bumpCount(Address
, Count
);
724 bool DataAggregator::doIntraBranch(BinaryFunction
&Func
, uint64_t From
,
725 uint64_t To
, uint64_t Count
,
727 FuncBranchData
*AggrData
= getBranchData(Func
);
729 AggrData
= &NamesToBranches
[Func
.getOneName()];
730 AggrData
->Name
= getLocationName(Func
, BAT
);
731 setBranchData(Func
, AggrData
);
734 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
735 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func
, From
, To
));
736 AggrData
->bumpBranchCount(From
, To
, Count
, Mispreds
);
740 bool DataAggregator::doInterBranch(BinaryFunction
*FromFunc
,
741 BinaryFunction
*ToFunc
, uint64_t From
,
742 uint64_t To
, uint64_t Count
,
744 FuncBranchData
*FromAggrData
= nullptr;
745 FuncBranchData
*ToAggrData
= nullptr;
749 SrcFunc
= getLocationName(*FromFunc
, BAT
);
750 FromAggrData
= getBranchData(*FromFunc
);
752 FromAggrData
= &NamesToBranches
[FromFunc
->getOneName()];
753 FromAggrData
->Name
= SrcFunc
;
754 setBranchData(*FromFunc
, FromAggrData
);
757 recordExit(*FromFunc
, From
, Mispreds
, Count
);
760 DstFunc
= getLocationName(*ToFunc
, BAT
);
761 ToAggrData
= getBranchData(*ToFunc
);
763 ToAggrData
= &NamesToBranches
[ToFunc
->getOneName()];
764 ToAggrData
->Name
= DstFunc
;
765 setBranchData(*ToFunc
, ToAggrData
);
768 recordEntry(*ToFunc
, To
, Mispreds
, Count
);
772 FromAggrData
->bumpCallCount(From
, Location(!DstFunc
.empty(), DstFunc
, To
),
775 ToAggrData
->bumpEntryCount(Location(!SrcFunc
.empty(), SrcFunc
, From
), To
,
780 bool DataAggregator::doBranch(uint64_t From
, uint64_t To
, uint64_t Count
,
781 uint64_t Mispreds
, bool IsPreagg
) {
782 // Returns whether \p Offset in \p Func contains a return instruction.
783 auto checkReturn
= [&](const BinaryFunction
&Func
, const uint64_t Offset
) {
784 auto isReturn
= [&](auto MI
) { return MI
&& BC
->MIB
->isReturn(*MI
); };
785 return Func
.hasInstructions()
786 ? isReturn(Func
.getInstructionAtOffset(Offset
))
787 : isReturn(Func
.disassembleInstructionAtOffset(Offset
));
790 // Returns whether \p Offset in \p Func may be a call continuation excluding
791 // entry points and landing pads.
792 auto checkCallCont
= [&](const BinaryFunction
&Func
, const uint64_t Offset
) {
793 // No call continuation at a function start.
797 // FIXME: support BAT case where the function might be in empty state
798 // (split fragments declared non-simple).
802 // The offset should not be an entry point or a landing pad.
803 const BinaryBasicBlock
*ContBB
= Func
.getBasicBlockAtOffset(Offset
);
804 return ContBB
&& !ContBB
->isEntryPoint() && !ContBB
->isLandingPad();
807 // Mutates \p Addr to an offset into the containing function, performing BAT
808 // offset translation and parent lookup.
810 // Returns the containing function (or BAT parent) and whether the address
811 // corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
812 auto handleAddress
= [&](uint64_t &Addr
, bool IsFrom
) {
813 BinaryFunction
*Func
= getBinaryFunctionContainingAddress(Addr
);
815 return std::pair
{Func
, false};
817 Addr
-= Func
->getAddress();
819 bool IsRetOrCallCont
=
820 IsFrom
? checkReturn(*Func
, Addr
) : checkCallCont(*Func
, Addr
);
823 Addr
= BAT
->translate(Func
->getAddress(), Addr
, IsFrom
);
825 BinaryFunction
*ParentFunc
= getBATParentFunction(*Func
);
827 return std::pair
{Func
, IsRetOrCallCont
};
830 NumColdSamples
+= Count
;
832 return std::pair
{ParentFunc
, IsRetOrCallCont
};
835 uint64_t ToOrig
= To
;
836 auto [FromFunc
, IsReturn
] = handleAddress(From
, /*IsFrom*/ true);
837 auto [ToFunc
, IsCallCont
] = handleAddress(To
, /*IsFrom*/ false);
838 if (!FromFunc
&& !ToFunc
)
841 // Record call to continuation trace.
842 if (IsPreagg
&& FromFunc
!= ToFunc
&& (IsReturn
|| IsCallCont
)) {
843 LBREntry First
{ToOrig
- 1, ToOrig
- 1, false};
844 LBREntry Second
{ToOrig
, ToOrig
, false};
845 return doTrace(First
, Second
, Count
);
851 // Treat recursive control transfers as inter-branches.
852 if (FromFunc
== ToFunc
&& To
!= 0) {
853 recordBranch(*FromFunc
, From
, To
, Count
, Mispreds
);
854 return doIntraBranch(*FromFunc
, From
, To
, Count
, Mispreds
);
857 return doInterBranch(FromFunc
, ToFunc
, From
, To
, Count
, Mispreds
);
860 bool DataAggregator::doTrace(const LBREntry
&First
, const LBREntry
&Second
,
862 BinaryFunction
*FromFunc
= getBinaryFunctionContainingAddress(First
.To
);
863 BinaryFunction
*ToFunc
= getBinaryFunctionContainingAddress(Second
.From
);
864 if (!FromFunc
|| !ToFunc
) {
866 dbgs() << "Out of range trace starting in ";
868 dbgs() << formatv("{0} @ {1:x}", *FromFunc
,
869 First
.To
- FromFunc
->getAddress());
871 dbgs() << Twine::utohexstr(First
.To
);
872 dbgs() << " and ending in ";
874 dbgs() << formatv("{0} @ {1:x}", *ToFunc
,
875 Second
.From
- ToFunc
->getAddress());
877 dbgs() << Twine::utohexstr(Second
.From
);
880 NumLongRangeTraces
+= Count
;
883 if (FromFunc
!= ToFunc
) {
884 NumInvalidTraces
+= Count
;
886 dbgs() << "Invalid trace starting in " << FromFunc
->getPrintName()
887 << formatv(" @ {0:x}", First
.To
- FromFunc
->getAddress())
888 << " and ending in " << ToFunc
->getPrintName()
889 << formatv(" @ {0:x}\n", Second
.From
- ToFunc
->getAddress());
894 // Set ParentFunc to BAT parent function or FromFunc itself.
895 BinaryFunction
*ParentFunc
= getBATParentFunction(*FromFunc
);
897 ParentFunc
= FromFunc
;
898 ParentFunc
->SampleCountInBytes
+= Count
* (Second
.From
- First
.To
);
900 std::optional
<BoltAddressTranslation::FallthroughListTy
> FTs
=
901 BAT
? BAT
->getFallthroughsInTrace(FromFunc
->getAddress(), First
.To
,
903 : getFallthroughsInTrace(*FromFunc
, First
, Second
, Count
);
906 dbgs() << "Invalid trace starting in " << FromFunc
->getPrintName()
907 << " @ " << Twine::utohexstr(First
.To
- FromFunc
->getAddress())
908 << " and ending in " << ToFunc
->getPrintName() << " @ "
909 << ToFunc
->getPrintName() << " @ "
910 << Twine::utohexstr(Second
.From
- ToFunc
->getAddress()) << '\n');
911 NumInvalidTraces
+= Count
;
915 LLVM_DEBUG(dbgs() << "Processing " << FTs
->size() << " fallthroughs for "
916 << FromFunc
->getPrintName() << ":"
917 << Twine::utohexstr(First
.To
) << " to "
918 << Twine::utohexstr(Second
.From
) << ".\n");
919 for (auto [From
, To
] : *FTs
) {
921 From
= BAT
->translate(FromFunc
->getAddress(), From
, /*IsBranchSrc=*/true);
922 To
= BAT
->translate(FromFunc
->getAddress(), To
, /*IsBranchSrc=*/false);
924 doIntraBranch(*ParentFunc
, From
, To
, Count
, false);
930 std::optional
<SmallVector
<std::pair
<uint64_t, uint64_t>, 16>>
931 DataAggregator::getFallthroughsInTrace(BinaryFunction
&BF
,
932 const LBREntry
&FirstLBR
,
933 const LBREntry
&SecondLBR
,
934 uint64_t Count
) const {
935 SmallVector
<std::pair
<uint64_t, uint64_t>, 16> Branches
;
937 BinaryContext
&BC
= BF
.getBinaryContext();
942 assert(BF
.hasCFG() && "can only record traces in CFG state");
944 // Offsets of the trace within this function.
945 const uint64_t From
= FirstLBR
.To
- BF
.getAddress();
946 const uint64_t To
= SecondLBR
.From
- BF
.getAddress();
951 const BinaryBasicBlock
*FromBB
= BF
.getBasicBlockContainingOffset(From
);
952 const BinaryBasicBlock
*ToBB
= BF
.getBasicBlockContainingOffset(To
);
954 if (!FromBB
|| !ToBB
)
957 // Adjust FromBB if the first LBR is a return from the last instruction in
958 // the previous block (that instruction should be a call).
959 if (From
== FromBB
->getOffset() && !BF
.containsAddress(FirstLBR
.From
) &&
960 !FromBB
->isEntryPoint() && !FromBB
->isLandingPad()) {
961 const BinaryBasicBlock
*PrevBB
=
962 BF
.getLayout().getBlock(FromBB
->getIndex() - 1);
963 if (PrevBB
->getSuccessor(FromBB
->getLabel())) {
964 const MCInst
*Instr
= PrevBB
->getLastNonPseudoInstr();
965 if (Instr
&& BC
.MIB
->isCall(*Instr
))
968 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
971 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR
<< '\n');
975 // Fill out information for fall-through edges. The From and To could be
976 // within the same basic block, e.g. when two call instructions are in the
977 // same block. In this case we skip the processing.
981 // Process blocks in the original layout order.
982 BinaryBasicBlock
*BB
= BF
.getLayout().getBlock(FromBB
->getIndex());
983 assert(BB
== FromBB
&& "index mismatch");
985 BinaryBasicBlock
*NextBB
= BF
.getLayout().getBlock(BB
->getIndex() + 1);
986 assert((NextBB
&& NextBB
->getOffset() > BB
->getOffset()) && "bad layout");
988 // Check for bad LBRs.
989 if (!BB
->getSuccessor(NextBB
->getLabel())) {
990 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
991 << " " << FirstLBR
<< '\n'
992 << " " << SecondLBR
<< '\n');
996 const MCInst
*Instr
= BB
->getLastNonPseudoInstr();
999 Offset
= BC
.MIB
->getOffsetWithDefault(*Instr
, 0);
1001 Offset
= BB
->getOffset();
1003 Branches
.emplace_back(Offset
, NextBB
->getOffset());
1008 // Record fall-through jumps
1009 for (const auto &[FromOffset
, ToOffset
] : Branches
) {
1010 BinaryBasicBlock
*FromBB
= BF
.getBasicBlockContainingOffset(FromOffset
);
1011 BinaryBasicBlock
*ToBB
= BF
.getBasicBlockAtOffset(ToOffset
);
1012 assert(FromBB
&& ToBB
);
1013 BinaryBasicBlock::BinaryBranchInfo
&BI
= FromBB
->getBranchInfo(*ToBB
);
1020 bool DataAggregator::recordEntry(BinaryFunction
&BF
, uint64_t To
, bool Mispred
,
1021 uint64_t Count
) const {
1022 if (To
> BF
.getSize())
1025 if (!BF
.hasProfile())
1026 BF
.ExecutionCount
= 0;
1028 BinaryBasicBlock
*EntryBB
= nullptr;
1030 BF
.ExecutionCount
+= Count
;
1032 EntryBB
= &BF
.front();
1033 } else if (BinaryBasicBlock
*BB
= BF
.getBasicBlockAtOffset(To
)) {
1034 if (BB
->isEntryPoint())
1039 EntryBB
->setExecutionCount(EntryBB
->getKnownExecutionCount() + Count
);
1044 bool DataAggregator::recordExit(BinaryFunction
&BF
, uint64_t From
, bool Mispred
,
1045 uint64_t Count
) const {
1046 if (!BF
.isSimple() || From
> BF
.getSize())
1049 if (!BF
.hasProfile())
1050 BF
.ExecutionCount
= 0;
1055 ErrorOr
<LBREntry
> DataAggregator::parseLBREntry() {
1057 ErrorOr
<StringRef
> FromStrRes
= parseString('/');
1058 if (std::error_code EC
= FromStrRes
.getError())
1060 StringRef OffsetStr
= FromStrRes
.get();
1061 if (OffsetStr
.getAsInteger(0, Res
.From
)) {
1062 reportError("expected hexadecimal number with From address");
1063 Diag
<< "Found: " << OffsetStr
<< "\n";
1064 return make_error_code(llvm::errc::io_error
);
1067 ErrorOr
<StringRef
> ToStrRes
= parseString('/');
1068 if (std::error_code EC
= ToStrRes
.getError())
1070 OffsetStr
= ToStrRes
.get();
1071 if (OffsetStr
.getAsInteger(0, Res
.To
)) {
1072 reportError("expected hexadecimal number with To address");
1073 Diag
<< "Found: " << OffsetStr
<< "\n";
1074 return make_error_code(llvm::errc::io_error
);
1077 ErrorOr
<StringRef
> MispredStrRes
= parseString('/');
1078 if (std::error_code EC
= MispredStrRes
.getError())
1080 StringRef MispredStr
= MispredStrRes
.get();
1081 if (MispredStr
.size() != 1 ||
1082 (MispredStr
[0] != 'P' && MispredStr
[0] != 'M' && MispredStr
[0] != '-')) {
1083 reportError("expected single char for mispred bit");
1084 Diag
<< "Found: " << MispredStr
<< "\n";
1085 return make_error_code(llvm::errc::io_error
);
1087 Res
.Mispred
= MispredStr
[0] == 'M';
1089 static bool MispredWarning
= true;
1090 if (MispredStr
[0] == '-' && MispredWarning
) {
1091 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1092 MispredWarning
= false;
1095 ErrorOr
<StringRef
> Rest
= parseString(FieldSeparator
, true);
1096 if (std::error_code EC
= Rest
.getError())
1098 if (Rest
.get().size() < 5) {
1099 reportError("expected rest of LBR entry");
1100 Diag
<< "Found: " << Rest
.get() << "\n";
1101 return make_error_code(llvm::errc::io_error
);
1106 bool DataAggregator::checkAndConsumeFS() {
1107 if (ParsingBuf
[0] != FieldSeparator
)
1110 ParsingBuf
= ParsingBuf
.drop_front(1);
1115 void DataAggregator::consumeRestOfLine() {
1116 size_t LineEnd
= ParsingBuf
.find_first_of('\n');
1117 if (LineEnd
== StringRef::npos
) {
1118 ParsingBuf
= StringRef();
1123 ParsingBuf
= ParsingBuf
.drop_front(LineEnd
+ 1);
1128 bool DataAggregator::checkNewLine() {
1129 return ParsingBuf
[0] == '\n';
1132 ErrorOr
<DataAggregator::PerfBranchSample
> DataAggregator::parseBranchSample() {
1133 PerfBranchSample Res
;
1135 while (checkAndConsumeFS()) {
1138 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1139 if (std::error_code EC
= PIDRes
.getError())
1141 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1142 if (!BC
->IsLinuxKernel
&& MMapInfoIter
== BinaryMMapInfo
.end()) {
1143 consumeRestOfLine();
1144 return make_error_code(errc::no_such_process
);
1147 while (checkAndConsumeFS()) {
1150 ErrorOr
<uint64_t> PCRes
= parseHexField(FieldSeparator
, true);
1151 if (std::error_code EC
= PCRes
.getError())
1153 Res
.PC
= PCRes
.get();
1155 if (checkAndConsumeNewLine())
1158 while (!checkAndConsumeNewLine()) {
1159 checkAndConsumeFS();
1161 ErrorOr
<LBREntry
> LBRRes
= parseLBREntry();
1162 if (std::error_code EC
= LBRRes
.getError())
1164 LBREntry LBR
= LBRRes
.get();
1165 if (ignoreKernelInterrupt(LBR
))
1167 if (!BC
->HasFixedLoadAddress
)
1168 adjustLBR(LBR
, MMapInfoIter
->second
);
1169 Res
.LBR
.push_back(LBR
);
1175 ErrorOr
<DataAggregator::PerfBasicSample
> DataAggregator::parseBasicSample() {
1176 while (checkAndConsumeFS()) {
1179 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1180 if (std::error_code EC
= PIDRes
.getError())
1183 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1184 if (MMapInfoIter
== BinaryMMapInfo
.end()) {
1185 consumeRestOfLine();
1186 return PerfBasicSample
{StringRef(), 0};
1189 while (checkAndConsumeFS()) {
1192 ErrorOr
<StringRef
> Event
= parseString(FieldSeparator
);
1193 if (std::error_code EC
= Event
.getError())
1196 while (checkAndConsumeFS()) {
1199 ErrorOr
<uint64_t> AddrRes
= parseHexField(FieldSeparator
, true);
1200 if (std::error_code EC
= AddrRes
.getError())
1203 if (!checkAndConsumeNewLine()) {
1204 reportError("expected end of line");
1205 return make_error_code(llvm::errc::io_error
);
1208 uint64_t Address
= *AddrRes
;
1209 if (!BC
->HasFixedLoadAddress
)
1210 adjustAddress(Address
, MMapInfoIter
->second
);
1212 return PerfBasicSample
{Event
.get(), Address
};
1215 ErrorOr
<DataAggregator::PerfMemSample
> DataAggregator::parseMemSample() {
1216 PerfMemSample Res
{0, 0};
1218 while (checkAndConsumeFS()) {
1221 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1222 if (std::error_code EC
= PIDRes
.getError())
1225 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1226 if (MMapInfoIter
== BinaryMMapInfo
.end()) {
1227 consumeRestOfLine();
1231 while (checkAndConsumeFS()) {
1234 ErrorOr
<StringRef
> Event
= parseString(FieldSeparator
);
1235 if (std::error_code EC
= Event
.getError())
1237 if (!Event
.get().contains("mem-loads")) {
1238 consumeRestOfLine();
1242 while (checkAndConsumeFS()) {
1245 ErrorOr
<uint64_t> AddrRes
= parseHexField(FieldSeparator
);
1246 if (std::error_code EC
= AddrRes
.getError())
1249 while (checkAndConsumeFS()) {
1252 ErrorOr
<uint64_t> PCRes
= parseHexField(FieldSeparator
, true);
1253 if (std::error_code EC
= PCRes
.getError()) {
1254 consumeRestOfLine();
1258 if (!checkAndConsumeNewLine()) {
1259 reportError("expected end of line");
1260 return make_error_code(llvm::errc::io_error
);
1263 uint64_t Address
= *AddrRes
;
1264 if (!BC
->HasFixedLoadAddress
)
1265 adjustAddress(Address
, MMapInfoIter
->second
);
1267 return PerfMemSample
{PCRes
.get(), Address
};
1270 ErrorOr
<Location
> DataAggregator::parseLocationOrOffset() {
1271 auto parseOffset
= [this]() -> ErrorOr
<Location
> {
1272 ErrorOr
<uint64_t> Res
= parseHexField(FieldSeparator
);
1273 if (std::error_code EC
= Res
.getError())
1275 return Location(Res
.get());
1278 size_t Sep
= ParsingBuf
.find_first_of(" \n");
1279 if (Sep
== StringRef::npos
)
1280 return parseOffset();
1281 StringRef LookAhead
= ParsingBuf
.substr(0, Sep
);
1282 if (!LookAhead
.contains(':'))
1283 return parseOffset();
1285 ErrorOr
<StringRef
> BuildID
= parseString(':');
1286 if (std::error_code EC
= BuildID
.getError())
1288 ErrorOr
<uint64_t> Offset
= parseHexField(FieldSeparator
);
1289 if (std::error_code EC
= Offset
.getError())
1291 return Location(true, BuildID
.get(), Offset
.get());
1294 ErrorOr
<DataAggregator::AggregatedLBREntry
>
1295 DataAggregator::parseAggregatedLBREntry() {
1296 while (checkAndConsumeFS()) {
1299 ErrorOr
<StringRef
> TypeOrErr
= parseString(FieldSeparator
);
1300 if (std::error_code EC
= TypeOrErr
.getError())
1302 auto Type
= AggregatedLBREntry::BRANCH
;
1303 if (TypeOrErr
.get() == "B") {
1304 Type
= AggregatedLBREntry::BRANCH
;
1305 } else if (TypeOrErr
.get() == "F") {
1306 Type
= AggregatedLBREntry::FT
;
1307 } else if (TypeOrErr
.get() == "f") {
1308 Type
= AggregatedLBREntry::FT_EXTERNAL_ORIGIN
;
1310 reportError("expected B, F or f");
1311 return make_error_code(llvm::errc::io_error
);
1314 while (checkAndConsumeFS()) {
1316 ErrorOr
<Location
> From
= parseLocationOrOffset();
1317 if (std::error_code EC
= From
.getError())
1320 while (checkAndConsumeFS()) {
1322 ErrorOr
<Location
> To
= parseLocationOrOffset();
1323 if (std::error_code EC
= To
.getError())
1326 while (checkAndConsumeFS()) {
1328 ErrorOr
<int64_t> Frequency
=
1329 parseNumberField(FieldSeparator
, Type
!= AggregatedLBREntry::BRANCH
);
1330 if (std::error_code EC
= Frequency
.getError())
1333 uint64_t Mispreds
= 0;
1334 if (Type
== AggregatedLBREntry::BRANCH
) {
1335 while (checkAndConsumeFS()) {
1337 ErrorOr
<int64_t> MispredsOrErr
= parseNumberField(FieldSeparator
, true);
1338 if (std::error_code EC
= MispredsOrErr
.getError())
1340 Mispreds
= static_cast<uint64_t>(MispredsOrErr
.get());
1343 if (!checkAndConsumeNewLine()) {
1344 reportError("expected end of line");
1345 return make_error_code(llvm::errc::io_error
);
1348 return AggregatedLBREntry
{From
.get(), To
.get(),
1349 static_cast<uint64_t>(Frequency
.get()), Mispreds
,
1353 bool DataAggregator::ignoreKernelInterrupt(LBREntry
&LBR
) const {
1354 return opts::IgnoreInterruptLBR
&&
1355 (LBR
.From
>= KernelBaseAddr
|| LBR
.To
>= KernelBaseAddr
);
1358 std::error_code
DataAggregator::printLBRHeatMap() {
1359 outs() << "PERF2BOLT: parse branch events...\n";
1360 NamedRegionTimer
T("parseBranch", "Parsing branch events", TimerGroupName
,
1361 TimerGroupDesc
, opts::TimeAggregator
);
1363 if (BC
->IsLinuxKernel
) {
1364 opts::HeatmapMaxAddress
= 0xffffffffffffffff;
1365 opts::HeatmapMinAddress
= KernelBaseAddr
;
1367 Heatmap
HM(opts::HeatmapBlock
, opts::HeatmapMinAddress
,
1368 opts::HeatmapMaxAddress
, getTextSections(BC
));
1369 uint64_t NumTotalSamples
= 0;
1371 if (opts::BasicAggregation
) {
1373 ErrorOr
<PerfBasicSample
> SampleRes
= parseBasicSample();
1374 if (std::error_code EC
= SampleRes
.getError()) {
1375 if (EC
== errc::no_such_process
)
1379 PerfBasicSample
&Sample
= SampleRes
.get();
1380 HM
.registerAddress(Sample
.PC
);
1383 outs() << "HEATMAP: read " << NumTotalSamples
<< " basic samples\n";
1386 ErrorOr
<PerfBranchSample
> SampleRes
= parseBranchSample();
1387 if (std::error_code EC
= SampleRes
.getError()) {
1388 if (EC
== errc::no_such_process
)
1393 PerfBranchSample
&Sample
= SampleRes
.get();
1395 // LBRs are stored in reverse execution order. NextLBR refers to the next
1396 // executed branch record.
1397 const LBREntry
*NextLBR
= nullptr;
1398 for (const LBREntry
&LBR
: Sample
.LBR
) {
1400 // Record fall-through trace.
1401 const uint64_t TraceFrom
= LBR
.To
;
1402 const uint64_t TraceTo
= NextLBR
->From
;
1403 ++FallthroughLBRs
[Trace(TraceFrom
, TraceTo
)].InternCount
;
1407 if (!Sample
.LBR
.empty()) {
1408 HM
.registerAddress(Sample
.LBR
.front().To
);
1409 HM
.registerAddress(Sample
.LBR
.back().From
);
1411 NumTotalSamples
+= Sample
.LBR
.size();
1413 outs() << "HEATMAP: read " << NumTotalSamples
<< " LBR samples\n";
1414 outs() << "HEATMAP: " << FallthroughLBRs
.size() << " unique traces\n";
1417 if (!NumTotalSamples
) {
1418 if (opts::BasicAggregation
) {
1419 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1420 "Cannot build heatmap.";
1422 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1423 "Cannot build heatmap. Use -nl for building heatmap from "
1429 outs() << "HEATMAP: building heat map...\n";
1431 for (const auto &LBR
: FallthroughLBRs
) {
1432 const Trace
&Trace
= LBR
.first
;
1433 const FTInfo
&Info
= LBR
.second
;
1434 HM
.registerAddressRange(Trace
.From
, Trace
.To
, Info
.InternCount
);
1437 if (HM
.getNumInvalidRanges())
1438 outs() << "HEATMAP: invalid traces: " << HM
.getNumInvalidRanges() << '\n';
1441 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1445 HM
.print(opts::OutputFilename
);
1446 if (opts::OutputFilename
== "-")
1447 HM
.printCDF(opts::OutputFilename
);
1449 HM
.printCDF(opts::OutputFilename
+ ".csv");
1450 if (opts::OutputFilename
== "-")
1451 HM
.printSectionHotness(opts::OutputFilename
);
1453 HM
.printSectionHotness(opts::OutputFilename
+ "-section-hotness.csv");
1455 return std::error_code();
1458 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample
&Sample
,
1459 bool NeedsSkylakeFix
) {
1460 uint64_t NumTraces
{0};
1461 // LBRs are stored in reverse execution order. NextPC refers to the next
1462 // recorded executed PC.
1463 uint64_t NextPC
= opts::UseEventPC
? Sample
.PC
: 0;
1464 uint32_t NumEntry
= 0;
1465 for (const LBREntry
&LBR
: Sample
.LBR
) {
1467 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1468 // sometimes record entry 32 as an exact copy of entry 31. This will cause
1469 // us to likely record an invalid trace and generate a stale function for
1470 // BAT mode (non BAT disassembles the function and is able to ignore this
1471 // trace at aggregation time). Drop first 2 entries (last two, in
1472 // chronological order)
1473 if (NeedsSkylakeFix
&& NumEntry
<= 2)
1476 // Record fall-through trace.
1477 const uint64_t TraceFrom
= LBR
.To
;
1478 const uint64_t TraceTo
= NextPC
;
1479 const BinaryFunction
*TraceBF
=
1480 getBinaryFunctionContainingAddress(TraceFrom
);
1481 if (TraceBF
&& TraceBF
->containsAddress(TraceTo
)) {
1482 FTInfo
&Info
= FallthroughLBRs
[Trace(TraceFrom
, TraceTo
)];
1483 if (TraceBF
->containsAddress(LBR
.From
))
1488 const BinaryFunction
*ToFunc
=
1489 getBinaryFunctionContainingAddress(TraceTo
);
1490 if (TraceBF
&& ToFunc
) {
1492 dbgs() << "Invalid trace starting in " << TraceBF
->getPrintName()
1493 << formatv(" @ {0:x}", TraceFrom
- TraceBF
->getAddress())
1494 << formatv(" and ending @ {0:x}\n", TraceTo
);
1499 dbgs() << "Out of range trace starting in "
1500 << (TraceBF
? TraceBF
->getPrintName() : "None")
1501 << formatv(" @ {0:x}",
1502 TraceFrom
- (TraceBF
? TraceBF
->getAddress() : 0))
1503 << " and ending in "
1504 << (ToFunc
? ToFunc
->getPrintName() : "None")
1505 << formatv(" @ {0:x}\n",
1506 TraceTo
- (ToFunc
? ToFunc
->getAddress() : 0));
1508 ++NumLongRangeTraces
;
1515 uint64_t From
= getBinaryFunctionContainingAddress(LBR
.From
) ? LBR
.From
: 0;
1516 uint64_t To
= getBinaryFunctionContainingAddress(LBR
.To
) ? LBR
.To
: 0;
1519 TakenBranchInfo
&Info
= BranchLBRs
[Trace(From
, To
)];
1521 Info
.MispredCount
+= LBR
.Mispred
;
1526 std::error_code
DataAggregator::parseBranchEvents() {
1527 outs() << "PERF2BOLT: parse branch events...\n";
1528 NamedRegionTimer
T("parseBranch", "Parsing branch events", TimerGroupName
,
1529 TimerGroupDesc
, opts::TimeAggregator
);
1531 uint64_t NumTotalSamples
= 0;
1532 uint64_t NumEntries
= 0;
1533 uint64_t NumSamples
= 0;
1534 uint64_t NumSamplesNoLBR
= 0;
1535 uint64_t NumTraces
= 0;
1536 bool NeedsSkylakeFix
= false;
1538 while (hasData() && NumTotalSamples
< opts::MaxSamples
) {
1541 ErrorOr
<PerfBranchSample
> SampleRes
= parseBranchSample();
1542 if (std::error_code EC
= SampleRes
.getError()) {
1543 if (EC
== errc::no_such_process
)
1549 PerfBranchSample
&Sample
= SampleRes
.get();
1550 if (opts::WriteAutoFDOData
)
1551 ++BasicSamples
[Sample
.PC
];
1553 if (Sample
.LBR
.empty()) {
1558 NumEntries
+= Sample
.LBR
.size();
1559 if (BAT
&& Sample
.LBR
.size() == 32 && !NeedsSkylakeFix
) {
1560 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1561 NeedsSkylakeFix
= true;
1564 NumTraces
+= parseLBRSample(Sample
, NeedsSkylakeFix
);
1567 for (const Trace
&Trace
: llvm::make_first_range(BranchLBRs
))
1568 for (const uint64_t Addr
: {Trace
.From
, Trace
.To
})
1569 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Addr
))
1570 BF
->setHasProfileAvailable();
1572 auto printColored
= [](raw_ostream
&OS
, float Percent
, float T1
, float T2
) {
1574 if (OS
.has_colors()) {
1576 OS
.changeColor(raw_ostream::RED
);
1577 else if (Percent
> T1
)
1578 OS
.changeColor(raw_ostream::YELLOW
);
1580 OS
.changeColor(raw_ostream::GREEN
);
1582 OS
<< format("%.1f%%", Percent
);
1583 if (OS
.has_colors())
1588 outs() << "PERF2BOLT: read " << NumSamples
<< " samples and " << NumEntries
1589 << " LBR entries\n";
1590 if (NumTotalSamples
) {
1591 if (NumSamples
&& NumSamplesNoLBR
== NumSamples
) {
1592 // Note: we don't know if perf2bolt is being used to parse memory samples
1593 // at this point. In this case, it is OK to parse zero LBRs.
1594 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1595 "LBR. Record profile with perf record -j any or run perf2bolt "
1596 "in no-LBR mode with -nl (the performance improvement in -nl "
1597 "mode may be limited)\n";
1599 const uint64_t IgnoredSamples
= NumTotalSamples
- NumSamples
;
1600 const float PercentIgnored
= 100.0f
* IgnoredSamples
/ NumTotalSamples
;
1601 outs() << "PERF2BOLT: " << IgnoredSamples
<< " samples";
1602 printColored(outs(), PercentIgnored
, 20, 50);
1603 outs() << " were ignored\n";
1604 if (PercentIgnored
> 50.0f
)
1605 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1606 "were attributed to the input binary\n";
1609 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1610 << NumInvalidTraces
;
1612 if (NumTraces
> 0) {
1613 Perc
= NumInvalidTraces
* 100.0f
/ NumTraces
;
1614 printColored(outs(), Perc
, 5, 10);
1618 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1619 "binary is probably not the same binary used during profiling "
1620 "collection. The generated data may be ineffective for improving "
1623 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1624 << NumLongRangeTraces
;
1626 outs() << format(" (%.1f%%)", NumLongRangeTraces
* 100.0f
/ NumTraces
);
1629 if (NumColdSamples
> 0) {
1630 const float ColdSamples
= NumColdSamples
* 100.0f
/ NumTotalSamples
;
1631 outs() << "PERF2BOLT: " << NumColdSamples
1632 << format(" (%.1f%%)", ColdSamples
)
1633 << " samples recorded in cold regions of split functions.\n";
1634 if (ColdSamples
> 5.0f
)
1636 << "WARNING: The BOLT-processed binary where samples were collected "
1637 "likely used bad data or your service observed a large shift in "
1638 "profile. You may want to audit this.\n";
1641 return std::error_code();
1644 void DataAggregator::processBranchEvents() {
1645 outs() << "PERF2BOLT: processing branch events...\n";
1646 NamedRegionTimer
T("processBranch", "Processing branch events",
1647 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1649 for (const auto &AggrLBR
: FallthroughLBRs
) {
1650 const Trace
&Loc
= AggrLBR
.first
;
1651 const FTInfo
&Info
= AggrLBR
.second
;
1652 LBREntry First
{Loc
.From
, Loc
.From
, false};
1653 LBREntry Second
{Loc
.To
, Loc
.To
, false};
1654 if (Info
.InternCount
)
1655 doTrace(First
, Second
, Info
.InternCount
);
1656 if (Info
.ExternCount
) {
1658 doTrace(First
, Second
, Info
.ExternCount
);
1662 for (const auto &AggrLBR
: BranchLBRs
) {
1663 const Trace
&Loc
= AggrLBR
.first
;
1664 const TakenBranchInfo
&Info
= AggrLBR
.second
;
1665 doBranch(Loc
.From
, Loc
.To
, Info
.TakenCount
, Info
.MispredCount
,
1666 /*IsPreagg*/ false);
1670 std::error_code
DataAggregator::parseBasicEvents() {
1671 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1672 NamedRegionTimer
T("parseBasic", "Parsing basic events", TimerGroupName
,
1673 TimerGroupDesc
, opts::TimeAggregator
);
1675 ErrorOr
<PerfBasicSample
> Sample
= parseBasicSample();
1676 if (std::error_code EC
= Sample
.getError())
1682 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Sample
->PC
))
1683 BF
->setHasProfileAvailable();
1685 ++BasicSamples
[Sample
->PC
];
1686 EventNames
.insert(Sample
->EventName
);
1689 return std::error_code();
1692 void DataAggregator::processBasicEvents() {
1693 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1694 NamedRegionTimer
T("processBasic", "Processing basic events", TimerGroupName
,
1695 TimerGroupDesc
, opts::TimeAggregator
);
1696 uint64_t OutOfRangeSamples
= 0;
1697 uint64_t NumSamples
= 0;
1698 for (auto &Sample
: BasicSamples
) {
1699 const uint64_t PC
= Sample
.first
;
1700 const uint64_t HitCount
= Sample
.second
;
1701 NumSamples
+= HitCount
;
1702 BinaryFunction
*Func
= getBinaryFunctionContainingAddress(PC
);
1704 OutOfRangeSamples
+= HitCount
;
1708 doSample(*Func
, PC
, HitCount
);
1710 outs() << "PERF2BOLT: read " << NumSamples
<< " samples\n";
1712 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1713 << OutOfRangeSamples
;
1715 if (NumSamples
> 0) {
1717 Perc
= OutOfRangeSamples
* 100.0f
/ NumSamples
;
1718 if (outs().has_colors()) {
1720 outs().changeColor(raw_ostream::RED
);
1721 else if (Perc
> 40.0f
)
1722 outs().changeColor(raw_ostream::YELLOW
);
1724 outs().changeColor(raw_ostream::GREEN
);
1726 outs() << format("%.1f%%", Perc
);
1727 if (outs().has_colors())
1728 outs().resetColor();
1733 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1734 "binary is probably not the same binary used during profiling "
1735 "collection. The generated data may be ineffective for improving "
1739 std::error_code
DataAggregator::parseMemEvents() {
1740 outs() << "PERF2BOLT: parsing memory events...\n";
1741 NamedRegionTimer
T("parseMemEvents", "Parsing mem events", TimerGroupName
,
1742 TimerGroupDesc
, opts::TimeAggregator
);
1744 ErrorOr
<PerfMemSample
> Sample
= parseMemSample();
1745 if (std::error_code EC
= Sample
.getError())
1748 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Sample
->PC
))
1749 BF
->setHasProfileAvailable();
1751 MemSamples
.emplace_back(std::move(Sample
.get()));
1754 return std::error_code();
1757 void DataAggregator::processMemEvents() {
1758 NamedRegionTimer
T("ProcessMemEvents", "Processing mem events",
1759 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1760 for (const PerfMemSample
&Sample
: MemSamples
) {
1761 uint64_t PC
= Sample
.PC
;
1762 uint64_t Addr
= Sample
.Addr
;
1766 // Try to resolve symbol for PC
1767 BinaryFunction
*Func
= getBinaryFunctionContainingAddress(PC
);
1769 LLVM_DEBUG(if (PC
!= 0) {
1770 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC
, Addr
);
1775 FuncName
= Func
->getOneName();
1776 PC
-= Func
->getAddress();
1778 // Try to resolve symbol for memory load
1779 if (BinaryData
*BD
= BC
->getBinaryDataContainingAddress(Addr
)) {
1780 MemName
= BD
->getName();
1781 Addr
-= BD
->getAddress();
1782 } else if (opts::FilterMemProfile
) {
1783 // Filter out heap/stack accesses
1787 const Location
FuncLoc(!FuncName
.empty(), FuncName
, PC
);
1788 const Location
AddrLoc(!MemName
.empty(), MemName
, Addr
);
1790 FuncMemData
*MemData
= &NamesToMemEvents
[FuncName
];
1791 MemData
->Name
= FuncName
;
1792 setMemData(*Func
, MemData
);
1793 MemData
->update(FuncLoc
, AddrLoc
);
1794 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc
<< " = " << AddrLoc
<< "\n");
1798 std::error_code
DataAggregator::parsePreAggregatedLBRSamples() {
1799 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1800 NamedRegionTimer
T("parseAggregated", "Parsing aggregated branch events",
1801 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1803 ErrorOr
<AggregatedLBREntry
> AggrEntry
= parseAggregatedLBREntry();
1804 if (std::error_code EC
= AggrEntry
.getError())
1807 for (const uint64_t Addr
: {AggrEntry
->From
.Offset
, AggrEntry
->To
.Offset
})
1808 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Addr
))
1809 BF
->setHasProfileAvailable();
1811 AggregatedLBRs
.emplace_back(std::move(AggrEntry
.get()));
1814 return std::error_code();
1817 void DataAggregator::processPreAggregated() {
1818 outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1819 NamedRegionTimer
T("processAggregated", "Processing aggregated branch events",
1820 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1822 uint64_t NumTraces
= 0;
1823 for (const AggregatedLBREntry
&AggrEntry
: AggregatedLBRs
) {
1824 switch (AggrEntry
.EntryType
) {
1825 case AggregatedLBREntry::BRANCH
:
1826 doBranch(AggrEntry
.From
.Offset
, AggrEntry
.To
.Offset
, AggrEntry
.Count
,
1827 AggrEntry
.Mispreds
, /*IsPreagg*/ true);
1829 case AggregatedLBREntry::FT
:
1830 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN
: {
1831 LBREntry First
{AggrEntry
.EntryType
== AggregatedLBREntry::FT
1832 ? AggrEntry
.From
.Offset
1834 AggrEntry
.From
.Offset
, false};
1835 LBREntry Second
{AggrEntry
.To
.Offset
, AggrEntry
.To
.Offset
, false};
1836 doTrace(First
, Second
, AggrEntry
.Count
);
1837 NumTraces
+= AggrEntry
.Count
;
1843 outs() << "PERF2BOLT: read " << AggregatedLBRs
.size()
1844 << " aggregated LBR entries\n";
1845 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1846 << NumInvalidTraces
;
1848 if (NumTraces
> 0) {
1850 Perc
= NumInvalidTraces
* 100.0f
/ NumTraces
;
1851 if (outs().has_colors()) {
1853 outs().changeColor(raw_ostream::RED
);
1854 else if (Perc
> 5.0f
)
1855 outs().changeColor(raw_ostream::YELLOW
);
1857 outs().changeColor(raw_ostream::GREEN
);
1859 outs() << format("%.1f%%", Perc
);
1860 if (outs().has_colors())
1861 outs().resetColor();
1866 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1867 "binary is probably not the same binary used during profiling "
1868 "collection. The generated data may be ineffective for improving "
1871 outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1872 << NumLongRangeTraces
;
1874 outs() << format(" (%.1f%%)", NumLongRangeTraces
* 100.0f
/ NumTraces
);
1878 std::optional
<int32_t> DataAggregator::parseCommExecEvent() {
1879 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1880 if (LineEnd
== StringRef::npos
) {
1881 reportError("expected rest of line");
1882 Diag
<< "Found: " << ParsingBuf
<< "\n";
1883 return std::nullopt
;
1885 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1887 size_t Pos
= Line
.find("PERF_RECORD_COMM exec");
1888 if (Pos
== StringRef::npos
)
1889 return std::nullopt
;
1890 Line
= Line
.drop_front(Pos
);
1893 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1894 StringRef PIDStr
= Line
.rsplit(':').second
.split('/').first
;
1896 if (PIDStr
.getAsInteger(10, PID
)) {
1897 reportError("expected PID");
1898 Diag
<< "Found: " << PIDStr
<< "in '" << Line
<< "'\n";
1899 return std::nullopt
;
1906 std::optional
<uint64_t> parsePerfTime(const StringRef TimeStr
) {
1907 const StringRef SecTimeStr
= TimeStr
.split('.').first
;
1908 const StringRef USecTimeStr
= TimeStr
.split('.').second
;
1911 if (SecTimeStr
.getAsInteger(10, SecTime
) ||
1912 USecTimeStr
.getAsInteger(10, USecTime
))
1913 return std::nullopt
;
1914 return SecTime
* 1000000ULL + USecTime
;
1918 std::optional
<DataAggregator::ForkInfo
> DataAggregator::parseForkEvent() {
1919 while (checkAndConsumeFS()) {
1922 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1923 if (LineEnd
== StringRef::npos
) {
1924 reportError("expected rest of line");
1925 Diag
<< "Found: " << ParsingBuf
<< "\n";
1926 return std::nullopt
;
1928 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1930 size_t Pos
= Line
.find("PERF_RECORD_FORK");
1931 if (Pos
== StringRef::npos
) {
1932 consumeRestOfLine();
1933 return std::nullopt
;
1938 const StringRef TimeStr
=
1939 Line
.substr(0, Pos
).rsplit(':').first
.rsplit(FieldSeparator
).second
;
1940 if (std::optional
<uint64_t> TimeRes
= parsePerfTime(TimeStr
)) {
1944 Line
= Line
.drop_front(Pos
);
1947 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1948 const StringRef ChildPIDStr
= Line
.split('(').second
.split(':').first
;
1949 if (ChildPIDStr
.getAsInteger(10, FI
.ChildPID
)) {
1950 reportError("expected PID");
1951 Diag
<< "Found: " << ChildPIDStr
<< "in '" << Line
<< "'\n";
1952 return std::nullopt
;
1955 const StringRef ParentPIDStr
= Line
.rsplit('(').second
.split(':').first
;
1956 if (ParentPIDStr
.getAsInteger(10, FI
.ParentPID
)) {
1957 reportError("expected PID");
1958 Diag
<< "Found: " << ParentPIDStr
<< "in '" << Line
<< "'\n";
1959 return std::nullopt
;
1962 consumeRestOfLine();
1967 ErrorOr
<std::pair
<StringRef
, DataAggregator::MMapInfo
>>
1968 DataAggregator::parseMMapEvent() {
1969 while (checkAndConsumeFS()) {
1972 MMapInfo ParsedInfo
;
1974 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1975 if (LineEnd
== StringRef::npos
) {
1976 reportError("expected rest of line");
1977 Diag
<< "Found: " << ParsingBuf
<< "\n";
1978 return make_error_code(llvm::errc::io_error
);
1980 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1982 size_t Pos
= Line
.find("PERF_RECORD_MMAP2");
1983 if (Pos
== StringRef::npos
) {
1984 consumeRestOfLine();
1985 return std::make_pair(StringRef(), ParsedInfo
);
1989 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1991 const StringRef TimeStr
=
1992 Line
.substr(0, Pos
).rsplit(':').first
.rsplit(FieldSeparator
).second
;
1993 if (std::optional
<uint64_t> TimeRes
= parsePerfTime(TimeStr
))
1994 ParsedInfo
.Time
= *TimeRes
;
1996 Line
= Line
.drop_front(Pos
);
1999 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
2001 StringRef FileName
= Line
.rsplit(FieldSeparator
).second
;
2002 if (FileName
.starts_with("//") || FileName
.starts_with("[")) {
2003 consumeRestOfLine();
2004 return std::make_pair(StringRef(), ParsedInfo
);
2006 FileName
= sys::path::filename(FileName
);
2008 const StringRef PIDStr
= Line
.split(FieldSeparator
).second
.split('/').first
;
2009 if (PIDStr
.getAsInteger(10, ParsedInfo
.PID
)) {
2010 reportError("expected PID");
2011 Diag
<< "Found: " << PIDStr
<< "in '" << Line
<< "'\n";
2012 return make_error_code(llvm::errc::io_error
);
2015 const StringRef BaseAddressStr
= Line
.split('[').second
.split('(').first
;
2016 if (BaseAddressStr
.getAsInteger(0, ParsedInfo
.MMapAddress
)) {
2017 reportError("expected base address");
2018 Diag
<< "Found: " << BaseAddressStr
<< "in '" << Line
<< "'\n";
2019 return make_error_code(llvm::errc::io_error
);
2022 const StringRef SizeStr
= Line
.split('(').second
.split(')').first
;
2023 if (SizeStr
.getAsInteger(0, ParsedInfo
.Size
)) {
2024 reportError("expected mmaped size");
2025 Diag
<< "Found: " << SizeStr
<< "in '" << Line
<< "'\n";
2026 return make_error_code(llvm::errc::io_error
);
2029 const StringRef OffsetStr
=
2030 Line
.split('@').second
.ltrim().split(FieldSeparator
).first
;
2031 if (OffsetStr
.getAsInteger(0, ParsedInfo
.Offset
)) {
2032 reportError("expected mmaped page-aligned offset");
2033 Diag
<< "Found: " << OffsetStr
<< "in '" << Line
<< "'\n";
2034 return make_error_code(llvm::errc::io_error
);
2037 consumeRestOfLine();
2039 return std::make_pair(FileName
, ParsedInfo
);
2042 std::error_code
DataAggregator::parseMMapEvents() {
2043 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
2044 NamedRegionTimer
T("parseMMapEvents", "Parsing mmap events", TimerGroupName
,
2045 TimerGroupDesc
, opts::TimeAggregator
);
2047 std::multimap
<StringRef
, MMapInfo
> GlobalMMapInfo
;
2049 ErrorOr
<std::pair
<StringRef
, MMapInfo
>> FileMMapInfoRes
= parseMMapEvent();
2050 if (std::error_code EC
= FileMMapInfoRes
.getError())
2053 std::pair
<StringRef
, MMapInfo
> FileMMapInfo
= FileMMapInfoRes
.get();
2054 if (FileMMapInfo
.second
.PID
== -1)
2056 if (FileMMapInfo
.first
== "(deleted)")
2059 // Consider only the first mapping of the file for any given PID
2060 auto Range
= GlobalMMapInfo
.equal_range(FileMMapInfo
.first
);
2061 bool PIDExists
= llvm::any_of(make_range(Range
), [&](const auto &MI
) {
2062 return MI
.second
.PID
== FileMMapInfo
.second
.PID
;
2068 GlobalMMapInfo
.insert(FileMMapInfo
);
2072 dbgs() << "FileName -> mmap info:\n"
2073 << " Filename : PID [MMapAddr, Size, Offset]\n";
2074 for (const auto &[Name
, MMap
] : GlobalMMapInfo
)
2075 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name
, MMap
.PID
,
2076 MMap
.MMapAddress
, MMap
.Size
, MMap
.Offset
);
2079 StringRef NameToUse
= llvm::sys::path::filename(BC
->getFilename());
2080 if (GlobalMMapInfo
.count(NameToUse
) == 0 && !BuildIDBinaryName
.empty()) {
2081 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2082 << "\" for profile matching\n";
2083 NameToUse
= BuildIDBinaryName
;
2086 auto Range
= GlobalMMapInfo
.equal_range(NameToUse
);
2087 for (MMapInfo
&MMapInfo
: llvm::make_second_range(make_range(Range
))) {
2088 if (BC
->HasFixedLoadAddress
&& MMapInfo
.MMapAddress
) {
2089 // Check that the binary mapping matches one of the segments.
2090 bool MatchFound
= llvm::any_of(
2091 llvm::make_second_range(BC
->SegmentMapInfo
),
2092 [&](SegmentInfo
&SegInfo
) {
2093 // The mapping is page-aligned and hence the MMapAddress could be
2094 // different from the segment start address. We cannot know the page
2095 // size of the mapping, but we know it should not exceed the segment
2096 // alignment value. Hence we are performing an approximate check.
2097 return SegInfo
.Address
>= MMapInfo
.MMapAddress
&&
2098 SegInfo
.Address
- MMapInfo
.MMapAddress
< SegInfo
.Alignment
&&
2099 SegInfo
.IsExecutable
;
2102 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2103 << " at 0x" << Twine::utohexstr(MMapInfo
.MMapAddress
) << '\n';
2108 // Set base address for shared objects.
2109 if (!BC
->HasFixedLoadAddress
) {
2110 std::optional
<uint64_t> BaseAddress
=
2111 BC
->getBaseAddressForMapping(MMapInfo
.MMapAddress
, MMapInfo
.Offset
);
2113 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2114 "binary when memory mapped at 0x"
2115 << Twine::utohexstr(MMapInfo
.MMapAddress
)
2116 << " using file offset 0x" << Twine::utohexstr(MMapInfo
.Offset
)
2117 << ". Ignoring profile data for this mapping\n";
2120 MMapInfo
.BaseAddress
= *BaseAddress
;
2124 BinaryMMapInfo
.insert(std::make_pair(MMapInfo
.PID
, MMapInfo
));
2127 if (BinaryMMapInfo
.empty()) {
2128 if (errs().has_colors())
2129 errs().changeColor(raw_ostream::RED
);
2130 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2131 << BC
->getFilename() << "\".";
2132 if (!GlobalMMapInfo
.empty()) {
2133 errs() << " Profile for the following binary name(s) is available:\n";
2134 for (auto I
= GlobalMMapInfo
.begin(), IE
= GlobalMMapInfo
.end(); I
!= IE
;
2135 I
= GlobalMMapInfo
.upper_bound(I
->first
))
2136 errs() << " " << I
->first
<< '\n';
2137 errs() << "Please rename the input binary.\n";
2139 errs() << " Failed to extract any binary name from a profile.\n";
2141 if (errs().has_colors())
2142 errs().resetColor();
2147 return std::error_code();
/// Parse the task-event section of the perf-script output and keep
/// BinaryMMapInfo (keyed by PID) in sync with it:
///  - a comm-exec event for a PID whose entry is marked Forked erases
///    that entry;
///  - a fork event copies the parent's entry to the child PID and marks the
///    copy as Forked.
/// Afterwards the number of PIDs associated with the input binary is printed
/// to outs(), followed by one formatted line per entry.
///
/// The last visible statement returns a default-constructed std::error_code.
///
/// NOTE(review): the loop header and the early-exit statements between the
/// visible lines are not part of this view; the comments below describe only
/// the statements that are visible.
2150 std::error_code
DataAggregator::parseTaskEvents() {
2151 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2152 NamedRegionTimer
T("parseTaskEvents", "Parsing task events", TimerGroupName
,
2153 TimerGroupDesc
, opts::TimeAggregator
);
// A successfully parsed comm-exec event yields the key used to look up the
// BinaryMMapInfo entry below.
2156 if (std::optional
<int32_t> CommInfo
= parseCommExecEvent()) {
2157 // Remove forked child that ran execve
2158 auto MMapInfoIter
= BinaryMMapInfo
.find(*CommInfo
);
2159 if (MMapInfoIter
!= BinaryMMapInfo
.end() && MMapInfoIter
->second
.Forked
)
2160 BinaryMMapInfo
.erase(MMapInfoIter
);
2161 consumeRestOfLine();
// Otherwise try to interpret the input as a fork event.
2165 std::optional
<ForkInfo
> ForkInfo
= parseForkEvent();
// Parent and child PID are identical: the action taken is on a line that is
// not visible here.
2169 if (ForkInfo
->ParentPID
== ForkInfo
->ChildPID
)
2172 if (ForkInfo
->Time
== 0) {
2173 // Process was forked and mmapped before perf ran. In this case the child
2174 // should have its own mmap entry unless it was execve'd.
// Look up the parent's mapping; it is the template for the child's entry.
2178 auto MMapInfoIter
= BinaryMMapInfo
.find(ForkInfo
->ParentPID
);
2179 if (MMapInfoIter
== BinaryMMapInfo
.end())
// Clone the parent's mapping under the child's PID and flag it as forked so
// that a later comm-exec event for that PID can remove it again.
2182 MMapInfo MMapInfo
= MMapInfoIter
->second
;
2183 MMapInfo
.PID
= ForkInfo
->ChildPID
;
2184 MMapInfo
.Forked
= true;
2185 BinaryMMapInfo
.insert(std::make_pair(MMapInfo
.PID
, MMapInfo
));
// Summary: number of PIDs currently associated with the input binary.
2188 outs() << "PERF2BOLT: input binary is associated with "
2189 << BinaryMMapInfo
.size() << " PID(s)\n";
// One line per mapping: PID, optional " (forked)" marker, then hex fields.
2192 for (const MMapInfo
&MMI
: llvm::make_second_range(BinaryMMapInfo
))
2193 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI
.PID
,
2194 (MMI
.Forked
? " (forked)" : ""), MMI
.MMapAddress
,
2198 return std::error_code();
/// Parse one "<build-id> <name>" entry from the parsing buffer.
///
/// The build ID string is read first and the name second, but the returned
/// pair is ordered (name, build ID): `first` is the name and `second` is the
/// build ID.
///
/// \returns the pair on success, or std::nullopt when either string fails to
/// parse or when an entry is incomplete (see the comment in the body).
///
/// NOTE(review): the body of the leading while-loop and the condition that
/// guards the "missing string" path are on lines not visible in this view.
2201 std::optional
<std::pair
<StringRef
, StringRef
>>
2202 DataAggregator::parseNameBuildIDPair() {
// Consume leading field separators.
2203 while (checkAndConsumeFS()) {
// First field: the build ID.
2206 ErrorOr
<StringRef
> BuildIDStr
= parseString(FieldSeparator
, true);
2207 if (std::error_code EC
= BuildIDStr
.getError())
2208 return std::nullopt
;
2210 // If one of the strings is missing, don't issue a parsing error, but still
2211 // do not return a value.
2212 consumeAllRemainingFS();
2214 return std::nullopt
;
// Second field: the file name.
2216 ErrorOr
<StringRef
> NameStr
= parseString(FieldSeparator
, true);
2217 if (std::error_code EC
= NameStr
.getError())
2218 return std::nullopt
;
// Discard whatever is left on the line and hand back (name, build ID).
2220 consumeRestOfLine();
2221 return std::make_pair(NameStr
.get(), BuildIDStr
.get());
/// Check the build-ID listing for entries that parseNameBuildIDPair() cannot
/// parse.
///
/// The current ParsingBuf is saved on entry and restored before the final
/// return, so that return path leaves the parse position unchanged.
///
/// \returns true when no entry was flagged as invalid.
///
/// NOTE(review): the loop around the parse call and any early returns are on
/// lines not visible in this view.
2224 bool DataAggregator::hasAllBuildIDs() {
// Remember the parse position so it can be restored afterwards.
2225 const StringRef SavedParsingBuf
= ParsingBuf
;
2230 bool HasInvalidEntries
= false;
// A failed parse marks the listing as containing invalid entries.
2232 if (!parseNameBuildIDPair()) {
2233 HasInvalidEntries
= true;
// Rewind the buffer to where it was on entry.
2238 ParsingBuf
= SavedParsingBuf
;
2240 return !HasInvalidEntries
;
/// Search the build-ID listing for an entry whose build ID begins with
/// \p FileBuildID and record the file-name component of its path.
///
/// The current ParsingBuf is saved on entry and restored after the search.
///
/// \param FileBuildID build ID to look for; matching is by prefix
/// (`starts_with`), so the listed ID may be longer than \p FileBuildID.
/// \returns std::nullopt on the last visible return path.
///
/// NOTE(review): the declaration of FileName, the enclosing loop and the
/// statement executed when FileName is non-empty are on lines not visible in
/// this view; presumably that statement returns FileName - confirm.
2243 std::optional
<StringRef
>
2244 DataAggregator::getFileNameForBuildID(StringRef FileBuildID
) {
// Remember the parse position so it can be restored afterwards.
2245 const StringRef SavedParsingBuf
= ParsingBuf
;
// IDPair is (name, build ID) as produced by parseNameBuildIDPair().
2249 std::optional
<std::pair
<StringRef
, StringRef
>> IDPair
=
2250 parseNameBuildIDPair();
2252 consumeRestOfLine();
// Prefix match on the build ID; keep only the last path component of the name.
2256 if (IDPair
->second
.starts_with(FileBuildID
)) {
2257 FileName
= sys::path::filename(IDPair
->first
);
// Rewind the buffer to where it was on entry.
2262 ParsingBuf
= SavedParsingBuf
;
2264 if (!FileName
.empty())
2267 return std::nullopt
;
/// Write the aggregated profile to \p OutputFilename as text.
///
/// Visible output, in order:
///  - the line "boltedcollection";
///  - with opts::BasicAggregation: "no_lbr" followed by the event names, then
///    one record per sample: <location> <hits>;
///  - one record per branch: <from location> <to location> <mispreds>
///    <branches>, first for FuncBranchData::Data and then for EntryData whose
///    source is not a known symbol;
///  - one record per memory event: <offset location> <address location>
///    <count>.
/// A summary line with BranchValues and MemValues is printed to outs().
///
/// \param OutputFilename path of the file to create.
/// The last visible statement returns a default-constructed std::error_code.
///
/// NOTE(review): the return-type line, the declaration of EC, the conditions
/// selecting between the branches below and the updates of BranchValues and
/// MemValues are on lines not visible in this view.
2271 DataAggregator::writeAggregatedFile(StringRef OutputFilename
) const {
2273 raw_fd_ostream
OutFile(OutputFilename
, EC
, sys::fs::OpenFlags::OF_None
);
// Captured by reference in writeLocation; set to true before memory events
// are written (see below).
2277 bool WriteMemLocs
= false;
// Emit one location as "<kind> <name> <hex offset>" followed by
// FieldSeparator. Two kind encodings are visible: "4 "/"3 " and "1 "/"0 ",
// each keyed on Loc.IsSymbol. The condition choosing between them is on a
// line not visible here (presumably WriteMemLocs - confirm). An empty name
// is written as "[unknown]"; other names go through getEscapedName().
2279 auto writeLocation
= [&OutFile
, &WriteMemLocs
](const Location
&Loc
) {
2281 OutFile
<< (Loc
.IsSymbol
? "4 " : "3 ");
2283 OutFile
<< (Loc
.IsSymbol
? "1 " : "0 ");
2284 OutFile
<< (Loc
.Name
.empty() ? "[unknown]" : getEscapedName(Loc
.Name
))
2285 << " " << Twine::utohexstr(Loc
.Offset
) << FieldSeparator
;
// Counters reported in the summary line at the end.
2288 uint64_t BranchValues
= 0;
2289 uint64_t MemValues
= 0;
2292 OutFile
<< "boltedcollection\n";
// Basic (non-LBR) aggregation: header naming the events, then the samples.
2293 if (opts::BasicAggregation
) {
2294 OutFile
<< "no_lbr";
2295 for (const StringMapEntry
<std::nullopt_t
> &Entry
: EventNames
)
2296 OutFile
<< " " << Entry
.getKey();
// Sample records: location followed by the hit count.
2299 for (const auto &KV
: NamesToSamples
) {
2300 const FuncSampleData
&FSD
= KV
.second
;
2301 for (const SampleInfo
&SI
: FSD
.Data
) {
2302 writeLocation(SI
.Loc
);
2303 OutFile
<< SI
.Hits
<< "\n";
// Branch records: source, destination, mispredictions, branch count.
2308 for (const auto &KV
: NamesToBranches
) {
2309 const FuncBranchData
&FBD
= KV
.second
;
2310 for (const BranchInfo
&BI
: FBD
.Data
) {
2311 writeLocation(BI
.From
);
2312 writeLocation(BI
.To
);
2313 OutFile
<< BI
.Mispreds
<< " " << BI
.Branches
<< "\n";
// Entry records use the same layout as the branch records above.
2316 for (const BranchInfo
&BI
: FBD
.EntryData
) {
2317 // Do not output if source is a known symbol, since this was already
2318 // accounted for in the source function
2319 if (BI
.From
.IsSymbol
)
2321 writeLocation(BI
.From
);
2322 writeLocation(BI
.To
);
2323 OutFile
<< BI
.Mispreds
<< " " << BI
.Branches
<< "\n";
// From here on writeLocation sees WriteMemLocs == true.
2328 WriteMemLocs
= true;
// Memory event records: offset location, address location, count.
2329 for (const auto &KV
: NamesToMemEvents
) {
2330 const FuncMemData
&FMD
= KV
.second
;
2331 for (const MemInfo
&MemEvent
: FMD
.Data
) {
2332 writeLocation(MemEvent
.Offset
);
2333 writeLocation(MemEvent
.Addr
);
2334 OutFile
<< MemEvent
.Count
<< "\n";
// Summary for the user.
2340 outs() << "PERF2BOLT: wrote " << BranchValues
<< " objects and " << MemValues
2341 << " memory objects to " << OutputFilename
<< "\n";
2343 return std::error_code();
/// Build a yaml::bolt::BinaryProfile for \p BC and emit it through a
/// yaml::Output attached to \p OutputFilename.
///
/// Visible steps:
///  1. Fill the profile header from \p BC (file name, build ID or
///     "<unknown>", reader name, comma-separated event names, sample/LBR
///     flag).
///  2. If pseudo probes are requested (opts::ProfileWritePseudoProbes),
///     convert the probe descriptors and the inline tree.
///  3. Unless opts::BasicAggregation is set:
///     a. convert functions with a profile through
///        YAMLProfileWriter::convert;
///     b. for each entry of NamesToBranches, build a function profile from
///        the BAT block map: block hashes and indices, successors, call
///        sites, entry counts and, optionally, pseudo probes.
///
/// \param BC binary context describing the input binary.
/// \param OutputFilename path of the YAML file to create.
/// The last visible statement returns a default-constructed std::error_code.
///
/// NOTE(review): many lines of this function (the declaration of EC, null
/// checks, `continue` statements, else-branches, closing braces and the final
/// write through Out) are not visible in this view; comments below describe
/// only the visible statements.
2346 std::error_code
DataAggregator::writeBATYAML(BinaryContext
&BC
,
2347 StringRef OutputFilename
) const {
2349 raw_fd_ostream
OutFile(OutputFilename
, EC
, sys::fs::OpenFlags::OF_None
);
2353 yaml::bolt::BinaryProfile BP
;
// The decoder is only requested when pseudo-probe output is enabled; a null
// pointer disables every probe-related step below.
2355 const MCPseudoProbeDecoder
*PseudoProbeDecoder
=
2356 opts::ProfileWritePseudoProbes
? BC
.getPseudoProbeDecoder() : nullptr;
2358 // Fill out the header info.
2359 BP
.Header
.Version
= 1;
2360 BP
.Header
.FileName
= std::string(BC
.getFilename());
// Fall back to the literal "<unknown>" when the binary has no build ID.
2361 std::optional
<StringRef
> BuildID
= BC
.getFileBuildID();
2362 BP
.Header
.Id
= BuildID
? std::string(*BuildID
) : "<unknown>";
2363 BP
.Header
.Origin
= std::string(getReaderName());
2364 // Only the input binary layout order is supported.
2365 BP
.Header
.IsDFSOrder
= false;
2366 // FIXME: Need to match hash function used to produce BAT hashes.
2367 BP
.Header
.HashFunction
= HashFunction::Default
;
// Join all event names with "," directly into the header string.
2369 ListSeparator
LS(",");
2370 raw_string_ostream
EventNamesOS(BP
.Header
.EventNames
);
2371 for (const StringMapEntry
<std::nullopt_t
> &EventEntry
: EventNames
)
2372 EventNamesOS
<< LS
<< EventEntry
.first().str();
// Record whether this is a sample-based or an LBR-based profile.
2374 BP
.Header
.Flags
= opts::BasicAggregation
? BinaryFunction::PF_SAMPLE
2375 : BinaryFunction::PF_LBR
;
2377 // Add probe inline tree nodes.
2378 YAMLProfileWriter::InlineTreeDesc InlineTree
;
2379 if (PseudoProbeDecoder
)
2380 std::tie(BP
.PseudoProbeDesc
, InlineTree
) =
2381 YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder
);
2383 if (!opts::BasicAggregation
) {
2384 // Convert profile for functions not covered by BAT
2385 for (auto &BFI
: BC
.getBinaryFunctions()) {
2386 BinaryFunction
&Function
= BFI
.second
;
// The statements taken when these two checks succeed are not visible here.
2387 if (!Function
.hasProfile())
2389 if (BAT
->isBATFunction(Function
.getAddress()))
2391 BP
.Functions
.emplace_back(YAMLProfileWriter::convert(
2392 Function
, /*UseDFS=*/false, InlineTree
, BAT
));
// Build a profile for each function with aggregated branch data, using the
// block layout recorded in BAT.
2395 for (const auto &KV
: NamesToBranches
) {
2396 const StringRef FuncName
= KV
.first
;
2397 const FuncBranchData
&Branches
= KV
.second
;
2398 yaml::bolt::BinaryFunctionProfile YamlBF
;
2399 BinaryData
*BD
= BC
.getBinaryDataByName(FuncName
);
2401 uint64_t FuncAddress
= BD
->getAddress();
2402 if (!BAT
->isBATFunction(FuncAddress
))
2404 BinaryFunction
*BF
= BC
.getBinaryFunctionAtAddress(FuncAddress
);
// Function-level fields: identity from BF, hash and block count from BAT.
2406 YamlBF
.Name
= getLocationName(*BF
, BAT
);
2407 YamlBF
.Id
= BF
->getFunctionNumber();
2408 YamlBF
.Hash
= BAT
->getBFHash(FuncAddress
);
2409 YamlBF
.ExecCount
= BF
->getKnownExecutionCount();
2410 YamlBF
.NumBasicBlocks
= BAT
->getNumBasicBlocks(FuncAddress
);
2411 const BoltAddressTranslation::BBHashMapTy
&BlockMap
=
2412 BAT
->getBBHashMap(FuncAddress
);
2413 YamlBF
.Blocks
.resize(YamlBF
.NumBasicBlocks
);
// Pair BlockMap entries with YamlBF.Blocks elements positionally and copy
// each block's hash and index.
2415 for (auto &&[Entry
, YamlBB
] : llvm::zip(BlockMap
, YamlBF
.Blocks
)) {
2416 const auto &Block
= Entry
.second
;
2417 YamlBB
.Hash
= Block
.Hash
;
2418 YamlBB
.Index
= Block
.Index
;
2421 // Lookup containing basic block offset and index
// Returns a pair of (BlockMap key, block index). An upper_bound result equal
// to begin() is reported as an invalid BAT section.
// NOTE(review): the statements between the error message and the return
// (presumably stepping BlockIt back to the containing block) are not visible
// in this view - confirm.
2422 auto getBlock
= [&BlockMap
](uint32_t Offset
) {
2423 auto BlockIt
= BlockMap
.upper_bound(Offset
);
2424 if (LLVM_UNLIKELY(BlockIt
== BlockMap
.begin())) {
2425 errs() << "BOLT-ERROR: invalid BAT section\n";
2429 return std::pair(BlockIt
->first
, BlockIt
->second
.Index
);
// Attribute every recorded branch to the block containing its source offset.
2432 for (const BranchInfo
&BI
: Branches
.Data
) {
2433 using namespace yaml::bolt
;
2434 const auto &[BlockOffset
, BlockIndex
] = getBlock(BI
.From
.Offset
);
2435 BinaryBasicBlockProfile
&YamlBB
= YamlBF
.Blocks
[BlockIndex
];
// A branch to a non-zero offset of the same named symbol is recorded as a
// successor edge of the source block.
2436 if (BI
.To
.IsSymbol
&& BI
.To
.Name
== BI
.From
.Name
&& BI
.To
.Offset
!= 0) {
2438 const unsigned SuccIndex
= getBlock(BI
.To
.Offset
).second
;
2439 auto &SI
= YamlBB
.Successors
.emplace_back(SuccessorInfo
{SuccIndex
});
2440 SI
.Count
= BI
.Branches
;
2441 SI
.Mispreds
= BI
.Mispreds
;
// Call-site record; its offset is relative to the start of the containing
// block.
// NOTE(review): the else/branch structure separating this from the successor
// case above is not visible in this view.
2444 const uint32_t Offset
= BI
.From
.Offset
- BlockOffset
;
2445 auto &CSI
= YamlBB
.CallSites
.emplace_back(CallSiteInfo
{Offset
});
2446 CSI
.Count
= BI
.Branches
;
2447 CSI
.Mispreds
= BI
.Mispreds
;
// Resolve the destination only when the target name maps to known binary data.
2448 if (const BinaryData
*BD
= BC
.getBinaryDataByName(BI
.To
.Name
))
2449 YAMLProfileWriter::setCSIDestination(BC
, CSI
, BD
->getSymbol(), BAT
,
2453 // Set entry counts, similar to DataReader::readProfile.
2454 for (const BranchInfo
&BI
: Branches
.EntryData
) {
// Entry data must land on an input block; anything else is reported when
// opts::Verbosity is at least 1.
2455 if (!BlockMap
.isInputBlock(BI
.To
.Offset
)) {
2456 if (opts::Verbosity
>= 1)
2457 errs() << "BOLT-WARNING: Unexpected EntryData in " << FuncName
2458 << " at 0x" << Twine::utohexstr(BI
.To
.Offset
) << '\n';
2461 const unsigned BlockIndex
= BlockMap
.getBBIndex(BI
.To
.Offset
);
2462 YamlBF
.Blocks
[BlockIndex
].ExecCount
+= BI
.Branches
;
// Optional pseudo-probe information for this function.
2464 if (PseudoProbeDecoder
) {
2465 DenseMap
<const MCDecodedPseudoProbeInlineTree
*, uint32_t>
2467 if (BF
->getGUID()) {
2468 std::tie(YamlBF
.InlineTree
, InlineTreeNodeId
) =
2469 YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder
,
2470 InlineTree
, BF
->getGUID());
2472 // Fetch probes belonging to all fragments
2473 const AddressProbesMap
&ProbeMap
=
2474 PseudoProbeDecoder
->getAddress2ProbesMap();
// Work on a copy of BF's fragment set with BF itself added.
2475 BinaryFunction::FragmentsSetTy
Fragments(BF
->Fragments
);
2476 Fragments
.insert(BF
);
2479 std::vector
<std::reference_wrapper
<const MCDecodedPseudoProbe
>>>
// Group the probes found in each fragment's address range by the index of
// the input block they translate to.
2481 for (const BinaryFunction
*F
: Fragments
) {
2482 const uint64_t FuncAddr
= F
->getAddress();
2483 for (const MCDecodedPseudoProbe
&Probe
:
2484 ProbeMap
.find(FuncAddr
, FuncAddr
+ F
->getSize())) {
// NOTE(review): OutputAddress is declared uint32_t while FuncAddr is
// uint64_t - confirm that narrowing Probe.getAddress() here is intended.
2485 const uint32_t OutputAddress
= Probe
.getAddress();
2486 const uint32_t InputOffset
= BAT
->translate(
2487 FuncAddr
, OutputAddress
- FuncAddr
, /*IsBranchSrc=*/true);
2488 const unsigned BlockIndex
= getBlock(InputOffset
).second
;
2489 BlockProbes
[BlockIndex
].emplace_back(Probe
);
// Attach the collected probes to their blocks.
2493 for (auto &[Block
, Probes
] : BlockProbes
) {
2494 YamlBF
.Blocks
[Block
].PseudoProbes
=
2495 YAMLProfileWriter::writeBlockProbes(Probes
, InlineTreeNodeId
);
2498 // Skip printing if there's no profile data
// The predicate is true for a block with a zero ExecCount and no successor or
// call site carrying a non-zero Count. The call that consumes the predicate
// is on a line not visible here.
2500 YamlBF
.Blocks
, [](const yaml::bolt::BinaryBasicBlockProfile
&YamlBB
) {
2501 auto HasCount
= [](const auto &SI
) { return SI
.Count
; };
2502 bool HasAnyCount
= YamlBB
.ExecCount
||
2503 llvm::any_of(YamlBB
.Successors
, HasCount
) ||
2504 llvm::any_of(YamlBB
.CallSites
, HasCount
);
2505 return !HasAnyCount
;
2507 BP
.Functions
.emplace_back(YamlBF
);
2511 // Write the profile.
2512 yaml::Output
Out(OutFile
, nullptr, 0);
2514 return std::error_code();
/// Dump the aggregator by forwarding to DataReader::dump().
2517 void DataAggregator::dump() const { DataReader::dump(); }
/// Print one LBR entry to Diag: the From and To addresses in hexadecimal
/// followed by the Mispred field.
/// NOTE(review): the end of the output statement is not visible in this view.
2519 void DataAggregator::dump(const LBREntry
&LBR
) const {
2520 Diag
<< "From: " << Twine::utohexstr(LBR
.From
)
2521 << " To: " << Twine::utohexstr(LBR
.To
) << " Mispred? " << LBR
.Mispred
/// Print a branch sample to Diag: first the number of LBR entries it holds,
/// then a pass over each entry.
/// NOTE(review): the loop body is not visible in this view; presumably it
/// dumps each LBREntry - confirm.
2525 void DataAggregator::dump(const PerfBranchSample
&Sample
) const {
2526 Diag
<< "Sample LBR entries: " << Sample
.LBR
.size() << "\n";
2527 for (const LBREntry
&LBR
: Sample
.LBR
)
/// Print a memory sample to Diag as "Sample mem entries: <PC>: <Addr>".
2531 void DataAggregator::dump(const PerfMemSample
&Sample
) const {
2532 Diag
<< "Sample mem entries: " << Sample
.PC
<< ": " << Sample
.Addr
<< "\n";