1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This family of functions reads profile data written by perf record,
10 // aggregates it, and then writes it back to an output file.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/ScopeExit.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Errc.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/Process.h"
28 #include "llvm/Support/Program.h"
29 #include "llvm/Support/Regex.h"
30 #include "llvm/Support/Timer.h"
31 #include "llvm/Support/raw_ostream.h"
34 #include <unordered_map>
37 #define DEBUG_TYPE "aggregator"
// Aggregator command-line options. Every option defined in this span is
// registered under AggregatorCategory; ProfileFormat is only declared here
// (extern), so its definition lives outside this span.
45 BasicAggregation("nl",
46 cl::desc("aggregate basic samples (without LBR info)"),
47 cl::cat(AggregatorCategory
));
// The string value of -itrace is forwarded to `perf script --itrace=<value>`
// by DataAggregator::start().
49 static cl::opt
<std::string
>
50 ITraceAggregation("itrace",
51 cl::desc("Generate LBR info with perf itrace argument"),
52 cl::cat(AggregatorCategory
));
55 FilterMemProfile("filter-mem-profile",
56 cl::desc("if processing a memory profile, filter out stack or heap accesses "
57 "that won't be useful for BOLT to reduce profile file size"),
59 cl::cat(AggregatorCategory
));
61 static cl::opt
<unsigned long long>
63 cl::desc("only use samples from process with specified PID"),
66 cl::cat(AggregatorCategory
));
69 IgnoreBuildID("ignore-build-id",
70 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
72 cl::cat(AggregatorCategory
));
// Initialized to true; preprocessProfile() resets it to false when
// opts::LinuxKernelMode is set.
74 static cl::opt
<bool> IgnoreInterruptLBR(
75 "ignore-interrupt-lbr",
76 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
77 cl::init(true), cl::cat(AggregatorCategory
));
79 static cl::opt
<unsigned long long>
80 MaxSamples("max-samples",
82 cl::desc("maximum number of samples to read from LBR profile"),
85 cl::cat(AggregatorCategory
));
87 extern cl::opt
<opts::ProfileFormatKind
> ProfileFormat
;
// Not declared static, unlike most options in this span.
89 cl::opt
<bool> ReadPreAggregated(
90 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
91 cl::cat(AggregatorCategory
));
94 TimeAggregator("time-aggr",
95 cl::desc("time BOLT aggregator"),
98 cl::cat(AggregatorCategory
));
101 UseEventPC("use-event-pc",
102 cl::desc("use event PC in combination with LBR sampling"),
103 cl::cat(AggregatorCategory
));
105 static cl::opt
<bool> WriteAutoFDOData(
106 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
107 cl::cat(AggregatorCategory
));
// Timer group name and description handed to NamedRegionTimer (see
// writeAutoFDOData).
113 const char TimerGroupName
[] = "aggregator";
114 const char TimerGroupDesc
[] = "Aggregator";
// Builds a list of {name, address, end address} entries from BC->sections().
// Each section is tested with isText() and for a zero size before an entry is
// formed; the comparator at the end orders entries by ascending BeginAddress.
// NOTE(review): presumably non-text and empty sections are skipped and the
// result is returned sorted -- the guarded statements are not visible here.
116 std::vector
<SectionNameAndRange
> getTextSections(const BinaryContext
*BC
) {
117 std::vector
<SectionNameAndRange
> sections
;
118 for (BinarySection
&Section
: BC
->sections()) {
119 if (!Section
.isText())
121 if (Section
.getSize() == 0)
124 {Section
.getName(), Section
.getAddress(), Section
.getEndAddress()});
127 [](const SectionNameAndRange
&A
, const SectionNameAndRange
&B
) {
128 return A
.BeginAddress
< B
.BeginAddress
;
// Out-of-class definition of the constexpr static data member. Needed for
// ODR-use before C++17; redundant (but harmless) from C++17 on, where such
// members are implicitly inline.
134 constexpr uint64_t DataAggregator::KernelBaseAddr
;
// Removes the aggregator's temporary files on destruction.
136 DataAggregator::~DataAggregator() { deleteTempFiles(); }
// Removes FileName from the filesystem. If sys::fs::remove reports an error,
// a diagnostic containing the file name and the error message goes to errs().
139 void deleteTempFile(const std::string
&FileName
) {
140 if (std::error_code Errc
= sys::fs::remove(FileName
.c_str()))
141 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
142 << " with error " << Errc
.message() << "\n";
// Calls deleteTempFile() on every path recorded in TempFiles.
146 void DataAggregator::deleteTempFiles() {
147 for (std::string
&FileName
: TempFiles
)
148 deleteTempFile(FileName
);
// Searches the directories listed in the PATH environment variable for an
// executable named "perf" and stores the resulting path in PerfPath.
// NOTE(review): the "not found" message is written to outs(), whereas other
// failures in this file are reported on errs() -- confirm this is intended.
152 void DataAggregator::findPerfExecutable() {
153 std::optional
<std::string
> PerfExecutable
=
154 sys::Process::FindInEnvPath("PATH", "perf");
155 if (!PerfExecutable
) {
156 outs() << "PERF2BOLT: No perf executable found!\n";
159 PerfPath
= *PerfExecutable
;
// Announces the aggregation job, locates perf, and spawns the perf script
// jobs. The "main events" job is selected by option:
//   - opts::BasicAggregation:              script -F pid,event,ip
//   - non-empty opts::ITraceAggregation:   script -F pid,ip,brstack --itrace=<value>
//   - otherwise:                           script -F pid,ip,brstack
// Jobs for mem events, mmap ("process") events and task events follow.
162 void DataAggregator::start() {
163 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename
<< "\n";
165 // Don't launch perf for pre-aggregated files
166 if (opts::ReadPreAggregated
)
169 findPerfExecutable();
171 if (opts::BasicAggregation
) {
172 launchPerfProcess("events without LBR",
174 "script -F pid,event,ip",
176 } else if (!opts::ITraceAggregation
.empty()) {
177 std::string ItracePerfScriptArgs
= llvm::formatv(
178 "script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation
);
179 launchPerfProcess("branch events with itrace", MainEventsPPI
,
180 ItracePerfScriptArgs
.c_str(),
183 launchPerfProcess("branch events",
185 "script -F pid,ip,brstack",
189 // Note: we launch script for mem events regardless of the option, as the
190 // command fails fairly fast if mem events were not collected.
191 launchPerfProcess("mem events",
193 "script -F pid,event,addr,ip",
196 launchPerfProcess("process events", MMapEventsPPI
,
197 "script --show-mmap-events --no-itrace",
200 launchPerfProcess("task events", TaskEventsPPI
,
201 "script --show-task-events --no-itrace",
// Tears down the perf jobs started by start(). opts::ReadPreAggregated is
// checked first (no perf jobs are launched for pre-aggregated input). Each of
// the four jobs (task, mmap, main and mem events) is then passed to sys::Wait
// with a 1-second timeout, with &Error supplied to receive any error text.
205 void DataAggregator::abort() {
206 if (opts::ReadPreAggregated
)
211 // Kill subprocesses in case they are not finished
212 sys::Wait(TaskEventsPPI
.PI
, 1, &Error
);
213 sys::Wait(MMapEventsPPI
.PI
, 1, &Error
);
214 sys::Wait(MainEventsPPI
.PI
, 1, &Error
);
215 sys::Wait(MemEventsPPI
.PI
, 1, &Error
);
// Spawns one perf job.
//   Name       - human-readable label used in the progress message.
//   PPI        - receives the stdout/stderr temp-file paths and process info.
//   ArgsString - perf arguments; split on single spaces, so an individual
//                argument cannot itself contain a space.
//   Wait       - NOTE(review): presumably selects ExecuteAndWait (storing the
//                return code in PPI.PI.ReturnCode) over ExecuteNoWait; the
//                branch itself is not visible here.
// The command line is: PerfPath, the split arguments, "-f", "-i", Filename.
// Stdout and stderr are redirected to freshly created temporary files, both
// of which are appended to TempFiles for later deletion; stdin is not
// redirected.
222 void DataAggregator::launchPerfProcess(StringRef Name
, PerfProcessInfo
&PPI
,
223 const char *ArgsString
, bool Wait
) {
224 SmallVector
<StringRef
, 4> Argv
;
226 outs() << "PERF2BOLT: spawning perf job to read " << Name
<< '\n';
227 Argv
.push_back(PerfPath
.data());
229 StringRef(ArgsString
).split(Argv
, ' ');
230 Argv
.push_back("-f");
231 Argv
.push_back("-i");
232 Argv
.push_back(Filename
.c_str());
234 if (std::error_code Errc
=
235 sys::fs::createTemporaryFile("perf.script", "out", PPI
.StdoutPath
)) {
236 errs() << "PERF2BOLT: failed to create temporary file " << PPI
.StdoutPath
237 << " with error " << Errc
.message() << "\n";
240 TempFiles
.push_back(PPI
.StdoutPath
.data());
242 if (std::error_code Errc
=
243 sys::fs::createTemporaryFile("perf.script", "err", PPI
.StderrPath
)) {
244 errs() << "PERF2BOLT: failed to create temporary file " << PPI
.StderrPath
245 << " with error " << Errc
.message() << "\n";
248 TempFiles
.push_back(PPI
.StderrPath
.data());
250 std::optional
<StringRef
> Redirects
[] = {
251 std::nullopt
, // Stdin
252 StringRef(PPI
.StdoutPath
.data()), // Stdout
253 StringRef(PPI
.StderrPath
.data())}; // Stderr
256 dbgs() << "Launching perf: ";
257 for (StringRef Arg
: Argv
)
258 dbgs() << Arg
<< " ";
259 dbgs() << " 1> " << PPI
.StdoutPath
.data() << " 2> " << PPI
.StderrPath
.data()
264 PPI
.PI
.ReturnCode
= sys::ExecuteAndWait(PerfPath
.data(), Argv
,
265 /*envp*/ std::nullopt
, Redirects
);
267 PPI
.PI
= sys::ExecuteNoWait(PerfPath
.data(), Argv
, /*envp*/ std::nullopt
,
// Checks FileBuildID (the build-id of the input binary) against the build-ids
// listed by a perf job labelled "buildid list".
// On a non-zero perf return code the job's stderr file is read and a
// PERF-ERROR is printed. Otherwise the job's stdout is loaded into FileBuf,
// ParsingBuf is pointed at it, and getFileNameForBuildID(FileBuildID) is
// consulted. The outcomes visible here are: an error when the build-id is not
// matched although hasAllBuildIDs() holds, a warning when perf data carries no
// build-ids, a warning (recording the name in BuildIDBinaryName) when the
// matched file name differs from the input binary's, and a success message.
271 void DataAggregator::processFileBuildID(StringRef FileBuildID
) {
272 PerfProcessInfo BuildIDProcessInfo
;
273 launchPerfProcess("buildid list",
278 if (BuildIDProcessInfo
.PI
.ReturnCode
!= 0) {
279 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
280 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo
.StderrPath
.data());
// NOTE(review): MB is dereferenced without a visible MB.getError() check; if
// the stderr file cannot be read this is invalid -- confirm and guard.
281 StringRef ErrBuf
= (*MB
)->getBuffer();
283 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo
.PI
.ReturnCode
289 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
290 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo
.StdoutPath
.data());
291 if (std::error_code EC
= MB
.getError()) {
292 errs() << "Cannot open " << BuildIDProcessInfo
.StdoutPath
.data() << ": "
293 << EC
.message() << "\n";
297 FileBuf
= std::move(*MB
);
298 ParsingBuf
= FileBuf
->getBuffer();
300 std::optional
<StringRef
> FileName
= getFileNameForBuildID(FileBuildID
);
302 if (hasAllBuildIDs()) {
303 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
304 "This indicates the input binary supplied for data aggregation "
305 "is not the same recorded by perf when collecting profiling "
306 "data, or there were no samples recorded for the binary. "
307 "Use -ignore-build-id option to override.\n";
308 if (!opts::IgnoreBuildID
)
311 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
312 "data was recorded without it\n";
315 } else if (*FileName
!= llvm::sys::path::filename(BC
->getFilename())) {
316 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
317 BuildIDBinaryName
= std::string(*FileName
);
319 outs() << "PERF2BOLT: matched build-id and file name\n";
// Tests whether FileName begins with the 7-byte signature "PERFILE".
// The file is opened for reading through the native file API, up to
// sizeof(Buf) == 7 bytes are read from offset 0, and the buffer is compared
// with strncmp. Errors from opening or reading are consumed rather than
// propagated, and a scope-exit guard closes the handle.
// opts::ReadPreAggregated is checked before any file access.
323 bool DataAggregator::checkPerfDataMagic(StringRef FileName
) {
324 if (opts::ReadPreAggregated
)
327 Expected
<sys::fs::file_t
> FD
= sys::fs::openNativeFileForRead(FileName
);
329 consumeError(FD
.takeError());
333 char Buf
[7] = {0, 0, 0, 0, 0, 0, 0};
335 auto Close
= make_scope_exit([&] { sys::fs::closeFile(*FD
); });
336 Expected
<size_t> BytesRead
= sys::fs::readNativeFileSlice(
337 *FD
, MutableArrayRef(Buf
, sizeof(Buf
)), 0);
339 consumeError(BytesRead
.takeError());
346 if (strncmp(Buf
, "PERFILE", 7) == 0)
// Loads Filename (or stdin, per MemoryBuffer::getFileOrSTDIN) into FileBuf,
// points ParsingBuf at its contents and runs parsePreAggregatedLBRSamples().
// A failure to open the input or a parse failure is reported on errs().
351 void DataAggregator::parsePreAggregated() {
354 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
355 MemoryBuffer::getFileOrSTDIN(Filename
);
356 if (std::error_code EC
= MB
.getError()) {
357 errs() << "PERF2BOLT-ERROR: cannot open " << Filename
<< ": "
358 << EC
.message() << "\n";
362 FileBuf
= std::move(*MB
);
363 ParsingBuf
= FileBuf
->getBuffer();
366 if (parsePreAggregatedLBRSamples()) {
367 errs() << "PERF2BOLT: failed to parse samples\n";
// Writes the aggregated profile to OutputFilename in a textual layout of
// three count-prefixed sections, in this order:
//   1. FallthroughLBRs: "<from>-<to>:<InternCount + ExternCount>"
//   2. BasicSamples:    "<pc>:<hit count>"
//   3. BranchLBRs:      "<src>-><dst>:<TakenCount>"
// Addresses are printed in hex without a prefix ("{N:x-}") after passing
// through filterAddress, which subtracts BC->FirstAllocAddress. The value used
// for addresses below FirstAllocAddress is not visible in this span.
// A summary line is printed to outs(), and the visible exit path returns a
// default-constructed std::error_code.
372 std::error_code
DataAggregator::writeAutoFDOData(StringRef OutputFilename
) {
373 outs() << "PERF2BOLT: writing data for autofdo tools...\n";
374 NamedRegionTimer
T("writeAutoFDO", "Processing branch events", TimerGroupName
,
375 TimerGroupDesc
, opts::TimeAggregator
);
378 raw_fd_ostream
OutFile(OutputFilename
, EC
, sys::fs::OpenFlags::OF_None
);
383 // number of unique traces
384 // from_1-to_1:count_1
385 // from_2-to_2:count_2
387 // from_n-to_n:count_n
388 // number of unique sample addresses
393 // number of unique LBR entries
394 // src_1->dst_1:count_1
395 // src_2->dst_2:count_2
397 // src_n->dst_n:count_n
399 const uint64_t FirstAllocAddress
= this->BC
->FirstAllocAddress
;
401 // AutoFDO addresses are relative to the first allocated loadable program
403 auto filterAddress
= [&FirstAllocAddress
](uint64_t Address
) -> uint64_t {
404 if (Address
< FirstAllocAddress
)
406 return Address
- FirstAllocAddress
;
409 OutFile
<< FallthroughLBRs
.size() << "\n";
410 for (const auto &[Trace
, Info
] : FallthroughLBRs
) {
411 OutFile
<< formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace
.From
),
412 filterAddress(Trace
.To
),
413 Info
.InternCount
+ Info
.ExternCount
);
416 OutFile
<< BasicSamples
.size() << "\n";
// Unlike the two neighbouring loops, this binding copies each element (no &).
417 for (const auto [PC
, HitCount
] : BasicSamples
)
418 OutFile
<< formatv("{0:x-}:{1}\n", filterAddress(PC
), HitCount
);
420 OutFile
<< BranchLBRs
.size() << "\n";
421 for (const auto &[Trace
, Info
] : BranchLBRs
) {
422 OutFile
<< formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace
.From
),
423 filterAddress(Trace
.To
), Info
.TakenCount
);
426 outs() << "PERF2BOLT: wrote " << FallthroughLBRs
.size() << " unique traces, "
427 << BasicSamples
.size() << " sample addresses and " << BranchLBRs
.size()
428 << " unique branches to " << OutputFilename
<< "\n";
430 return std::error_code();
// Applies the opts::FilterPID option to BinaryMMapInfo.
// When a PID is requested and BinaryMMapInfo has an entry for it, the map is
// reduced to that single entry (re-inserted under MMap.PID). When there is no
// such entry, an error naming the PID and the binary is printed (in red if the
// stream supports colors), followed by the PIDs for which a profile exists.
433 void DataAggregator::filterBinaryMMapInfo() {
434 if (opts::FilterPID
) {
435 auto MMapInfoIter
= BinaryMMapInfo
.find(opts::FilterPID
);
436 if (MMapInfoIter
!= BinaryMMapInfo
.end()) {
// Copy the entry out first: clear() below invalidates MMapInfoIter.
437 MMapInfo MMap
= MMapInfoIter
->second
;
438 BinaryMMapInfo
.clear();
439 BinaryMMapInfo
.insert(std::make_pair(MMap
.PID
, MMap
));
441 if (errs().has_colors())
442 errs().changeColor(raw_ostream::RED
);
443 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
444 << opts::FilterPID
<< "\""
445 << " for binary \"" << BC
->getFilename() << "\".";
446 assert(!BinaryMMapInfo
.empty() && "No memory map for matching binary");
447 errs() << " Profile for the following process is available:\n";
448 for (std::pair
<const uint64_t, MMapInfo
> &MMI
: BinaryMMapInfo
)
// NOTE(review): the PID list goes to outs() while the surrounding message goes
// to errs(), so the two can be separated or reordered -- confirm intent.
449 outs() << " " << MMI
.second
.PID
450 << (MMI
.second
.Forked
? " (forked)\n" : "\n");
452 if (errs().has_colors())
// Waits for a perf job to finish and makes its output the current parse
// buffer.
//   Name     - label used in the progress message.
//   Process  - the perf job to wait for (no timeout: std::nullopt).
//   Callback - invoked with (return code, stderr contents) when perf exits
//              with a non-zero code.
// Returns the perf return code. On a non-zero code the function returns right
// after the callback; otherwise the job's stdout file is loaded into FileBuf
// and ParsingBuf is pointed at it. A non-empty Error string from sys::Wait is
// reported as a PERF-ERROR.
460 int DataAggregator::prepareToParse(StringRef Name
, PerfProcessInfo
&Process
,
461 PerfProcessErrorCallbackTy Callback
) {
463 outs() << "PERF2BOLT: waiting for perf " << Name
464 << " collection to finish...\n";
465 sys::ProcessInfo PI
= sys::Wait(Process
.PI
, std::nullopt
, &Error
);
467 if (!Error
.empty()) {
468 errs() << "PERF-ERROR: " << PerfPath
<< ": " << Error
<< "\n";
473 if (PI
.ReturnCode
!= 0) {
474 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> ErrorMB
=
475 MemoryBuffer::getFileOrSTDIN(Process
.StderrPath
.data());
// NOTE(review): ErrorMB is dereferenced without a visible getError() check; if
// the stderr file cannot be read this is invalid -- confirm and guard.
476 StringRef ErrBuf
= (*ErrorMB
)->getBuffer();
479 Callback(PI
.ReturnCode
, ErrBuf
);
480 return PI
.ReturnCode
;
483 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
484 MemoryBuffer::getFileOrSTDIN(Process
.StdoutPath
.data());
485 if (std::error_code EC
= MB
.getError()) {
486 errs() << "Cannot open " << Process
.StdoutPath
.data() << ": "
487 << EC
.message() << "\n";
492 FileBuf
= std::move(*MB
);
493 ParsingBuf
= FileBuf
->getBuffer();
496 return PI
.ReturnCode
;
// Drives parsing of all perf output for the binary described by BC.
// Visible sequence:
//   1. Pre-aggregated input: parsePreAggregated() and return success.
//   2. Build-id check via processFileBuildID(), or a warning if the binary has
//      no build-id.
//   3. In Linux kernel mode IgnoreInterruptLBR is forced to false; mmap and
//      task events are parsed and filterBinaryMMapInfo() is applied.
//   4. Main events: heat map in HeatmapMode, otherwise branch or basic events
//      depending on opts::BasicAggregation.
//   5. Optional AutoFDO output, then memory events.
// Parse failures in the visible code are reported on errs(); every return
// statement visible here returns Error::success().
499 Error
DataAggregator::preprocessProfile(BinaryContext
&BC
) {
502 if (opts::ReadPreAggregated
) {
503 parsePreAggregated();
504 return Error::success();
507 if (std::optional
<StringRef
> FileBuildID
= BC
.getFileBuildID()) {
508 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID
<< "\n";
509 processFileBuildID(*FileBuildID
);
511 errs() << "BOLT-WARNING: build-id will not be checked because we could "
512 "not read one from input binary\n";
// Default handler: print the perf return code followed by its stderr text.
515 auto ErrorCallback
= [](int ReturnCode
, StringRef ErrBuf
) {
516 errs() << "PERF-ERROR: return code " << ReturnCode
<< "\n" << ErrBuf
;
// Mem-event handler: stay silent when perf merely reports that the samples
// have no ADDR attribute; forward anything else to ErrorCallback.
520 auto MemEventsErrorCallback
= [&](int ReturnCode
, StringRef ErrBuf
) {
521 Regex
NoData("Samples for '.*' event do not have ADDR attribute set. "
522 "Cannot print 'addr' field.");
523 if (!NoData
.match(ErrBuf
))
524 ErrorCallback(ReturnCode
, ErrBuf
);
527 if (opts::LinuxKernelMode
) {
528 // Current MMap parsing logic does not work with linux kernel.
529 // MMap entries for linux kernel uses PERF_RECORD_MMAP
530 // format instead of typical PERF_RECORD_MMAP2 format.
531 // Since linux kernel address mapping is absolute (same as
532 // in the ELF file), we avoid parsing MMap in linux kernel mode.
533 // While generating optimized linux kernel binary, we may need
534 // to parse MMap entries.
536 // In linux kernel mode, we analyze and optimize
537 // all linux kernel binary instructions, irrespective
538 // of whether they are due to system calls or due to
539 // interrupts. Therefore, we cannot ignore interrupt
540 // in Linux kernel mode.
541 opts::IgnoreInterruptLBR
= false;
543 prepareToParse("mmap events", MMapEventsPPI
, ErrorCallback
);
544 if (parseMMapEvents())
545 errs() << "PERF2BOLT: failed to parse mmap events\n";
548 prepareToParse("task events", TaskEventsPPI
, ErrorCallback
);
549 if (parseTaskEvents())
550 errs() << "PERF2BOLT: failed to parse task events\n";
552 filterBinaryMMapInfo();
553 prepareToParse("events", MainEventsPPI
, ErrorCallback
);
555 if (opts::HeatmapMode
) {
556 if (std::error_code EC
= printLBRHeatMap()) {
557 errs() << "ERROR: failed to print heat map: " << EC
.message() << '\n';
563 if ((!opts::BasicAggregation
&& parseBranchEvents()) ||
564 (opts::BasicAggregation
&& parseBasicEvents()))
565 errs() << "PERF2BOLT: failed to parse samples\n";
567 // We can finish early if the goal is just to generate data for autofdo
568 if (opts::WriteAutoFDOData
) {
569 if (std::error_code EC
= writeAutoFDOData(opts::OutputFilename
))
570 errs() << "Error writing autofdo data to file: " << EC
.message() << "\n";
576 // Special handling for memory events
// A non-zero perf return code for mem events is not treated as a failure.
577 if (prepareToParse("mem events", MemEventsPPI
, MemEventsErrorCallback
))
578 return Error::success();
580 if (const std::error_code EC
= parseMemEvents())
581 errs() << "PERF2BOLT: failed to parse memory events: " << EC
.message()
586 return Error::success();
// Converts the collected branch data for every function in BC via
// convertBranchData(). When opts::AggregateOnly is set and the profile format
// is PF_Fdata, the aggregated data is also written to opts::OutputFilename,
// and a write failure is passed to report_error(). Returns Error::success().
589 Error
DataAggregator::readProfile(BinaryContext
&BC
) {
592 for (auto &BFI
: BC
.getBinaryFunctions()) {
593 BinaryFunction
&Function
= BFI
.second
;
594 convertBranchData(Function
);
597 if (opts::AggregateOnly
&&
598 opts::ProfileFormat
== opts::ProfileFormatKind::PF_Fdata
) {
599 if (std::error_code EC
= writeAggregatedFile(opts::OutputFilename
))
600 report_error("cannot create output data file", EC
);
603 return Error::success();
// Forwards to Function.hasProfileAvailable().
606 bool DataAggregator::mayHaveProfileData(const BinaryFunction
&Function
) {
607 return Function
.hasProfileAvailable();
// Turns the parsed samples into per-function profile data.
// Dispatches to processPreAggregated(), processBasicEvents() or
// processBranchEvents() according to the options, then marks every function
// that has branch data or sample data as profiled (PF_SAMPLE under
// opts::BasicAggregation, PF_LBR otherwise). The per-function branch and
// memory-event records are stable-sorted, and intermediate containers are
// released at the end.
610 void DataAggregator::processProfile(BinaryContext
&BC
) {
611 if (opts::ReadPreAggregated
)
612 processPreAggregated();
613 else if (opts::BasicAggregation
)
614 processBasicEvents();
616 processBranchEvents();
620 // Mark all functions with registered events as having a valid profile.
621 const auto Flags
= opts::BasicAggregation
? BinaryFunction::PF_SAMPLE
622 : BinaryFunction::PF_LBR
;
623 for (auto &BFI
: BC
.getBinaryFunctions()) {
624 BinaryFunction
&BF
= BFI
.second
;
625 if (getBranchData(BF
) || getFuncSampleData(BF
.getNames()))
626 BF
.markProfiled(Flags
);
629 for (auto &FuncBranches
: NamesToBranches
)
630 llvm::stable_sort(FuncBranches
.second
.Data
);
632 for (auto &MemEvents
: NamesToMemEvents
)
633 llvm::stable_sort(MemEvents
.second
.Data
);
635 // Release intermediate storage.
637 clear(FallthroughLBRs
);
638 clear(AggregatedLBRs
);
// Looks up the function covering Address through the BinaryContext, with
// CheckPastEnd=false and UseMaxSize=true. BC->containsAddress(Address) is
// tested first; the result for addresses that fail it is not visible here.
644 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address
) const {
645 if (!BC
->containsAddress(Address
))
648 return BC
->getBinaryFunctionContainingAddress(Address
, /*CheckPastEnd=*/false,
649 /*UseMaxSize=*/true);
// Chooses the name under which samples for Func are recorded.
// One path returns Func.getOneName() directly. NOTE(review): presumably that
// is the path taken when no BAT (address translation) is available -- the
// guarding condition is not visible here.
// Otherwise, if BAT reports a non-zero parent address for Func, Count is added
// to NumColdSamples and the function containing that address is looked up.
// Among the names of OrigFunc, the first one containing at least two '/'
// characters (the Symbol/FileName/N pattern) is returned; failing that,
// OrigFunc->getOneName().
652 StringRef
DataAggregator::getLocationName(BinaryFunction
&Func
,
655 return Func
.getOneName();
657 const BinaryFunction
*OrigFunc
= &Func
;
658 if (const uint64_t HotAddr
= BAT
->fetchParentAddress(Func
.getAddress())) {
659 NumColdSamples
+= Count
;
660 BinaryFunction
*HotFunc
= getBinaryFunctionContainingAddress(HotAddr
);
664 // If it is a local function, prefer the name containing the file name where
665 // the local function was declared
666 for (StringRef AlternativeName
: OrigFunc
->getNames()) {
667 size_t FileNameIdx
= AlternativeName
.find('/');
668 // Confirm the alternative name has the pattern Symbol/FileName/1 before
670 if (FileNameIdx
== StringRef::npos
||
671 AlternativeName
.find('/', FileNameIdx
+ 1) == StringRef::npos
)
673 return AlternativeName
;
675 return OrigFunc
->getOneName();
// Records Count hits at Address inside Func.
// The per-function sample record is looked up in NamesToSamples by
// Func.getOneName() and created (named via getLocationName) when absent.
// Address is converted to an offset from the function start, optionally
// translated through BAT as a non-branch-source address, and passed to
// bumpCount().
678 bool DataAggregator::doSample(BinaryFunction
&Func
, uint64_t Address
,
680 auto I
= NamesToSamples
.find(Func
.getOneName());
681 if (I
== NamesToSamples
.end()) {
683 StringRef LocName
= getLocationName(Func
, Count
);
684 std::tie(I
, Success
) = NamesToSamples
.insert(
685 std::make_pair(Func
.getOneName(),
686 FuncSampleData(LocName
, FuncSampleData::ContainerTy())));
689 Address
-= Func
.getAddress();
691 Address
= BAT
->translate(Func
.getAddress(), Address
, /*IsBranchSrc=*/false);
693 I
->second
.bumpCount(Address
, Count
);
// Records a branch whose source and destination both lie in Func.
// The function's FuncBranchData is fetched (or created in NamesToBranches
// under Func.getOneName(), named via getLocationName, and registered with
// setBranchData). From and To are converted to offsets from the function
// start, optionally translated through BAT (From as a branch source, To as a
// destination), and the branch count is bumped by Count with Mispreds.
697 bool DataAggregator::doIntraBranch(BinaryFunction
&Func
, uint64_t From
,
698 uint64_t To
, uint64_t Count
,
700 FuncBranchData
*AggrData
= getBranchData(Func
);
702 AggrData
= &NamesToBranches
[Func
.getOneName()];
703 AggrData
->Name
= getLocationName(Func
, Count
);
704 setBranchData(Func
, AggrData
);
707 From
-= Func
.getAddress();
708 To
-= Func
.getAddress();
709 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
710 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func
, From
, To
));
712 From
= BAT
->translate(Func
.getAddress(), From
, /*IsBranchSrc=*/true);
713 To
= BAT
->translate(Func
.getAddress(), To
, /*IsBranchSrc=*/false);
715 dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
716 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func
, From
, To
));
719 AggrData
->bumpBranchCount(From
, To
, Count
, Mispreds
);
// Records a control transfer between two (possibly different, possibly
// unknown) functions.
// For the source side: the location name is resolved with Count, branch data
// is fetched or created, From becomes an offset (optionally BAT-translated as
// a branch source) and recordExit() is called.
// For the destination side the same steps are applied with a count of 0 for
// name resolution, To is translated as a non-source address and recordEntry()
// is called.
// Finally the source data gets a call count towards the destination location
// and the destination data gets an entry count from the source location.
// NOTE(review): FromAggrData and ToAggrData start as nullptr; presumably each
// side is guarded by a null check on FromFunc/ToFunc that is not visible here.
723 bool DataAggregator::doInterBranch(BinaryFunction
*FromFunc
,
724 BinaryFunction
*ToFunc
, uint64_t From
,
725 uint64_t To
, uint64_t Count
,
727 FuncBranchData
*FromAggrData
= nullptr;
728 FuncBranchData
*ToAggrData
= nullptr;
732 SrcFunc
= getLocationName(*FromFunc
, Count
);
733 FromAggrData
= getBranchData(*FromFunc
);
735 FromAggrData
= &NamesToBranches
[FromFunc
->getOneName()];
736 FromAggrData
->Name
= SrcFunc
;
737 setBranchData(*FromFunc
, FromAggrData
);
739 From
-= FromFunc
->getAddress();
741 From
= BAT
->translate(FromFunc
->getAddress(), From
, /*IsBranchSrc=*/true);
743 recordExit(*FromFunc
, From
, Mispreds
, Count
);
746 DstFunc
= getLocationName(*ToFunc
, 0);
747 ToAggrData
= getBranchData(*ToFunc
);
749 ToAggrData
= &NamesToBranches
[ToFunc
->getOneName()];
750 ToAggrData
->Name
= DstFunc
;
751 setBranchData(*ToFunc
, ToAggrData
);
753 To
-= ToFunc
->getAddress();
755 To
= BAT
->translate(ToFunc
->getAddress(), To
, /*IsBranchSrc=*/false);
757 recordEntry(*ToFunc
, To
, Mispreds
, Count
);
761 FromAggrData
->bumpCallCount(From
, Location(!DstFunc
.empty(), DstFunc
, To
),
764 ToAggrData
->bumpEntryCount(Location(!SrcFunc
.empty(), SrcFunc
, From
), To
,
// Classifies one branch record and dispatches it.
// Both endpoints are mapped to their containing functions. When they are the
// same function and To is not that function's start address, the branch is
// recorded with function-relative offsets and handled as an intra-function
// branch. Everything else -- including a transfer to the function's own entry,
// i.e. recursion -- goes to doInterBranch().
// The case where neither endpoint maps to a function is tested first.
769 bool DataAggregator::doBranch(uint64_t From
, uint64_t To
, uint64_t Count
,
771 BinaryFunction
*FromFunc
= getBinaryFunctionContainingAddress(From
);
772 BinaryFunction
*ToFunc
= getBinaryFunctionContainingAddress(To
);
773 if (!FromFunc
&& !ToFunc
)
776 // Treat recursive control transfers as inter-branches.
777 if (FromFunc
== ToFunc
&& (To
!= ToFunc
->getAddress())) {
778 recordBranch(*FromFunc
, From
- FromFunc
->getAddress(),
779 To
- FromFunc
->getAddress(), Count
, Mispreds
);
780 return doIntraBranch(*FromFunc
, From
, To
, Count
, Mispreds
);
783 return doInterBranch(FromFunc
, ToFunc
, From
, To
, Count
, Mispreds
);
// Processes the fall-through trace between two consecutive LBR entries: it
// starts at First.To and ends at Second.From.
// A trace with an endpoint outside any known function adds Count to
// NumLongRangeTraces; a trace whose endpoints lie in different functions adds
// Count to NumInvalidTraces. Otherwise the list of fall-through edges is
// obtained from BAT when present, or from getFallthroughsInTrace() on the CFG,
// and each edge is recorded with doIntraBranch() using absolute addresses and
// no misprediction. A trace for which no list is produced also counts towards
// NumInvalidTraces.
786 bool DataAggregator::doTrace(const LBREntry
&First
, const LBREntry
&Second
,
788 BinaryFunction
*FromFunc
= getBinaryFunctionContainingAddress(First
.To
);
789 BinaryFunction
*ToFunc
= getBinaryFunctionContainingAddress(Second
.From
);
790 if (!FromFunc
|| !ToFunc
) {
// NOTE(review): at least one of FromFunc/ToFunc is null on this path, yet both
// are dereferenced by the debug output below. Looks like a null-pointer
// dereference when this debug output is active -- confirm and guard each use.
792 dbgs() << "Out of range trace starting in " << FromFunc
->getPrintName()
793 << formatv(" @ {0:x}", First
.To
- FromFunc
->getAddress())
794 << " and ending in " << ToFunc
->getPrintName()
795 << formatv(" @ {0:x}\n", Second
.From
- ToFunc
->getAddress());
797 NumLongRangeTraces
+= Count
;
800 if (FromFunc
!= ToFunc
) {
801 NumInvalidTraces
+= Count
;
803 dbgs() << "Invalid trace starting in " << FromFunc
->getPrintName()
804 << formatv(" @ {0:x}", First
.To
- FromFunc
->getAddress())
805 << " and ending in " << ToFunc
->getPrintName()
806 << formatv(" @ {0:x}\n", Second
.From
- ToFunc
->getAddress());
811 std::optional
<BoltAddressTranslation::FallthroughListTy
> FTs
=
812 BAT
? BAT
->getFallthroughsInTrace(FromFunc
->getAddress(), First
.To
,
814 : getFallthroughsInTrace(*FromFunc
, First
, Second
, Count
);
817 dbgs() << "Invalid trace starting in " << FromFunc
->getPrintName()
818 << " @ " << Twine::utohexstr(First
.To
- FromFunc
->getAddress())
// NOTE(review): the destination function name and " @ " are printed twice in a
// row below; this looks like an accidental duplicate in the debug message.
819 << " and ending in " << ToFunc
->getPrintName() << " @ "
820 << ToFunc
->getPrintName() << " @ "
821 << Twine::utohexstr(Second
.From
- ToFunc
->getAddress()) << '\n');
822 NumInvalidTraces
+= Count
;
826 LLVM_DEBUG(dbgs() << "Processing " << FTs
->size() << " fallthroughs for "
827 << FromFunc
->getPrintName() << ":"
828 << Twine::utohexstr(First
.To
) << " to "
829 << Twine::utohexstr(Second
.From
) << ".\n");
// The list holds function-relative offsets; doIntraBranch expects addresses.
830 for (const std::pair
<uint64_t, uint64_t> &Pair
: *FTs
)
831 doIntraBranch(*FromFunc
, Pair
.first
+ FromFunc
->getAddress(),
832 Pair
.second
+ FromFunc
->getAddress(), Count
, false);
// Walks the CFG of BF along the fall-through path from FirstLBR.To to
// SecondLBR.From and collects the traversed edges into Branches as
// (source offset, destination block offset) pairs. Requires BF to have a CFG
// (asserted).
// Visible steps:
//   - Both trace endpoints are mapped to basic blocks; a missing block is
//     tested for.
//   - If the trace starts exactly at a block boundary, the incoming branch
//     came from outside BF, and the block is neither an entry point nor a
//     landing pad, the preceding block in layout is inspected: it must have
//     this block as a successor and end in a call.
//   - Blocks are followed in layout order; a block without its layout
//     successor as a CFG successor is reported as a bad LBR.
//   - For every collected edge the branch info between the two blocks is
//     fetched.
837 bool DataAggregator::recordTrace(
838 BinaryFunction
&BF
, const LBREntry
&FirstLBR
, const LBREntry
&SecondLBR
,
840 SmallVector
<std::pair
<uint64_t, uint64_t>, 16> &Branches
) const {
841 BinaryContext
&BC
= BF
.getBinaryContext();
846 assert(BF
.hasCFG() && "can only record traces in CFG state");
848 // Offsets of the trace within this function.
849 const uint64_t From
= FirstLBR
.To
- BF
.getAddress();
850 const uint64_t To
= SecondLBR
.From
- BF
.getAddress();
855 const BinaryBasicBlock
*FromBB
= BF
.getBasicBlockContainingOffset(From
);
856 const BinaryBasicBlock
*ToBB
= BF
.getBasicBlockContainingOffset(To
);
858 if (!FromBB
|| !ToBB
)
861 // Adjust FromBB if the first LBR is a return from the last instruction in
862 // the previous block (that instruction should be a call).
863 if (From
== FromBB
->getOffset() && !BF
.containsAddress(FirstLBR
.From
) &&
864 !FromBB
->isEntryPoint() && !FromBB
->isLandingPad()) {
865 const BinaryBasicBlock
*PrevBB
=
866 BF
.getLayout().getBlock(FromBB
->getIndex() - 1);
867 if (PrevBB
->getSuccessor(FromBB
->getLabel())) {
868 const MCInst
*Instr
= PrevBB
->getLastNonPseudoInstr();
869 if (Instr
&& BC
.MIB
->isCall(*Instr
))
872 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
875 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR
<< '\n');
879 // Fill out information for fall-through edges. The From and To could be
880 // within the same basic block, e.g. when two call instructions are in the
881 // same block. In this case we skip the processing.
885 // Process blocks in the original layout order.
886 BinaryBasicBlock
*BB
= BF
.getLayout().getBlock(FromBB
->getIndex());
887 assert(BB
== FromBB
&& "index mismatch");
889 BinaryBasicBlock
*NextBB
= BF
.getLayout().getBlock(BB
->getIndex() + 1);
890 assert((NextBB
&& NextBB
->getOffset() > BB
->getOffset()) && "bad layout");
892 // Check for bad LBRs.
893 if (!BB
->getSuccessor(NextBB
->getLabel())) {
894 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
895 << " " << FirstLBR
<< '\n'
896 << " " << SecondLBR
<< '\n');
900 const MCInst
*Instr
= BB
->getLastNonPseudoInstr();
903 Offset
= BC
.MIB
->getOffsetWithDefault(*Instr
, 0);
905 Offset
= BB
->getOffset();
907 Branches
.emplace_back(Offset
, NextBB
->getOffset());
912 // Record fall-through jumps
913 for (const auto &[FromOffset
, ToOffset
] : Branches
) {
914 BinaryBasicBlock
*FromBB
= BF
.getBasicBlockContainingOffset(FromOffset
);
915 BinaryBasicBlock
*ToBB
= BF
.getBasicBlockAtOffset(ToOffset
);
916 assert(FromBB
&& ToBB
);
917 BinaryBasicBlock::BinaryBranchInfo
&BI
= FromBB
->getBranchInfo(*ToBB
);
// Wrapper around recordTrace() that collects the fall-through edges of the
// trace between FirstLBR and SecondLBR into a local vector.
// NOTE(review): presumably returns std::nullopt when recordTrace() fails and
// the collected vector otherwise -- the return statements are not visible.
924 std::optional
<SmallVector
<std::pair
<uint64_t, uint64_t>, 16>>
925 DataAggregator::getFallthroughsInTrace(BinaryFunction
&BF
,
926 const LBREntry
&FirstLBR
,
927 const LBREntry
&SecondLBR
,
928 uint64_t Count
) const {
929 SmallVector
<std::pair
<uint64_t, uint64_t>, 16> Res
;
931 if (!recordTrace(BF
, FirstLBR
, SecondLBR
, Count
, Res
))
// Accounts for Count entries into BF at offset To.
// Offsets greater than BF.getSize() are tested for first. A function without
// a profile has its ExecutionCount reset to 0 before counting. One visible
// path adds Count to BF.ExecutionCount and selects the first block; another
// looks up the block starting exactly at To and checks whether it is an entry
// point. The selected block's execution count is then increased by Count.
// NOTE(review): declared const yet updates BF and its blocks, which is
// possible because BF is passed by non-const reference.
937 bool DataAggregator::recordEntry(BinaryFunction
&BF
, uint64_t To
, bool Mispred
,
938 uint64_t Count
) const {
939 if (To
> BF
.getSize())
942 if (!BF
.hasProfile())
943 BF
.ExecutionCount
= 0;
945 BinaryBasicBlock
*EntryBB
= nullptr;
947 BF
.ExecutionCount
+= Count
;
949 EntryBB
= &BF
.front();
950 } else if (BinaryBasicBlock
*BB
= BF
.getBasicBlockAtOffset(To
)) {
951 if (BB
->isEntryPoint())
956 EntryBB
->setExecutionCount(EntryBB
->getKnownExecutionCount() + Count
);
// Handles an exit from BF at offset From. Functions that are not simple, and
// offsets greater than BF.getSize(), are tested for first. A function without
// a profile has its ExecutionCount reset to 0.
961 bool DataAggregator::recordExit(BinaryFunction
&BF
, uint64_t From
, bool Mispred
,
962 uint64_t Count
) const {
963 if (!BF
.isSimple() || From
> BF
.getSize())
966 if (!BF
.hasProfile())
967 BF
.ExecutionCount
= 0;
// Parses one LBR entry of the form "<from>/<to>/<mispred>/<rest>" from
// ParsingBuf.
//   <from>, <to> - integers parsed with radix auto-detection (radix 0), so a
//                  "0x" prefix is what makes them hexadecimal.
//   <mispred>    - exactly one character: 'M' (mispredicted), 'P' or '-'.
//                  Only 'M' sets Res.Mispred. The first '-' seen triggers a
//                  one-time warning that the misprediction bit is missing.
//   <rest>       - remainder up to the next field separator; must be at least
//                  5 characters long.
// Malformed fields are reported through reportError() and yield
// llvm::errc::io_error.
972 ErrorOr
<LBREntry
> DataAggregator::parseLBREntry() {
974 ErrorOr
<StringRef
> FromStrRes
= parseString('/');
975 if (std::error_code EC
= FromStrRes
.getError())
977 StringRef OffsetStr
= FromStrRes
.get();
978 if (OffsetStr
.getAsInteger(0, Res
.From
)) {
979 reportError("expected hexadecimal number with From address");
980 Diag
<< "Found: " << OffsetStr
<< "\n";
981 return make_error_code(llvm::errc::io_error
);
984 ErrorOr
<StringRef
> ToStrRes
= parseString('/');
985 if (std::error_code EC
= ToStrRes
.getError())
987 OffsetStr
= ToStrRes
.get();
988 if (OffsetStr
.getAsInteger(0, Res
.To
)) {
989 reportError("expected hexadecimal number with To address");
990 Diag
<< "Found: " << OffsetStr
<< "\n";
991 return make_error_code(llvm::errc::io_error
);
994 ErrorOr
<StringRef
> MispredStrRes
= parseString('/');
995 if (std::error_code EC
= MispredStrRes
.getError())
997 StringRef MispredStr
= MispredStrRes
.get();
998 if (MispredStr
.size() != 1 ||
999 (MispredStr
[0] != 'P' && MispredStr
[0] != 'M' && MispredStr
[0] != '-')) {
1000 reportError("expected single char for mispred bit");
1001 Diag
<< "Found: " << MispredStr
<< "\n";
1002 return make_error_code(llvm::errc::io_error
);
1004 Res
.Mispred
= MispredStr
[0] == 'M';
// Function-local static: the warning below is emitted at most once.
1006 static bool MispredWarning
= true;
1007 if (MispredStr
[0] == '-' && MispredWarning
) {
1008 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1009 MispredWarning
= false;
1012 ErrorOr
<StringRef
> Rest
= parseString(FieldSeparator
, true);
1013 if (std::error_code EC
= Rest
.getError())
1015 if (Rest
.get().size() < 5) {
1016 reportError("expected rest of LBR entry");
1017 Diag
<< "Found: " << Rest
.get() << "\n";
1018 return make_error_code(llvm::errc::io_error
);
// Tests whether ParsingBuf starts with FieldSeparator and, on the visible
// consuming path, drops that one character from the front of the buffer.
// NOTE(review): ParsingBuf[0] is read without a visible emptiness check;
// callers presumably guarantee a non-empty buffer -- confirm.
1023 bool DataAggregator::checkAndConsumeFS() {
1024 if (ParsingBuf
[0] != FieldSeparator
)
1027 ParsingBuf
= ParsingBuf
.drop_front(1);
// Advances ParsingBuf past the next '\n'. If the buffer contains no newline
// it becomes empty.
1032 void DataAggregator::consumeRestOfLine() {
1033 size_t LineEnd
= ParsingBuf
.find_first_of('\n');
1034 if (LineEnd
== StringRef::npos
) {
1035 ParsingBuf
= StringRef();
1040 ParsingBuf
= ParsingBuf
.drop_front(LineEnd
+ 1);
// Returns true when the next character in ParsingBuf is '\n'; the buffer is
// not modified.
// NOTE(review): reads ParsingBuf[0] without an emptiness check -- callers
// presumably guarantee a non-empty buffer; confirm.
1045 bool DataAggregator::checkNewLine() {
1046 return ParsingBuf
[0] == '\n';
// Parses one line of `perf script -F pid,ip,brstack` output into a
// PerfBranchSample.
// Field order: PID, PC (hex), then LBR entries up to the end of the line.
// Leading field separators are skipped before each field. Outside Linux
// kernel mode, a PID with no entry in BinaryMMapInfo causes the rest of the
// line to be discarded and errc::no_such_process to be returned. Each LBR
// entry is checked with ignoreKernelInterrupt(), adjusted with adjustLBR()
// when the binary has no fixed load address, and appended to Res.LBR.
1049 ErrorOr
<DataAggregator::PerfBranchSample
> DataAggregator::parseBranchSample() {
1050 PerfBranchSample Res
;
1052 while (checkAndConsumeFS()) {
1055 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1056 if (std::error_code EC
= PIDRes
.getError())
1058 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1059 if (!opts::LinuxKernelMode
&& MMapInfoIter
== BinaryMMapInfo
.end()) {
1060 consumeRestOfLine();
1061 return make_error_code(errc::no_such_process
);
1064 while (checkAndConsumeFS()) {
1067 ErrorOr
<uint64_t> PCRes
= parseHexField(FieldSeparator
, true);
1068 if (std::error_code EC
= PCRes
.getError())
1070 Res
.PC
= PCRes
.get();
1072 if (checkAndConsumeNewLine())
1075 while (!checkAndConsumeNewLine()) {
1076 checkAndConsumeFS();
1078 ErrorOr
<LBREntry
> LBRRes
= parseLBREntry();
1079 if (std::error_code EC
= LBRRes
.getError())
1081 LBREntry LBR
= LBRRes
.get();
1082 if (ignoreKernelInterrupt(LBR
))
// NOTE(review): in Linux kernel mode MMapInfoIter may equal end() (the check
// above is skipped), so this dereference relies on HasFixedLoadAddress being
// true in that mode -- confirm.
1084 if (!BC
->HasFixedLoadAddress
)
1085 adjustLBR(LBR
, MMapInfoIter
->second
);
1086 Res
.LBR
.push_back(LBR
);
// Parses one line of `perf script -F pid,event,ip` output into a
// PerfBasicSample {event name, address}.
// Field order: PID, event name, address (hex), end of line. Leading field
// separators are skipped before each field. A PID with no entry in
// BinaryMMapInfo discards the rest of the line and yields an empty sample
// {StringRef(), 0}. Trailing content after the address is reported as
// "expected end of line" with errc::io_error. The address is adjusted with
// adjustAddress() when the binary has no fixed load address.
1092 ErrorOr
<DataAggregator::PerfBasicSample
> DataAggregator::parseBasicSample() {
1093 while (checkAndConsumeFS()) {
1096 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1097 if (std::error_code EC
= PIDRes
.getError())
1100 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1101 if (MMapInfoIter
== BinaryMMapInfo
.end()) {
1102 consumeRestOfLine();
1103 return PerfBasicSample
{StringRef(), 0};
1106 while (checkAndConsumeFS()) {
1109 ErrorOr
<StringRef
> Event
= parseString(FieldSeparator
);
1110 if (std::error_code EC
= Event
.getError())
1113 while (checkAndConsumeFS()) {
1116 ErrorOr
<uint64_t> AddrRes
= parseHexField(FieldSeparator
, true);
1117 if (std::error_code EC
= AddrRes
.getError())
1120 if (!checkAndConsumeNewLine()) {
1121 reportError("expected end of line");
1122 return make_error_code(llvm::errc::io_error
);
1125 uint64_t Address
= *AddrRes
;
1126 if (!BC
->HasFixedLoadAddress
)
1127 adjustAddress(Address
, MMapInfoIter
->second
);
1129 return PerfBasicSample
{Event
.get(), Address
};
// Parses a single memory sample: PID, event name, data address and PC,
// followed by an end of line.
// Lines whose PID is not in BinaryMMapInfo, or whose event name does not
// contain "mem-loads", have the rest of the line consumed.
// NOTE(review): the statements that follow those consumeRestOfLine() calls and
// the getError() checks are not visible in this view of the file.
1132 ErrorOr
<DataAggregator::PerfMemSample
> DataAggregator::parseMemSample() {
1133 PerfMemSample Res
{0, 0};
1135 while (checkAndConsumeFS()) {
// First field: PID.
1138 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1139 if (std::error_code EC
= PIDRes
.getError())
1142 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1143 if (MMapInfoIter
== BinaryMMapInfo
.end()) {
1144 consumeRestOfLine();
1148 while (checkAndConsumeFS()) {
// Second field: event name; only "mem-loads" events are of interest.
1151 ErrorOr
<StringRef
> Event
= parseString(FieldSeparator
);
1152 if (std::error_code EC
= Event
.getError())
1154 if (!Event
.get().contains("mem-loads")) {
1155 consumeRestOfLine();
1159 while (checkAndConsumeFS()) {
// Third field: accessed data address in hex.
1162 ErrorOr
<uint64_t> AddrRes
= parseHexField(FieldSeparator
);
1163 if (std::error_code EC
= AddrRes
.getError())
1166 while (checkAndConsumeFS()) {
// Fourth field: PC of the instruction in hex; on a parse failure the rest of
// the line is discarded.
1169 ErrorOr
<uint64_t> PCRes
= parseHexField(FieldSeparator
, true);
1170 if (std::error_code EC
= PCRes
.getError()) {
1171 consumeRestOfLine();
1175 if (!checkAndConsumeNewLine()) {
1176 reportError("expected end of line");
1177 return make_error_code(llvm::errc::io_error
);
1180 uint64_t Address
= *AddrRes
;
// NOTE(review): only the data address is adjusted here; the PC is returned as
// parsed -- confirm this is intended when the binary has no fixed load
// address.
1181 if (!BC
->HasFixedLoadAddress
)
1182 adjustAddress(Address
, MMapInfoIter
->second
);
1184 return PerfMemSample
{PCRes
.get(), Address
};
// Parses either a bare hex offset or a "<build-id>:<hex offset>" pair from the
// parsing buffer and returns it as a Location.
// The form is chosen by looking ahead to the next space or newline: if there
// is none, or the look-ahead token contains no ':', the field is parsed as a
// bare offset.
1187 ErrorOr
<Location
> DataAggregator::parseLocationOrOffset() {
// Helper: parse one hex field and wrap it in an offset-only Location.
1188 auto parseOffset
= [this]() -> ErrorOr
<Location
> {
1189 ErrorOr
<uint64_t> Res
= parseHexField(FieldSeparator
);
1190 if (std::error_code EC
= Res
.getError())
1192 return Location(Res
.get());
// Isolate the upcoming token without consuming it.
1195 size_t Sep
= ParsingBuf
.find_first_of(" \n");
1196 if (Sep
== StringRef::npos
)
1197 return parseOffset();
1198 StringRef LookAhead
= ParsingBuf
.substr(0, Sep
);
1199 if (LookAhead
.find_first_of(":") == StringRef::npos
)
1200 return parseOffset();
// "<build-id>:<offset>" form: read the part before ':' and then the offset.
1202 ErrorOr
<StringRef
> BuildID
= parseString(':');
1203 if (std::error_code EC
= BuildID
.getError())
1205 ErrorOr
<uint64_t> Offset
= parseHexField(FieldSeparator
);
1206 if (std::error_code EC
= Offset
.getError())
1208 return Location(true, BuildID
.get(), Offset
.get());
// Parses one line of a pre-aggregated profile: an entry type ("B", "F" or
// "f"), a source location, a destination location, a count and -- for "B"
// entries only -- a misprediction count, followed by an end of line.
// Reports an error and returns io_error for an unrecognized type or trailing
// content on the line.
// NOTE(review): the statements guarded by the getError() checks are not
// visible in this view of the file.
1211 ErrorOr
<DataAggregator::AggregatedLBREntry
>
1212 DataAggregator::parseAggregatedLBREntry() {
1213 while (checkAndConsumeFS()) {
// Entry type: "B" -> BRANCH, "F" -> FT, "f" -> FT_EXTERNAL_ORIGIN.
1216 ErrorOr
<StringRef
> TypeOrErr
= parseString(FieldSeparator
);
1217 if (std::error_code EC
= TypeOrErr
.getError())
1219 auto Type
= AggregatedLBREntry::BRANCH
;
1220 if (TypeOrErr
.get() == "B") {
1221 Type
= AggregatedLBREntry::BRANCH
;
1222 } else if (TypeOrErr
.get() == "F") {
1223 Type
= AggregatedLBREntry::FT
;
1224 } else if (TypeOrErr
.get() == "f") {
1225 Type
= AggregatedLBREntry::FT_EXTERNAL_ORIGIN
;
1227 reportError("expected B, F or f");
1228 return make_error_code(llvm::errc::io_error
);
1231 while (checkAndConsumeFS()) {
// Source location.
1233 ErrorOr
<Location
> From
= parseLocationOrOffset();
1234 if (std::error_code EC
= From
.getError())
1237 while (checkAndConsumeFS()) {
// Destination location.
1239 ErrorOr
<Location
> To
= parseLocationOrOffset();
1240 if (std::error_code EC
= To
.getError())
1243 while (checkAndConsumeFS()) {
// Count field. The second argument to parseNumberField is true for the
// non-BRANCH types, whose count is the last field on the line.
1245 ErrorOr
<int64_t> Frequency
=
1246 parseNumberField(FieldSeparator
, Type
!= AggregatedLBREntry::BRANCH
);
1247 if (std::error_code EC
= Frequency
.getError())
// Misprediction count is only present for BRANCH entries; otherwise it stays 0.
1250 uint64_t Mispreds
= 0;
1251 if (Type
== AggregatedLBREntry::BRANCH
) {
1252 while (checkAndConsumeFS()) {
1254 ErrorOr
<int64_t> MispredsOrErr
= parseNumberField(FieldSeparator
, true);
1255 if (std::error_code EC
= MispredsOrErr
.getError())
1257 Mispreds
= static_cast<uint64_t>(MispredsOrErr
.get());
1260 if (!checkAndConsumeNewLine()) {
1261 reportError("expected end of line");
1262 return make_error_code(llvm::errc::io_error
);
1265 return AggregatedLBREntry
{From
.get(), To
.get(),
1266 static_cast<uint64_t>(Frequency
.get()), Mispreds
,
// Returns true when opts::IgnoreInterruptLBR is set and either endpoint of the
// LBR entry is at or above KernelBaseAddr.
1270 bool DataAggregator::ignoreKernelInterrupt(LBREntry
&LBR
) const {
1271 return opts::IgnoreInterruptLBR
&&
1272 (LBR
.From
>= KernelBaseAddr
|| LBR
.To
>= KernelBaseAddr
);
// Builds a Heatmap from the profile samples and prints it, together with its
// CDF and section hotness, using opts::OutputFilename.
// With opts::BasicAggregation the sampled PCs are registered directly;
// otherwise fall-through traces between consecutive LBR entries are collected
// in FallthroughLBRs and registered as address ranges.
// NOTE(review): loop headers, else branches and several early-exit statements
// of this function are not visible in this view of the file.
1275 std::error_code
DataAggregator::printLBRHeatMap() {
1276 outs() << "PERF2BOLT: parse branch events...\n";
1277 NamedRegionTimer
T("parseBranch", "Parsing branch events", TimerGroupName
,
1278 TimerGroupDesc
, opts::TimeAggregator
);
// In Linux kernel mode the heatmap address window is overridden to span from
// KernelBaseAddr to the top of the 64-bit address space.
1280 if (opts::LinuxKernelMode
) {
1281 opts::HeatmapMaxAddress
= 0xffffffffffffffff;
1282 opts::HeatmapMinAddress
= KernelBaseAddr
;
1284 Heatmap
HM(opts::HeatmapBlock
, opts::HeatmapMinAddress
,
1285 opts::HeatmapMaxAddress
, getTextSections(BC
));
1286 uint64_t NumTotalSamples
= 0;
// Basic (non-LBR) mode: register each sample's PC.
1288 if (opts::BasicAggregation
) {
1290 ErrorOr
<PerfBasicSample
> SampleRes
= parseBasicSample();
1291 if (std::error_code EC
= SampleRes
.getError()) {
1292 if (EC
== errc::no_such_process
)
1296 PerfBasicSample
&Sample
= SampleRes
.get();
1297 HM
.registerAddress(Sample
.PC
);
1300 outs() << "HEATMAP: read " << NumTotalSamples
<< " basic samples\n";
// LBR mode: parse branch samples and accumulate fall-through traces.
1303 ErrorOr
<PerfBranchSample
> SampleRes
= parseBranchSample();
1304 if (std::error_code EC
= SampleRes
.getError()) {
1305 if (EC
== errc::no_such_process
)
1310 PerfBranchSample
&Sample
= SampleRes
.get();
1312 // LBRs are stored in reverse execution order. NextLBR refers to the next
1313 // executed branch record.
1314 const LBREntry
*NextLBR
= nullptr;
1315 for (const LBREntry
&LBR
: Sample
.LBR
) {
// NOTE(review): NextLBR starts as nullptr and is dereferenced below; the null
// check and the update of NextLBR are not visible in this view -- confirm they
// exist in the full file.
1317 // Record fall-through trace.
1318 const uint64_t TraceFrom
= LBR
.To
;
1319 const uint64_t TraceTo
= NextLBR
->From
;
1320 ++FallthroughLBRs
[Trace(TraceFrom
, TraceTo
)].InternCount
;
// Also register the target of the first stored entry and the source of the
// last stored entry as individual addresses.
1324 if (!Sample
.LBR
.empty()) {
1325 HM
.registerAddress(Sample
.LBR
.front().To
);
1326 HM
.registerAddress(Sample
.LBR
.back().From
);
// In this mode the total counts LBR entries, not sample lines.
1328 NumTotalSamples
+= Sample
.LBR
.size();
1330 outs() << "HEATMAP: read " << NumTotalSamples
<< " LBR samples\n";
1331 outs() << "HEATMAP: " << FallthroughLBRs
.size() << " unique traces\n";
// No samples at all: report why the heatmap cannot be built.
// NOTE(review): the first message below has no trailing newline.
1334 if (!NumTotalSamples
) {
1335 if (opts::BasicAggregation
) {
1336 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1337 "Cannot build heatmap.";
1339 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1340 "Cannot build heatmap. Use -nl for building heatmap from "
1346 outs() << "HEATMAP: building heat map...\n";
// Register every collected trace as an address range weighted by its count.
1348 for (const auto &LBR
: FallthroughLBRs
) {
1349 const Trace
&Trace
= LBR
.first
;
1350 const FTInfo
&Info
= LBR
.second
;
1351 HM
.registerAddressRange(Trace
.From
, Trace
.To
, Info
.InternCount
);
1354 if (HM
.getNumInvalidRanges())
1355 outs() << "HEATMAP: invalid traces: " << HM
.getNumInvalidRanges() << '\n';
1358 errs() << "HEATMAP-ERROR: no valid traces registered\n";
// Emit the heatmap, then the CDF and section hotness. An output name of "-" is
// passed through unchanged; the calls that append ".csv" and
// "-section-hotness.csv" are presumably the else branches (the else lines are
// not visible in this view).
1362 HM
.print(opts::OutputFilename
);
1363 if (opts::OutputFilename
== "-")
1364 HM
.printCDF(opts::OutputFilename
);
1366 HM
.printCDF(opts::OutputFilename
+ ".csv");
1367 if (opts::OutputFilename
== "-")
1368 HM
.printSectionHotness(opts::OutputFilename
);
1370 HM
.printSectionHotness(opts::OutputFilename
+ "-section-hotness.csv");
1372 return std::error_code();
// Aggregates the LBR entries of one branch sample into FallthroughLBRs
// (fall-through traces between consecutive entries) and BranchLBRs (taken
// branches with misprediction counts).
// NeedsSkylakeFix enables the workaround described in the comment inside the
// loop.
// NOTE(review): counter updates, else branches and the return statement of
// this function are not visible in this view of the file.
1375 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample
&Sample
,
1376 bool NeedsSkylakeFix
) {
1377 uint64_t NumTraces
{0};
1378 // LBRs are stored in reverse execution order. NextPC refers to the next
1379 // recorded executed PC.
1380 uint64_t NextPC
= opts::UseEventPC
? Sample
.PC
: 0;
1381 uint32_t NumEntry
= 0;
1382 for (const LBREntry
&LBR
: Sample
.LBR
) {
1384 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1385 // sometimes record entry 32 as an exact copy of entry 31. This will cause
1386 // us to likely record an invalid trace and generate a stale function for
1387 // BAT mode (non BAT disassembles the function and is able to ignore this
1388 // trace at aggregation time). Drop first 2 entries (last two, in
1389 // chronological order)
1390 if (NeedsSkylakeFix
&& NumEntry
<= 2)
1393 // Record fall-through trace.
1394 const uint64_t TraceFrom
= LBR
.To
;
1395 const uint64_t TraceTo
= NextPC
;
1396 const BinaryFunction
*TraceBF
=
1397 getBinaryFunctionContainingAddress(TraceFrom
);
// The trace is recorded when both of its ends fall within the same function.
1398 if (TraceBF
&& TraceBF
->containsAddress(TraceTo
)) {
1399 FTInfo
&Info
= FallthroughLBRs
[Trace(TraceFrom
, TraceTo
)];
1400 if (TraceBF
->containsAddress(LBR
.From
))
// Otherwise the trace is only reported in debug output: as "Invalid trace"
// when both ends resolve to functions, or as "Out of range trace" when at
// least one end does not.
1405 const BinaryFunction
*ToFunc
=
1406 getBinaryFunctionContainingAddress(TraceTo
);
1407 if (TraceBF
&& ToFunc
) {
1409 dbgs() << "Invalid trace starting in " << TraceBF
->getPrintName()
1410 << formatv(" @ {0:x}", TraceFrom
- TraceBF
->getAddress())
1411 << formatv(" and ending @ {0:x}\n", TraceTo
);
1416 dbgs() << "Out of range trace starting in "
1417 << (TraceBF
? TraceBF
->getPrintName() : "None")
1418 << formatv(" @ {0:x}",
1419 TraceFrom
- (TraceBF
? TraceBF
->getAddress() : 0))
1420 << " and ending in "
1421 << (ToFunc
? ToFunc
->getPrintName() : "None")
1422 << formatv(" @ {0:x}\n",
1423 TraceTo
- (ToFunc
? ToFunc
->getAddress() : 0));
1425 ++NumLongRangeTraces
;
// Branch record: an endpoint that is not inside any known function is
// replaced with 0 before being used as the BranchLBRs key.
1432 uint64_t From
= getBinaryFunctionContainingAddress(LBR
.From
) ? LBR
.From
: 0;
1433 uint64_t To
= getBinaryFunctionContainingAddress(LBR
.To
) ? LBR
.To
: 0;
1436 BranchInfo
&Info
= BranchLBRs
[Trace(From
, To
)];
1438 Info
.MispredCount
+= LBR
.Mispred
;
// Parses branch samples from the parsing buffer (bounded by opts::MaxSamples),
// aggregates their LBR entries via parseLBRSample(), marks the functions that
// appear in BranchLBRs as having profile available, and prints summary
// statistics and warnings.
// NOTE(review): counter increments, else branches and some early-exit
// statements of this function are not visible in this view of the file.
1443 std::error_code
DataAggregator::parseBranchEvents() {
1444 outs() << "PERF2BOLT: parse branch events...\n";
1445 NamedRegionTimer
T("parseBranch", "Parsing branch events", TimerGroupName
,
1446 TimerGroupDesc
, opts::TimeAggregator
);
1448 uint64_t NumTotalSamples
= 0;
1449 uint64_t NumEntries
= 0;
1450 uint64_t NumSamples
= 0;
1451 uint64_t NumSamplesNoLBR
= 0;
1452 uint64_t NumTraces
= 0;
1453 bool NeedsSkylakeFix
= false;
1455 while (hasData() && NumTotalSamples
< opts::MaxSamples
) {
1458 ErrorOr
<PerfBranchSample
> SampleRes
= parseBranchSample();
// errc::no_such_process (sample from a PID without mapping info) is tested
// separately from other parse errors.
1459 if (std::error_code EC
= SampleRes
.getError()) {
1460 if (EC
== errc::no_such_process
)
1466 PerfBranchSample
&Sample
= SampleRes
.get();
// Per-PC hit counts are additionally recorded when AutoFDO output is requested.
1467 if (opts::WriteAutoFDOData
)
1468 ++BasicSamples
[Sample
.PC
];
1470 if (Sample
.LBR
.empty()) {
1475 NumEntries
+= Sample
.LBR
.size();
// With BAT present, the first sample carrying exactly 32 LBR entries turns the
// Skylake workaround on; it then stays on for the remaining samples.
1476 if (BAT
&& Sample
.LBR
.size() == 32 && !NeedsSkylakeFix
) {
1477 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1478 NeedsSkylakeFix
= true;
1481 NumTraces
+= parseLBRSample(Sample
, NeedsSkylakeFix
);
// Mark every function containing a recorded branch endpoint.
1484 for (const Trace
&Trace
: llvm::make_first_range(BranchLBRs
))
1485 for (const uint64_t Addr
: {Trace
.From
, Trace
.To
})
1486 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Addr
))
1487 BF
->setHasProfileAvailable();
// Helper: print a percentage, colored according to thresholds T1 and T2 when
// the stream supports colors.
1489 auto printColored
= [](raw_ostream
&OS
, float Percent
, float T1
, float T2
) {
1491 if (OS
.has_colors()) {
1493 OS
.changeColor(raw_ostream::RED
);
1494 else if (Percent
> T1
)
1495 OS
.changeColor(raw_ostream::YELLOW
);
1497 OS
.changeColor(raw_ostream::GREEN
);
1499 OS
<< format("%.1f%%", Percent
);
1500 if (OS
.has_colors())
1505 outs() << "PERF2BOLT: read " << NumSamples
<< " samples and " << NumEntries
1506 << " LBR entries\n";
1507 if (NumTotalSamples
) {
1508 if (NumSamples
&& NumSamplesNoLBR
== NumSamples
) {
1509 // Note: we don't know if perf2bolt is being used to parse memory samples
1510 // at this point. In this case, it is OK to parse zero LBRs.
1511 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1512 "LBR. Record profile with perf record -j any or run perf2bolt "
1513 "in no-LBR mode with -nl (the performance improvement in -nl "
1514 "mode may be limited)\n";
// Samples that were read but not counted in NumSamples are reported as
// ignored.
1516 const uint64_t IgnoredSamples
= NumTotalSamples
- NumSamples
;
1517 const float PercentIgnored
= 100.0f
* IgnoredSamples
/ NumTotalSamples
;
1518 outs() << "PERF2BOLT: " << IgnoredSamples
<< " samples";
1519 printColored(outs(), PercentIgnored
, 20, 50);
1520 outs() << " were ignored\n";
1521 if (PercentIgnored
> 50.0f
)
1522 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1523 "were attributed to the input binary\n";
1526 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1527 << NumInvalidTraces
;
1529 if (NumTraces
> 0) {
1530 Perc
= NumInvalidTraces
* 100.0f
/ NumTraces
;
1531 printColored(outs(), Perc
, 5, 10);
1535 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1536 "binary is probably not the same binary used during profiling "
1537 "collection. The generated data may be ineffective for improving "
1540 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1541 << NumLongRangeTraces
;
// NOTE(review): the guard against NumTraces == 0 for this division is not
// visible in this view -- confirm it exists in the full file.
1543 outs() << format(" (%.1f%%)", NumLongRangeTraces
* 100.0f
/ NumTraces
);
1546 if (NumColdSamples
> 0) {
1547 const float ColdSamples
= NumColdSamples
* 100.0f
/ NumTotalSamples
;
1548 outs() << "PERF2BOLT: " << NumColdSamples
1549 << format(" (%.1f%%)", ColdSamples
)
1550 << " samples recorded in cold regions of split functions.\n";
1551 if (ColdSamples
> 5.0f
)
1553 << "WARNING: The BOLT-processed binary where samples were collected "
1554 "likely used bad data or your service observed a large shift in "
1555 "profile. You may want to audit this.\n";
1558 return std::error_code();
// Feeds the aggregated LBR data into the profile: each fall-through trace in
// FallthroughLBRs is passed to doTrace() and each entry in BranchLBRs is
// passed to doBranch().
1561 void DataAggregator::processBranchEvents() {
1562 outs() << "PERF2BOLT: processing branch events...\n";
1563 NamedRegionTimer
T("processBranch", "Processing branch events",
1564 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1566 for (const auto &AggrLBR
: FallthroughLBRs
) {
1567 const Trace
&Loc
= AggrLBR
.first
;
1568 const FTInfo
&Info
= AggrLBR
.second
;
// doTrace() takes two LBR entries; build degenerate ones whose From and To are
// both the trace start (First) and both the trace end (Second).
1569 LBREntry First
{Loc
.From
, Loc
.From
, false};
1570 LBREntry Second
{Loc
.To
, Loc
.To
, false};
1571 if (Info
.InternCount
)
1572 doTrace(First
, Second
, Info
.InternCount
);
// NOTE(review): a statement between this check and the doTrace() call is not
// visible in this view of the file.
1573 if (Info
.ExternCount
) {
1575 doTrace(First
, Second
, Info
.ExternCount
);
1579 for (const auto &AggrLBR
: BranchLBRs
) {
1580 const Trace
&Loc
= AggrLBR
.first
;
1581 const BranchInfo
&Info
= AggrLBR
.second
;
1582 doBranch(Loc
.From
, Loc
.To
, Info
.TakenCount
, Info
.MispredCount
);
// Parses basic (non-LBR) samples: for each parsed sample, marks the containing
// function (if any) as having profile available, counts the hit in
// BasicSamples keyed by PC, and records the event name in EventNames.
// NOTE(review): the enclosing loop and the statements that follow the error
// check are not visible in this view of the file.
1586 std::error_code
DataAggregator::parseBasicEvents() {
1587 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1588 NamedRegionTimer
T("parseBasic", "Parsing basic events", TimerGroupName
,
1589 TimerGroupDesc
, opts::TimeAggregator
);
1591 ErrorOr
<PerfBasicSample
> Sample
= parseBasicSample();
1592 if (std::error_code EC
= Sample
.getError())
1598 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Sample
->PC
))
1599 BF
->setHasProfileAvailable();
1601 ++BasicSamples
[Sample
->PC
];
1602 EventNames
.insert(Sample
->EventName
);
1605 return std::error_code();
// Attributes the per-PC hit counts in BasicSamples to functions via
// doSample() and prints how many samples were read and how many fell outside
// any known function.
// NOTE(review): the null check on Func, some conditions of the color selection
// and the condition guarding the final warning are not visible in this view of
// the file.
1608 void DataAggregator::processBasicEvents() {
1609 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1610 NamedRegionTimer
T("processBasic", "Processing basic events", TimerGroupName
,
1611 TimerGroupDesc
, opts::TimeAggregator
);
1612 uint64_t OutOfRangeSamples
= 0;
1613 uint64_t NumSamples
= 0;
1614 for (auto &Sample
: BasicSamples
) {
1615 const uint64_t PC
= Sample
.first
;
1616 const uint64_t HitCount
= Sample
.second
;
1617 NumSamples
+= HitCount
;
1618 BinaryFunction
*Func
= getBinaryFunctionContainingAddress(PC
);
// Hits with no containing function are tallied as out of range (the guarding
// condition is not visible in this view).
1620 OutOfRangeSamples
+= HitCount
;
1624 doSample(*Func
, PC
, HitCount
);
1626 outs() << "PERF2BOLT: read " << NumSamples
<< " samples\n";
1628 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1629 << OutOfRangeSamples
;
// Print the out-of-range percentage, colored when the stream supports it.
1631 if (NumSamples
> 0) {
1633 Perc
= OutOfRangeSamples
* 100.0f
/ NumSamples
;
1634 if (outs().has_colors()) {
1636 outs().changeColor(raw_ostream::RED
);
1637 else if (Perc
> 40.0f
)
1638 outs().changeColor(raw_ostream::YELLOW
);
1640 outs().changeColor(raw_ostream::GREEN
);
1642 outs() << format("%.1f%%", Perc
);
1643 if (outs().has_colors())
1644 outs().resetColor();
1649 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1650 "binary is probably not the same binary used during profiling "
1651 "collection. The generated data may be ineffective for improving "
// Parses memory samples: for each parsed sample, marks the function containing
// its PC (if any) as having profile available and appends the sample to
// MemSamples.
// NOTE(review): the enclosing loop and the statement that follows the error
// check are not visible in this view of the file.
1655 std::error_code
DataAggregator::parseMemEvents() {
1656 outs() << "PERF2BOLT: parsing memory events...\n";
1657 NamedRegionTimer
T("parseMemEvents", "Parsing mem events", TimerGroupName
,
1658 TimerGroupDesc
, opts::TimeAggregator
);
1660 ErrorOr
<PerfMemSample
> Sample
= parseMemSample();
1661 if (std::error_code EC
= Sample
.getError())
1664 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Sample
->PC
))
1665 BF
->setHasProfileAvailable();
1667 MemSamples
.emplace_back(std::move(Sample
.get()));
1670 return std::error_code();
// Converts each collected memory sample into a (function location, data
// location) pair and records it in the per-function memory data stored in
// NamesToMemEvents.
// NOTE(review): the declarations of FuncName/MemName, the null check on Func
// and the statements that skip a sample are not visible in this view of the
// file.
1673 void DataAggregator::processMemEvents() {
1674 NamedRegionTimer
T("ProcessMemEvents", "Processing mem events",
1675 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1676 for (const PerfMemSample
&Sample
: MemSamples
) {
1677 uint64_t PC
= Sample
.PC
;
1678 uint64_t Addr
= Sample
.Addr
;
1682 // Try to resolve symbol for PC
1683 BinaryFunction
*Func
= getBinaryFunctionContainingAddress(PC
);
1685 LLVM_DEBUG(if (PC
!= 0) {
1686 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC
, Addr
);
// Express the PC as an offset from the start of its function.
1691 FuncName
= Func
->getOneName();
1692 PC
-= Func
->getAddress();
1694 // Try to resolve symbol for memory load
// When a data object contains the address, the address becomes an offset into
// that object.
1695 if (BinaryData
*BD
= BC
->getBinaryDataContainingAddress(Addr
)) {
1696 MemName
= BD
->getName();
1697 Addr
-= BD
->getAddress();
1698 } else if (opts::FilterMemProfile
) {
1699 // Filter out heap/stack accesses
// The first Location argument is true only when the corresponding name was
// resolved.
1703 const Location
FuncLoc(!FuncName
.empty(), FuncName
, PC
);
1704 const Location
AddrLoc(!MemName
.empty(), MemName
, Addr
);
// operator[] creates the per-function record on first use.
1706 FuncMemData
*MemData
= &NamesToMemEvents
[FuncName
];
1707 MemData
->Name
= FuncName
;
1708 setMemData(*Func
, MemData
);
1709 MemData
->update(FuncLoc
, AddrLoc
);
1710 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc
<< " = " << AddrLoc
<< "\n");
// Parses entries of a pre-aggregated profile: for each parsed entry, marks the
// functions containing its From/To offsets as having profile available and
// appends the entry to AggregatedLBRs.
// NOTE(review): the enclosing loop and the statement that follows the error
// check are not visible in this view of the file.
1714 std::error_code
DataAggregator::parsePreAggregatedLBRSamples() {
1715 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1716 NamedRegionTimer
T("parseAggregated", "Parsing aggregated branch events",
1717 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1719 ErrorOr
<AggregatedLBREntry
> AggrEntry
= parseAggregatedLBREntry();
1720 if (std::error_code EC
= AggrEntry
.getError())
1723 for (const uint64_t Addr
: {AggrEntry
->From
.Offset
, AggrEntry
->To
.Offset
})
1724 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Addr
))
1725 BF
->setHasProfileAvailable();
1727 AggregatedLBRs
.emplace_back(std::move(AggrEntry
.get()));
1730 return std::error_code();
// Feeds the entries of AggregatedLBRs into the profile -- BRANCH entries via
// doBranch(), FT and FT_EXTERNAL_ORIGIN entries via doTrace() -- and prints
// summary statistics about invalid and out-of-range traces.
// NOTE(review): break statements, one operand of the conditional that builds
// `First`, some color-selection conditions and the guards around the warnings
// are not visible in this view of the file.
1733 void DataAggregator::processPreAggregated() {
1734 outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1735 NamedRegionTimer
T("processAggregated", "Processing aggregated branch events",
1736 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1738 uint64_t NumTraces
= 0;
1739 for (const AggregatedLBREntry
&AggrEntry
: AggregatedLBRs
) {
1740 switch (AggrEntry
.EntryType
) {
1741 case AggregatedLBREntry::BRANCH
:
1742 doBranch(AggrEntry
.From
.Offset
, AggrEntry
.To
.Offset
, AggrEntry
.Count
,
1743 AggrEntry
.Mispreds
);
1745 case AggregatedLBREntry::FT
:
1746 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN
: {
// For FT entries the first field of `First` is the trace start offset; the
// value used for FT_EXTERNAL_ORIGIN is on a line not visible in this view.
1747 LBREntry First
{AggrEntry
.EntryType
== AggregatedLBREntry::FT
1748 ? AggrEntry
.From
.Offset
1750 AggrEntry
.From
.Offset
, false};
1751 LBREntry Second
{AggrEntry
.To
.Offset
, AggrEntry
.To
.Offset
, false};
1752 doTrace(First
, Second
, AggrEntry
.Count
);
// Traces are weighted by their aggregated count.
1753 NumTraces
+= AggrEntry
.Count
;
1759 outs() << "PERF2BOLT: read " << AggregatedLBRs
.size()
1760 << " aggregated LBR entries\n";
1761 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1762 << NumInvalidTraces
;
// Print the mismatch percentage, colored when the stream supports it.
1764 if (NumTraces
> 0) {
1766 Perc
= NumInvalidTraces
* 100.0f
/ NumTraces
;
1767 if (outs().has_colors()) {
1769 outs().changeColor(raw_ostream::RED
);
1770 else if (Perc
> 5.0f
)
1771 outs().changeColor(raw_ostream::YELLOW
);
1773 outs().changeColor(raw_ostream::GREEN
);
1775 outs() << format("%.1f%%", Perc
);
1776 if (outs().has_colors())
1777 outs().resetColor();
1782 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1783 "binary is probably not the same binary used during profiling "
1784 "collection. The generated data may be ineffective for improving "
1787 outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1788 << NumLongRangeTraces
;
// NOTE(review): the guard against NumTraces == 0 for this division is not
// visible in this view -- confirm it exists in the full file.
1790 outs() << format(" (%.1f%%)", NumLongRangeTraces
* 100.0f
/ NumTraces
);
// Inspects the current line of the parsing buffer for a
// "PERF_RECORD_COMM exec" record and extracts the PID from it.
// Returns std::nullopt when there is no terminating newline, when the line
// holds no such record, or when the PID text is not a decimal integer.
// The line itself is not consumed by the visible code.
// NOTE(review): the declaration of PID and the successful return are not
// visible in this view of the file.
1794 std::optional
<int32_t> DataAggregator::parseCommExecEvent() {
1795 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1796 if (LineEnd
== StringRef::npos
) {
1797 reportError("expected rest of line");
1798 Diag
<< "Found: " << ParsingBuf
<< "\n";
1799 return std::nullopt
;
1801 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1803 size_t Pos
= Line
.find("PERF_RECORD_COMM exec");
1804 if (Pos
== StringRef::npos
)
1805 return std::nullopt
;
// Drop everything before the record name.
1806 Line
= Line
.drop_front(Pos
);
1809 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
// The PID is the text between the last ':' and the following '/'.
1810 StringRef PIDStr
= Line
.rsplit(':').second
.split('/').first
;
// NOTE(review): the diagnostic below prints PIDStr immediately followed by
// "in '" with no separating space.
1812 if (PIDStr
.getAsInteger(10, PID
)) {
1813 reportError("expected PID");
1814 Diag
<< "Found: " << PIDStr
<< "in '" << Line
<< "'\n";
1815 return std::nullopt
;
// Converts a "<sec>.<usec>" time string into a single microsecond count
// (sec * 1000000 + usec). Returns std::nullopt if either part is not a decimal
// integer.
// NOTE(review): the part after '.' is parsed as a plain integer and added as
// microseconds, so this presumably relies on the input always carrying exactly
// six fractional digits -- confirm against the perf output format.
// NOTE(review): the declarations of SecTime and USecTime are not visible in
// this view of the file.
1822 std::optional
<uint64_t> parsePerfTime(const StringRef TimeStr
) {
1823 const StringRef SecTimeStr
= TimeStr
.split('.').first
;
1824 const StringRef USecTimeStr
= TimeStr
.split('.').second
;
1827 if (SecTimeStr
.getAsInteger(10, SecTime
) ||
1828 USecTimeStr
.getAsInteger(10, USecTime
))
1829 return std::nullopt
;
1830 return SecTime
* 1000000ULL + USecTime
;
// Parses the current line as a PERF_RECORD_FORK record and extracts its
// timestamp (when parseable) and the child and parent PIDs.
// Returns std::nullopt when there is no terminating newline, when the line is
// not a fork record (the line is consumed in that case), or when a PID is not
// a decimal integer.
// NOTE(review): the declaration of FI, the use of the parsed time and the
// successful return are not visible in this view of the file.
1834 std::optional
<DataAggregator::ForkInfo
> DataAggregator::parseForkEvent() {
1835 while (checkAndConsumeFS()) {
1838 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1839 if (LineEnd
== StringRef::npos
) {
1840 reportError("expected rest of line");
1841 Diag
<< "Found: " << ParsingBuf
<< "\n";
1842 return std::nullopt
;
1844 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1846 size_t Pos
= Line
.find("PERF_RECORD_FORK");
1847 if (Pos
== StringRef::npos
) {
1848 consumeRestOfLine();
1849 return std::nullopt
;
// The timestamp is the last separator-delimited token before the final ':'
// that precedes the record name.
1854 const StringRef TimeStr
=
1855 Line
.substr(0, Pos
).rsplit(':').first
.rsplit(FieldSeparator
).second
;
1856 if (std::optional
<uint64_t> TimeRes
= parsePerfTime(TimeStr
)) {
1860 Line
= Line
.drop_front(Pos
);
1863 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
// Child PID: text between the first '(' and the next ':'.
// NOTE(review): the diagnostics below print the PID text immediately followed
// by "in '" with no separating space.
1864 const StringRef ChildPIDStr
= Line
.split('(').second
.split(':').first
;
1865 if (ChildPIDStr
.getAsInteger(10, FI
.ChildPID
)) {
1866 reportError("expected PID");
1867 Diag
<< "Found: " << ChildPIDStr
<< "in '" << Line
<< "'\n";
1868 return std::nullopt
;
// Parent PID: text between the last '(' and the next ':'.
1871 const StringRef ParentPIDStr
= Line
.rsplit('(').second
.split(':').first
;
1872 if (ParentPIDStr
.getAsInteger(10, FI
.ParentPID
)) {
1873 reportError("expected PID");
1874 Diag
<< "Found: " << ParentPIDStr
<< "in '" << Line
<< "'\n";
1875 return std::nullopt
;
1878 consumeRestOfLine();
// Parses the current line as a PERF_RECORD_MMAP2 record and returns the mapped
// file's base name together with the parsed MMapInfo (time, PID, mapping
// address, size and file offset).
// Lines that are not MMAP2 records, or whose file name starts with "//" or
// "[", are consumed and reported as an empty name paired with the
// default-initialized ParsedInfo. Malformed numeric fields produce io_error.
1883 ErrorOr
<std::pair
<StringRef
, DataAggregator::MMapInfo
>>
1884 DataAggregator::parseMMapEvent() {
1885 while (checkAndConsumeFS()) {
1888 MMapInfo ParsedInfo
;
1890 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1891 if (LineEnd
== StringRef::npos
) {
1892 reportError("expected rest of line");
1893 Diag
<< "Found: " << ParsingBuf
<< "\n";
1894 return make_error_code(llvm::errc::io_error
);
1896 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1898 size_t Pos
= Line
.find("PERF_RECORD_MMAP2");
1899 if (Pos
== StringRef::npos
) {
1900 consumeRestOfLine();
1901 return std::make_pair(StringRef(), ParsedInfo
);
1905 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
// The timestamp is optional: ParsedInfo.Time is only set when it parses.
1907 const StringRef TimeStr
=
1908 Line
.substr(0, Pos
).rsplit(':').first
.rsplit(FieldSeparator
).second
;
1909 if (std::optional
<uint64_t> TimeRes
= parsePerfTime(TimeStr
))
1910 ParsedInfo
.Time
= *TimeRes
;
1912 Line
= Line
.drop_front(Pos
);
1915 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
// The file name is the last separator-delimited token on the line; only its
// base name is kept.
1917 StringRef FileName
= Line
.rsplit(FieldSeparator
).second
;
1918 if (FileName
.starts_with("//") || FileName
.starts_with("[")) {
1919 consumeRestOfLine();
1920 return std::make_pair(StringRef(), ParsedInfo
);
1922 FileName
= sys::path::filename(FileName
);
// PID: text between the first field separator and the following '/'.
// NOTE(review): the diagnostics below print the offending text immediately
// followed by "in '" with no separating space.
1924 const StringRef PIDStr
= Line
.split(FieldSeparator
).second
.split('/').first
;
1925 if (PIDStr
.getAsInteger(10, ParsedInfo
.PID
)) {
1926 reportError("expected PID");
1927 Diag
<< "Found: " << PIDStr
<< "in '" << Line
<< "'\n";
1928 return make_error_code(llvm::errc::io_error
);
// Base address: text between '[' and '('; radix 0 lets getAsInteger pick the
// base from the prefix.
1931 const StringRef BaseAddressStr
= Line
.split('[').second
.split('(').first
;
1932 if (BaseAddressStr
.getAsInteger(0, ParsedInfo
.MMapAddress
)) {
1933 reportError("expected base address");
1934 Diag
<< "Found: " << BaseAddressStr
<< "in '" << Line
<< "'\n";
1935 return make_error_code(llvm::errc::io_error
);
// Size: text between '(' and ')'.
1938 const StringRef SizeStr
= Line
.split('(').second
.split(')').first
;
1939 if (SizeStr
.getAsInteger(0, ParsedInfo
.Size
)) {
1940 reportError("expected mmaped size");
1941 Diag
<< "Found: " << SizeStr
<< "in '" << Line
<< "'\n";
1942 return make_error_code(llvm::errc::io_error
);
// File offset: first token after '@'.
1945 const StringRef OffsetStr
=
1946 Line
.split('@').second
.ltrim().split(FieldSeparator
).first
;
1947 if (OffsetStr
.getAsInteger(0, ParsedInfo
.Offset
)) {
1948 reportError("expected mmaped page-aligned offset");
1949 Diag
<< "Found: " << OffsetStr
<< "in '" << Line
<< "'\n";
1950 return make_error_code(llvm::errc::io_error
);
1953 consumeRestOfLine();
1955 return std::make_pair(FileName
, ParsedInfo
);
// Parses mmap events, collects the mappings per file name in GlobalMMapInfo,
// and fills BinaryMMapInfo (keyed by PID) with the mappings whose file name
// matches the input binary -- or BuildIDBinaryName when the binary's own name
// has no mappings and a build-id derived name is available.
// Prints an error listing the available binary names when no mapping matches.
// NOTE(review): the enclosing parse loop, several `continue` statements,
// closing lines of lambdas and the handling after the final error message are
// not visible in this view of the file.
1958 std::error_code
DataAggregator::parseMMapEvents() {
1959 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1960 NamedRegionTimer
T("parseMMapEvents", "Parsing mmap events", TimerGroupName
,
1961 TimerGroupDesc
, opts::TimeAggregator
);
// File name -> every mapping seen for that file, across all PIDs.
1963 std::multimap
<StringRef
, MMapInfo
> GlobalMMapInfo
;
1965 ErrorOr
<std::pair
<StringRef
, MMapInfo
>> FileMMapInfoRes
= parseMMapEvent();
1966 if (std::error_code EC
= FileMMapInfoRes
.getError())
1969 std::pair
<StringRef
, MMapInfo
> FileMMapInfo
= FileMMapInfoRes
.get();
// A PID of -1 and a "(deleted)" file name are each tested before the entry is
// used (the guarded statements are not visible in this view).
1970 if (FileMMapInfo
.second
.PID
== -1)
1972 if (FileMMapInfo
.first
.equals("(deleted)"))
1975 // Consider only the first mapping of the file for any given PID
1976 auto Range
= GlobalMMapInfo
.equal_range(FileMMapInfo
.first
);
1977 bool PIDExists
= llvm::any_of(make_range(Range
), [&](const auto &MI
) {
1978 return MI
.second
.PID
== FileMMapInfo
.second
.PID
;
1984 GlobalMMapInfo
.insert(FileMMapInfo
);
1988 dbgs() << "FileName -> mmap info:\n"
1989 << " Filename : PID [MMapAddr, Size, Offset]\n";
1990 for (const auto &[Name
, MMap
] : GlobalMMapInfo
)
1991 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name
, MMap
.PID
,
1992 MMap
.MMapAddress
, MMap
.Size
, MMap
.Offset
);
// Choose the file name to match mappings against: the binary's own base name,
// or the build-id derived name when the former has no mappings.
1995 StringRef NameToUse
= llvm::sys::path::filename(BC
->getFilename());
1996 if (GlobalMMapInfo
.count(NameToUse
) == 0 && !BuildIDBinaryName
.empty()) {
1997 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
1998 << "\" for profile matching\n";
1999 NameToUse
= BuildIDBinaryName
;
2002 auto Range
= GlobalMMapInfo
.equal_range(NameToUse
);
2003 for (MMapInfo
&MMapInfo
: llvm::make_second_range(make_range(Range
))) {
2004 if (BC
->HasFixedLoadAddress
&& MMapInfo
.MMapAddress
) {
2005 // Check that the binary mapping matches one of the segments.
2006 bool MatchFound
= llvm::any_of(
2007 llvm::make_second_range(BC
->SegmentMapInfo
),
2008 [&](SegmentInfo
&SegInfo
) {
2009 // The mapping is page-aligned and hence the MMapAddress could be
2010 // different from the segment start address. We cannot know the page
2011 // size of the mapping, but we know it should not exceed the segment
2012 // alignment value. Hence we are performing an approximate check.
2013 return SegInfo
.Address
>= MMapInfo
.MMapAddress
&&
2014 SegInfo
.Address
- MMapInfo
.MMapAddress
< SegInfo
.Alignment
;
2017 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2018 << " at 0x" << Twine::utohexstr(MMapInfo
.MMapAddress
) << '\n';
2023 // Set base address for shared objects.
2024 if (!BC
->HasFixedLoadAddress
) {
2025 std::optional
<uint64_t> BaseAddress
=
2026 BC
->getBaseAddressForMapping(MMapInfo
.MMapAddress
, MMapInfo
.Offset
);
2028 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2029 "binary when memory mapped at 0x"
2030 << Twine::utohexstr(MMapInfo
.MMapAddress
)
2031 << " using file offset 0x" << Twine::utohexstr(MMapInfo
.Offset
)
2032 << ". Ignoring profile data for this mapping\n";
2035 MMapInfo
.BaseAddress
= *BaseAddress
;
// Record the mapping under its PID.
2039 BinaryMMapInfo
.insert(std::make_pair(MMapInfo
.PID
, MMapInfo
));
// Nothing matched the input binary: explain what was found instead.
2042 if (BinaryMMapInfo
.empty()) {
2043 if (errs().has_colors())
2044 errs().changeColor(raw_ostream::RED
);
2045 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2046 << BC
->getFilename() << "\".";
2047 if (!GlobalMMapInfo
.empty()) {
2048 errs() << " Profile for the following binary name(s) is available:\n";
// upper_bound() jumps past all entries sharing the current key, so each file
// name is printed once.
2049 for (auto I
= GlobalMMapInfo
.begin(), IE
= GlobalMMapInfo
.end(); I
!= IE
;
2050 I
= GlobalMMapInfo
.upper_bound(I
->first
))
2051 errs() << " " << I
->first
<< '\n';
2052 errs() << "Please rename the input binary.\n";
2054 errs() << " Failed to extract any binary name from a profile.\n";
2056 if (errs().has_colors())
2057 errs().resetColor();
2062 return std::error_code();
// Parses task (comm exec / fork) events and updates BinaryMMapInfo
// accordingly: a forked child that later exec'd is removed, and a forked child
// of a known parent PID inherits a copy of the parent's mapping marked as
// Forked. Finally prints the PIDs associated with the input binary.
// NOTE(review): the enclosing loop, the `continue` statements and the check on
// the result of parseForkEvent() are not visible in this view of the file.
2065 std::error_code
DataAggregator::parseTaskEvents() {
2066 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2067 NamedRegionTimer
T("parseTaskEvents", "Parsing task events", TimerGroupName
,
2068 TimerGroupDesc
, opts::TimeAggregator
);
2071 if (std::optional
<int32_t> CommInfo
= parseCommExecEvent()) {
2072 // Remove forked child that ran execve
2073 auto MMapInfoIter
= BinaryMMapInfo
.find(*CommInfo
);
2074 if (MMapInfoIter
!= BinaryMMapInfo
.end() && MMapInfoIter
->second
.Forked
)
2075 BinaryMMapInfo
.erase(MMapInfoIter
);
2076 consumeRestOfLine();
2080 std::optional
<ForkInfo
> ForkInfo
= parseForkEvent();
// Records whose parent and child PID are equal are tested first (the guarded
// statement is not visible in this view).
2084 if (ForkInfo
->ParentPID
== ForkInfo
->ChildPID
)
2087 if (ForkInfo
->Time
== 0) {
2088 // Process was forked and mmaped before perf ran. In this case the child
2089 // should have its own mmap entry unless it was execve'd.
// Only children of a PID already associated with the binary are of interest.
2093 auto MMapInfoIter
= BinaryMMapInfo
.find(ForkInfo
->ParentPID
);
2094 if (MMapInfoIter
== BinaryMMapInfo
.end())
// Clone the parent's mapping for the child and flag it as forked.
2097 MMapInfo MMapInfo
= MMapInfoIter
->second
;
2098 MMapInfo
.PID
= ForkInfo
->ChildPID
;
2099 MMapInfo
.Forked
= true;
2100 BinaryMMapInfo
.insert(std::make_pair(MMapInfo
.PID
, MMapInfo
));
2103 outs() << "PERF2BOLT: input binary is associated with "
2104 << BinaryMMapInfo
.size() << " PID(s)\n";
2107 for (const MMapInfo
&MMI
: llvm::make_second_range(BinaryMMapInfo
))
2108 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI
.PID
,
2109 (MMI
.Forked
? " (forked)" : ""), MMI
.MMapAddress
,
2113 return std::error_code();
// Parses one "<build-id> <file name>" line from the parsing buffer and returns
// the pair as (name, build-id). Returns std::nullopt when either string cannot
// be parsed.
// NOTE(review): the condition guarding the middle `return std::nullopt` is not
// visible in this view of the file.
2116 std::optional
<std::pair
<StringRef
, StringRef
>>
2117 DataAggregator::parseNameBuildIDPair() {
2118 while (checkAndConsumeFS()) {
2121 ErrorOr
<StringRef
> BuildIDStr
= parseString(FieldSeparator
, true);
2122 if (std::error_code EC
= BuildIDStr
.getError())
2123 return std::nullopt
;
2125 // If one of the strings is missing, don't issue a parsing error, but still
2126 // do not return a value.
2127 consumeAllRemainingFS();
2129 return std::nullopt
;
2131 ErrorOr
<StringRef
> NameStr
= parseString(FieldSeparator
, true);
2132 if (std::error_code EC
= NameStr
.getError())
2133 return std::nullopt
;
2135 consumeRestOfLine();
// Note the order: the file name comes first in the result, the build-id second.
2136 return std::make_pair(NameStr
.get(), BuildIDStr
.get());
// Reports whether no invalid name/build-id entry was encountered while
// scanning the buffer with parseNameBuildIDPair(). The parsing buffer is saved
// on entry and restored before returning, so the scan does not consume input.
// NOTE(review): the loop around the parse call and any early exits are not
// visible in this view of the file.
2139 bool DataAggregator::hasAllBuildIDs() {
2140 const StringRef SavedParsingBuf
= ParsingBuf
;
2145 bool HasInvalidEntries
= false;
2147 if (!parseNameBuildIDPair()) {
2148 HasInvalidEntries
= true;
// Rewind the buffer to where it was on entry.
2153 ParsingBuf
= SavedParsingBuf
;
2155 return !HasInvalidEntries
;
// Searches the name/build-id pairs in the parsing buffer for an entry whose
// build-id starts with FileBuildID and takes the base name of its file.
// The parsing buffer is saved on entry and restored afterwards. Returns
// std::nullopt when no file name was found.
// NOTE(review): the declaration of FileName, the enclosing loop and the
// successful return are not visible in this view of the file.
2158 std::optional
<StringRef
>
2159 DataAggregator::getFileNameForBuildID(StringRef FileBuildID
) {
2160 const StringRef SavedParsingBuf
= ParsingBuf
;
2164 std::optional
<std::pair
<StringRef
, StringRef
>> IDPair
=
2165 parseNameBuildIDPair();
2167 consumeRestOfLine();
// IDPair is (name, build-id); a prefix match on the build-id is sufficient.
2171 if (IDPair
->second
.starts_with(FileBuildID
)) {
2172 FileName
= sys::path::filename(IDPair
->first
);
// Rewind the buffer to where it was on entry.
2177 ParsingBuf
= SavedParsingBuf
;
2179 if (!FileName
.empty())
2182 return std::nullopt
;
// Writes the aggregated profile as text to OutputFilename.
//
// The output stream is opened with sys::fs::OpenFlags::OF_None. Records are
// written one per line:
//   - basic samples:  <location> <hits>
//   - branches:       <from-location> <to-location> <mispreds> <branches>
//   - memory events:  <offset-location> <address-location> <count>
// A summary line is printed to outs() at the end, and the visible return
// path yields a default-constructed std::error_code.
2186 DataAggregator::writeAggregatedFile(StringRef OutputFilename
) const {
2188 raw_fd_ostream
OutFile(OutputFilename
, EC
, sys::fs::OpenFlags::OF_None
);
// Read by writeLocation below; set to true before memory events are written.
2192 bool WriteMemLocs
= false;
// Emits a single location: a type tag, then the name ("[unknown]" when the
// name is empty, otherwise the escaped name), a space, the offset in hex and
// a trailing FieldSeparator. WriteMemLocs is captured by reference, so the
// lambda observes later assignments to it.
2194 auto writeLocation
= [&OutFile
, &WriteMemLocs
](const Location
&Loc
) {
// Type tag: "4 "/"3 " or "1 "/"0 ", with the first of each pair used for
// symbol locations.
// NOTE(review): presumably WriteMemLocs chooses between the two pairs; confirm.
2196 OutFile
<< (Loc
.IsSymbol
? "4 " : "3 ");
2198 OutFile
<< (Loc
.IsSymbol
? "1 " : "0 ");
2199 OutFile
<< (Loc
.Name
.empty() ? "[unknown]" : getEscapedName(Loc
.Name
))
2200 << " " << Twine::utohexstr(Loc
.Offset
) << FieldSeparator
;
// Counters reported in the summary message at the end of the function.
2203 uint64_t BranchValues
= 0;
2204 uint64_t MemValues
= 0;
2207 OutFile
<< "boltedcollection\n";
// Basic aggregation: a "no_lbr" header followed by the event names, then one
// line per sample.
2208 if (opts::BasicAggregation
) {
2209 OutFile
<< "no_lbr";
2210 for (const StringMapEntry
<std::nullopt_t
> &Entry
: EventNames
)
2211 OutFile
<< " " << Entry
.getKey();
2214 for (const auto &KV
: NamesToSamples
) {
2215 const FuncSampleData
&FSD
= KV
.second
;
2216 for (const SampleInfo
&SI
: FSD
.Data
) {
2217 writeLocation(SI
.Loc
);
2218 OutFile
<< SI
.Hits
<< "\n";
// Branch records: each function's Data entries, then its EntryData entries.
2223 for (const auto &KV
: NamesToBranches
) {
2224 const FuncBranchData
&FBD
= KV
.second
;
2225 for (const llvm::bolt::BranchInfo
&BI
: FBD
.Data
) {
2226 writeLocation(BI
.From
);
2227 writeLocation(BI
.To
);
2228 OutFile
<< BI
.Mispreds
<< " " << BI
.Branches
<< "\n";
2231 for (const llvm::bolt::BranchInfo
&BI
: FBD
.EntryData
) {
2232 // Do not output if source is a known symbol, since this was already
2233 // accounted for in the source function
2234 if (BI
.From
.IsSymbol
)
2236 writeLocation(BI
.From
);
2237 writeLocation(BI
.To
);
2238 OutFile
<< BI
.Mispreds
<< " " << BI
.Branches
<< "\n";
// From here on writeLocation sees WriteMemLocs == true.
2243 WriteMemLocs
= true;
2244 for (const auto &KV
: NamesToMemEvents
) {
2245 const FuncMemData
&FMD
= KV
.second
;
2246 for (const MemInfo
&MemEvent
: FMD
.Data
) {
2247 writeLocation(MemEvent
.Offset
);
2248 writeLocation(MemEvent
.Addr
);
2249 OutFile
<< MemEvent
.Count
<< "\n";
// Report how many records of each kind were written.
2255 outs() << "PERF2BOLT: wrote " << BranchValues
<< " objects and " << MemValues
2256 << " memory objects to " << OutputFilename
<< "\n";
2258 return std::error_code();
// Dumps the aggregator by delegating to the base-class DataReader::dump().
2261 void DataAggregator::dump() const { DataReader::dump(); }
// Prints a single LBR entry to Diag: the From and To addresses in hex,
// followed by the entry's Mispred value.
2263 void DataAggregator::dump(const LBREntry
&LBR
) const {
2264 Diag
<< "From: " << Twine::utohexstr(LBR
.From
)
2265 << " To: " << Twine::utohexstr(LBR
.To
) << " Mispred? " << LBR
.Mispred
// Prints a branch sample to Diag: first the number of LBR entries it holds,
// then a pass over each entry in Sample.LBR.
2269 void DataAggregator::dump(const PerfBranchSample
&Sample
) const {
2270 Diag
<< "Sample LBR entries: " << Sample
.LBR
.size() << "\n";
2271 for (const LBREntry
&LBR
: Sample
.LBR
)
// Prints a memory sample to Diag as "<PC>: <Addr>" on one line.
2275 void DataAggregator::dump(const PerfMemSample
&Sample
) const {
2276 Diag
<< "Sample mem entries: " << Sample
.PC
<< ": " << Sample
.Addr
<< "\n";