1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This family of functions reads profile data written by perf record,
10 // aggregates it and then writes it back to an output file.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/ScopeExit.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Errc.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/Process.h"
28 #include "llvm/Support/Program.h"
29 #include "llvm/Support/Regex.h"
30 #include "llvm/Support/Timer.h"
31 #include "llvm/Support/raw_ostream.h"
34 #include <unordered_map>
37 #define DEBUG_TYPE "aggregator"
// Command-line options for the aggregator. Every option defined below is
// attached to AggregatorCategory; its user-facing meaning is the cl::desc text.
45 BasicAggregation("nl",
46 cl::desc("aggregate basic samples (without LBR info)"),
47 cl::cat(AggregatorCategory
));
50 FilterMemProfile("filter-mem-profile",
51 cl::desc("if processing a memory profile, filter out stack or heap accesses "
52 "that won't be useful for BOLT to reduce profile file size"),
54 cl::cat(AggregatorCategory
));
// NOTE(review): the option name is not visible here; presumably this is the
// option read later as opts::FilterPID — confirm.
56 static cl::opt
<unsigned long long>
58 cl::desc("only use samples from process with specified PID"),
61 cl::cat(AggregatorCategory
));
64 IgnoreBuildID("ignore-build-id",
65 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
67 cl::cat(AggregatorCategory
));
// Defaults to true (cl::init); preprocessProfile() clears it in Linux kernel
// mode.
69 static cl::opt
<bool> IgnoreInterruptLBR(
70 "ignore-interrupt-lbr",
71 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
72 cl::init(true), cl::cat(AggregatorCategory
));
74 static cl::opt
<unsigned long long>
75 MaxSamples("max-samples",
77 cl::desc("maximum number of samples to read from LBR profile"),
80 cl::cat(AggregatorCategory
));
// Declaration only (extern): the option object itself is defined elsewhere.
82 extern cl::opt
<opts::ProfileFormatKind
> ProfileFormat
;
// Not declared static, unlike the neighbouring options.
84 cl::opt
<bool> ReadPreAggregated(
85 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
86 cl::cat(AggregatorCategory
));
89 TimeAggregator("time-aggr",
90 cl::desc("time BOLT aggregator"),
93 cl::cat(AggregatorCategory
));
96 UseEventPC("use-event-pc",
97 cl::desc("use event PC in combination with LBR sampling"),
98 cl::cat(AggregatorCategory
));
100 static cl::opt
<bool> WriteAutoFDOData(
101 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
102 cl::cat(AggregatorCategory
));
// Timer group name and description handed to NamedRegionTimer in this file.
108 const char TimerGroupName
[] = "aggregator";
109 const char TimerGroupDesc
[] = "Aggregator";
/// Build a list of {name, begin address, end address} entries from the
/// sections of \p BC, examining each section's isText() and getSize().
/// NOTE(review): the statements guarded by the two checks are not visible
/// here; presumably non-text and zero-sized sections are skipped — confirm.
111 std::vector
<SectionNameAndRange
> getTextSections(const BinaryContext
*BC
) {
112 std::vector
<SectionNameAndRange
> sections
;
113 for (BinarySection
&Section
: BC
->sections()) {
114 if (!Section
.isText())
116 if (Section
.getSize() == 0)
119 {Section
.getName(), Section
.getAddress(), Section
.getEndAddress()});
// Comparator: orders two entries by ascending BeginAddress.
122 [](const SectionNameAndRange
&A
, const SectionNameAndRange
&B
) {
123 return A
.BeginAddress
< B
.BeginAddress
;
// Namespace-scope definition (no initializer) of the static constexpr member.
// NOTE(review): presumably kept so the member can be ODR-used under pre-C++17
// rules — confirm whether it is still needed.
129 constexpr uint64_t DataAggregator::KernelBaseAddr
;
/// Destructor: delegates all cleanup to deleteTempFiles().
131 DataAggregator::~DataAggregator() { deleteTempFiles(); }
/// Remove \p FileName from the file system. A failure is reported on errs()
/// together with the error message; nothing else is done about it here.
134 void deleteTempFile(const std::string
&FileName
) {
135 if (std::error_code Errc
= sys::fs::remove(FileName
.c_str()))
136 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
137 << " with error " << Errc
.message() << "\n";
/// Call deleteTempFile() on every path recorded in TempFiles.
/// NOTE(review): anything done after the loop is not visible in this view.
141 void DataAggregator::deleteTempFiles() {
142 for (std::string
&FileName
: TempFiles
)
143 deleteTempFile(FileName
);
/// Look up an executable named "perf" in the directories of the PATH
/// environment variable and store the result in PerfPath.
/// NOTE(review): what happens after the "not found" message is not visible
/// here; the later dereference implies that path does not fall through.
147 void DataAggregator::findPerfExecutable() {
148 std::optional
<std::string
> PerfExecutable
=
149 sys::Process::FindInEnvPath("PATH", "perf");
150 if (!PerfExecutable
) {
151 outs() << "PERF2BOLT: No perf executable found!\n";
154 PerfPath
= *PerfExecutable
;
/// Begin aggregation for Filename: announce the job, locate perf, and launch
/// the `perf script` jobs whose output is parsed later (main events, mem
/// events, mmap events, task events).
/// NOTE(review): several argument lines of the launchPerfProcess() calls are
/// not visible in this view.
157 void DataAggregator::start() {
158 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename
<< "\n";
160 // Don't launch perf for pre-aggregated files
161 if (opts::ReadPreAggregated
)
164 findPerfExecutable();
// Main events: plain samples with -nl (BasicAggregation), otherwise samples
// carrying a branch stack.
166 if (opts::BasicAggregation
)
167 launchPerfProcess("events without LBR",
169 "script -F pid,event,ip",
172 launchPerfProcess("branch events",
174 "script -F pid,ip,brstack",
177 // Note: we launch script for mem events regardless of the option, as the
178 // command fails fairly fast if mem events were not collected.
179 launchPerfProcess("mem events",
181 "script -F pid,event,addr,ip",
184 launchPerfProcess("process events", MMapEventsPPI
,
185 "script --show-mmap-events --no-itrace",
188 launchPerfProcess("task events", TaskEventsPPI
,
189 "script --show-task-events --no-itrace",
/// Abort aggregation: call sys::Wait() on each of the four perf subprocesses
/// with a timeout argument of 1, collecting any message in Error.
/// NOTE(review): the pre-aggregated early-out body and whatever follows the
/// waits are not visible here.
193 void DataAggregator::abort() {
194 if (opts::ReadPreAggregated
)
199 // Kill subprocesses in case they are not finished
200 sys::Wait(TaskEventsPPI
.PI
, 1, &Error
);
201 sys::Wait(MMapEventsPPI
.PI
, 1, &Error
);
202 sys::Wait(MainEventsPPI
.PI
, 1, &Error
);
203 sys::Wait(MemEventsPPI
.PI
, 1, &Error
);
/// Spawn one perf job.
///
/// \param Name       label used only in the progress message.
/// \param PPI        receives the stdout/stderr temp-file paths and the
///                   process info or return code of the spawned job.
/// \param ArgsString perf arguments; split on single spaces.
/// \param Wait       NOTE(review): presumably selects the blocking launch
///                   (ExecuteAndWait) over ExecuteNoWait; the selecting
///                   condition is not visible here — confirm.
210 void DataAggregator::launchPerfProcess(StringRef Name
, PerfProcessInfo
&PPI
,
211 const char *ArgsString
, bool Wait
) {
212 SmallVector
<StringRef
, 4> Argv
;
214 outs() << "PERF2BOLT: spawning perf job to read " << Name
<< '\n';
// Command line: <PerfPath> <ArgsString tokens...> -f -i <Filename>
215 Argv
.push_back(PerfPath
.data());
217 StringRef(ArgsString
).split(Argv
, ' ');
218 Argv
.push_back("-f");
219 Argv
.push_back("-i");
220 Argv
.push_back(Filename
.c_str());
// perf's stdout and stderr go to temporary files; both paths are recorded in
// TempFiles (deleteTempFiles() iterates that list).
222 if (std::error_code Errc
=
223 sys::fs::createTemporaryFile("perf.script", "out", PPI
.StdoutPath
)) {
224 errs() << "PERF2BOLT: failed to create temporary file " << PPI
.StdoutPath
225 << " with error " << Errc
.message() << "\n";
228 TempFiles
.push_back(PPI
.StdoutPath
.data());
230 if (std::error_code Errc
=
231 sys::fs::createTemporaryFile("perf.script", "err", PPI
.StderrPath
)) {
232 errs() << "PERF2BOLT: failed to create temporary file " << PPI
.StderrPath
233 << " with error " << Errc
.message() << "\n";
236 TempFiles
.push_back(PPI
.StderrPath
.data());
238 std::optional
<StringRef
> Redirects
[] = {
239 std::nullopt
, // Stdin
240 StringRef(PPI
.StdoutPath
.data()), // Stdout
241 StringRef(PPI
.StderrPath
.data())}; // Stderr
// Debug trace of the full command line and its redirections.
244 dbgs() << "Launching perf: ";
245 for (StringRef Arg
: Argv
)
246 dbgs() << Arg
<< " ";
247 dbgs() << " 1> " << PPI
.StdoutPath
.data() << " 2> " << PPI
.StderrPath
.data()
// Blocking launch: only the return code is stored in PPI.PI.
252 PPI
.PI
.ReturnCode
= sys::ExecuteAndWait(PerfPath
.data(), Argv
,
253 /*envp*/ std::nullopt
, Redirects
);
// Non-blocking launch: the returned process info is stored in PPI.PI.
255 PPI
.PI
= sys::ExecuteNoWait(PerfPath
.data(), Argv
, /*envp*/ std::nullopt
,
/// Launch a "buildid list" perf job, load its stdout into FileBuf/ParsingBuf,
/// and look up \p FileBuildID via getFileNameForBuildID(). Reports an error
/// or warning when no match is found, and records the matched name in
/// BuildIDBinaryName when it differs from the input binary's file name.
/// NOTE(review): several branch heads and exits are not visible in this view.
259 void DataAggregator::processFileBuildID(StringRef FileBuildID
) {
260 PerfProcessInfo BuildIDProcessInfo
;
261 launchPerfProcess("buildid list",
266 if (BuildIDProcessInfo
.PI
.ReturnCode
!= 0) {
267 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
268 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo
.StderrPath
.data());
// NOTE(review): MB is dereferenced with no visible getError() check, unlike
// the stdout read below; an unreadable stderr file would be an invalid access.
269 StringRef ErrBuf
= (*MB
)->getBuffer();
271 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo
.PI
.ReturnCode
277 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
278 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo
.StdoutPath
.data());
279 if (std::error_code EC
= MB
.getError()) {
280 errs() << "Cannot open " << BuildIDProcessInfo
.StdoutPath
.data() << ": "
281 << EC
.message() << "\n";
// Take ownership of the buffer; ParsingBuf is a view into FileBuf.
285 FileBuf
= std::move(*MB
);
286 ParsingBuf
= FileBuf
->getBuffer();
288 std::optional
<StringRef
> FileName
= getFileNameForBuildID(FileBuildID
);
290 if (hasAllBuildIDs()) {
291 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
292 "This indicates the input binary supplied for data aggregation "
293 "is not the same recorded by perf when collecting profiling "
294 "data, or there were no samples recorded for the binary. "
295 "Use -ignore-build-id option to override.\n";
296 if (!opts::IgnoreBuildID
)
299 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
300 "data was recorded without it\n";
303 } else if (*FileName
!= llvm::sys::path::filename(BC
->getFilename())) {
304 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
305 BuildIDBinaryName
= std::string(*FileName
);
307 outs() << "PERF2BOLT: matched build-id and file name\n";
/// Read the first 7 bytes of \p FileName and compare them with the magic
/// string "PERFILE". Open and read errors are consumed, not reported.
/// NOTE(review): the values returned on each path are not visible here.
311 bool DataAggregator::checkPerfDataMagic(StringRef FileName
) {
312 if (opts::ReadPreAggregated
)
315 Expected
<sys::fs::file_t
> FD
= sys::fs::openNativeFileForRead(FileName
);
317 consumeError(FD
.takeError());
321 char Buf
[7] = {0, 0, 0, 0, 0, 0, 0};
// Close the descriptor on every exit path from here on.
323 auto Close
= make_scope_exit([&] { sys::fs::closeFile(*FD
); });
324 Expected
<size_t> BytesRead
= sys::fs::readNativeFileSlice(
325 *FD
, MutableArrayRef(Buf
, sizeof(Buf
)), 0);
327 consumeError(BytesRead
.takeError());
// Buf has no terminating NUL, so the comparison is bounded to 7 bytes.
334 if (strncmp(Buf
, "PERFILE", 7) == 0)
/// Load Filename (or stdin, per getFileOrSTDIN) into FileBuf, point
/// ParsingBuf at its contents, and parse it with
/// parsePreAggregatedLBRSamples(). Failures are reported on errs().
/// NOTE(review): what follows each error message is not visible here.
339 void DataAggregator::parsePreAggregated() {
342 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
343 MemoryBuffer::getFileOrSTDIN(Filename
);
344 if (std::error_code EC
= MB
.getError()) {
345 errs() << "PERF2BOLT-ERROR: cannot open " << Filename
<< ": "
346 << EC
.message() << "\n";
350 FileBuf
= std::move(*MB
);
351 ParsingBuf
= FileBuf
->getBuffer();
// A truthy result from the parser is treated as failure.
354 if (parsePreAggregatedLBRSamples()) {
355 errs() << "PERF2BOLT: failed to parse samples\n";
/// Write the aggregated profile to \p OutputFilename as three sections, each
/// preceded by its entry count: fall-through traces ("from-to:count"),
/// sample addresses ("addr:count") and branches ("src->dst:count").
/// Addresses are printed in hex after passing through filterAddress.
/// \returns a default-constructed (success) std::error_code on the visible
/// exit path. NOTE(review): handling of a failure to open the output file is
/// not visible here.
360 std::error_code
DataAggregator::writeAutoFDOData(StringRef OutputFilename
) {
361 outs() << "PERF2BOLT: writing data for autofdo tools...\n";
362 NamedRegionTimer
T("writeAutoFDO", "Processing branch events", TimerGroupName
,
363 TimerGroupDesc
, opts::TimeAggregator
);
366 raw_fd_ostream
OutFile(OutputFilename
, EC
, sys::fs::OpenFlags::OF_None
);
371 // number of unique traces
372 // from_1-to_1:count_1
373 // from_2-to_2:count_2
375 // from_n-to_n:count_n
376 // number of unique sample addresses
381 // number of unique LBR entries
382 // src_1->dst_1:count_1
383 // src_2->dst_2:count_2
385 // src_n->dst_n:count_n
387 const uint64_t FirstAllocAddress
= this->BC
->FirstAllocAddress
;
389 // AutoFDO addresses are relative to the first allocated loadable program
// Rebases an address against FirstAllocAddress. NOTE(review): the result for
// addresses below FirstAllocAddress is not visible here.
391 auto filterAddress
= [&FirstAllocAddress
](uint64_t Address
) -> uint64_t {
392 if (Address
< FirstAllocAddress
)
394 return Address
- FirstAllocAddress
;
// Section 1: fall-through traces; count is InternCount + ExternCount.
397 OutFile
<< FallthroughLBRs
.size() << "\n";
398 for (const auto &[Trace
, Info
] : FallthroughLBRs
) {
399 OutFile
<< formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace
.From
),
400 filterAddress(Trace
.To
),
401 Info
.InternCount
+ Info
.ExternCount
);
// Section 2: sampled addresses with their hit counts.
404 OutFile
<< BasicSamples
.size() << "\n";
405 for (const auto [PC
, HitCount
] : BasicSamples
)
406 OutFile
<< formatv("{0:x-}:{1}\n", filterAddress(PC
), HitCount
);
// Section 3: branches with their taken counts.
408 OutFile
<< BranchLBRs
.size() << "\n";
409 for (const auto &[Trace
, Info
] : BranchLBRs
) {
410 OutFile
<< formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace
.From
),
411 filterAddress(Trace
.To
), Info
.TakenCount
);
414 outs() << "PERF2BOLT: wrote " << FallthroughLBRs
.size() << " unique traces, "
415 << BasicSamples
.size() << " sample addresses and " << BranchLBRs
.size()
416 << " unique branches to " << OutputFilename
<< "\n";
418 return std::error_code();
/// When opts::FilterPID is set, reduce BinaryMMapInfo to the single entry for
/// that PID. If no entry matches, print an error naming the PID and binary,
/// followed by the PIDs that do have a profile.
/// NOTE(review): how the not-found path ends is not visible here.
421 void DataAggregator::filterBinaryMMapInfo() {
422 if (opts::FilterPID
) {
423 auto MMapInfoIter
= BinaryMMapInfo
.find(opts::FilterPID
);
424 if (MMapInfoIter
!= BinaryMMapInfo
.end()) {
// Copy the entry out first: clear() invalidates the iterator.
425 MMapInfo MMap
= MMapInfoIter
->second
;
426 BinaryMMapInfo
.clear();
427 BinaryMMapInfo
.insert(std::make_pair(MMap
.PID
, MMap
));
429 if (errs().has_colors())
430 errs().changeColor(raw_ostream::RED
);
431 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
432 << opts::FilterPID
<< "\""
433 << " for binary \"" << BC
->getFilename() << "\".";
434 assert(!BinaryMMapInfo
.empty() && "No memory map for matching binary");
435 errs() << " Profile for the following process is available:\n";
// NOTE(review): the PID list is written to outs() while the surrounding
// message goes to errs(); the two streams may interleave or be redirected
// separately — confirm this is intended.
436 for (std::pair
<const uint64_t, MMapInfo
> &MMI
: BinaryMMapInfo
)
437 outs() << " " << MMI
.second
.PID
438 << (MMI
.second
.Forked
? " (forked)\n" : "\n");
440 if (errs().has_colors())
/// Wait for the perf job \p Process to finish and load its stdout into
/// FileBuf/ParsingBuf for parsing.
///
/// \param Name     label used in the progress message.
/// \param Process  job previously started through launchPerfProcess().
/// \param Callback invoked with (return code, stderr contents) when perf
///                 exits with a non-zero code.
/// \returns the perf return code on the two visible return paths.
/// NOTE(review): handling after the other error messages is not visible here.
448 int DataAggregator::prepareToParse(StringRef Name
, PerfProcessInfo
&Process
,
449 PerfProcessErrorCallbackTy Callback
) {
451 outs() << "PERF2BOLT: waiting for perf " << Name
452 << " collection to finish...\n";
453 sys::ProcessInfo PI
= sys::Wait(Process
.PI
, std::nullopt
, &Error
);
455 if (!Error
.empty()) {
456 errs() << "PERF-ERROR: " << PerfPath
<< ": " << Error
<< "\n";
461 if (PI
.ReturnCode
!= 0) {
462 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> ErrorMB
=
463 MemoryBuffer::getFileOrSTDIN(Process
.StderrPath
.data());
// NOTE(review): ErrorMB is dereferenced with no visible getError() check,
// unlike the stdout read below.
464 StringRef ErrBuf
= (*ErrorMB
)->getBuffer();
467 Callback(PI
.ReturnCode
, ErrBuf
);
468 return PI
.ReturnCode
;
471 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
472 MemoryBuffer::getFileOrSTDIN(Process
.StdoutPath
.data());
473 if (std::error_code EC
= MB
.getError()) {
474 errs() << "Cannot open " << Process
.StdoutPath
.data() << ": "
475 << EC
.message() << "\n";
// Take ownership of the buffer; ParsingBuf is a view into FileBuf.
480 FileBuf
= std::move(*MB
);
481 ParsingBuf
= FileBuf
->getBuffer();
484 return PI
.ReturnCode
;
/// Parse all profile input ahead of processing. Pre-aggregated input is
/// parsed directly. Otherwise the visible steps are: build-id check, mmap and
/// task events, PID filtering, main events (branch or basic samples), and
/// finally memory events. AutoFDO output is written here when requested.
/// \returns Error::success() on every visible return path.
/// NOTE(review): several branch heads and early exits are not visible here.
487 Error
DataAggregator::preprocessProfile(BinaryContext
&BC
) {
490 if (opts::ReadPreAggregated
) {
491 parsePreAggregated();
492 return Error::success();
495 if (std::optional
<StringRef
> FileBuildID
= BC
.getFileBuildID()) {
496 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID
<< "\n";
497 processFileBuildID(*FileBuildID
);
499 errs() << "BOLT-WARNING: build-id will not be checked because we could "
500 "not read one from input binary\n";
// Default handler for a failed perf job: print return code and stderr text.
503 auto ErrorCallback
= [](int ReturnCode
, StringRef ErrBuf
) {
504 errs() << "PERF-ERROR: return code " << ReturnCode
<< "\n" << ErrBuf
;
// Handler for the mem-events job: falls back to ErrorCallback unless perf's
// stderr matches the "no ADDR attribute" message.
508 auto MemEventsErrorCallback
= [&](int ReturnCode
, StringRef ErrBuf
) {
509 Regex
NoData("Samples for '.*' event do not have ADDR attribute set. "
510 "Cannot print 'addr' field.");
511 if (!NoData
.match(ErrBuf
))
512 ErrorCallback(ReturnCode
, ErrBuf
);
515 if (opts::LinuxKernelMode
) {
516 // Current MMap parsing logic does not work with linux kernel.
517 // MMap entries for linux kernel use PERF_RECORD_MMAP
518 // format instead of typical PERF_RECORD_MMAP2 format.
519 // Since linux kernel address mapping is absolute (same as
520 // in the ELF file), we avoid parsing MMap in linux kernel mode.
521 // While generating optimized linux kernel binary, we may need
522 // to parse MMap entries.
524 // In linux kernel mode, we analyze and optimize
525 // all linux kernel binary instructions, irrespective
526 // of whether they are due to system calls or due to
527 // interrupts. Therefore, we cannot ignore interrupt
528 // in Linux kernel mode.
529 opts::IgnoreInterruptLBR
= false;
531 prepareToParse("mmap events", MMapEventsPPI
, ErrorCallback
);
532 if (parseMMapEvents())
533 errs() << "PERF2BOLT: failed to parse mmap events\n";
536 prepareToParse("task events", TaskEventsPPI
, ErrorCallback
);
537 if (parseTaskEvents())
538 errs() << "PERF2BOLT: failed to parse task events\n";
540 filterBinaryMMapInfo();
541 prepareToParse("events", MainEventsPPI
, ErrorCallback
);
543 if (opts::HeatmapMode
) {
544 if (std::error_code EC
= printLBRHeatMap()) {
545 errs() << "ERROR: failed to print heat map: " << EC
.message() << '\n';
// Exactly one parser runs, selected by BasicAggregation; a truthy result is
// treated as failure.
551 if ((!opts::BasicAggregation
&& parseBranchEvents()) ||
552 (opts::BasicAggregation
&& parseBasicEvents()))
553 errs() << "PERF2BOLT: failed to parse samples\n";
555 // We can finish early if the goal is just to generate data for autofdo
556 if (opts::WriteAutoFDOData
) {
557 if (std::error_code EC
= writeAutoFDOData(opts::OutputFilename
))
558 errs() << "Error writing autofdo data to file: " << EC
.message() << "\n";
564 // Special handling for memory events
// A non-zero perf return code skips mem-event parsing without failing.
565 if (prepareToParse("mem events", MemEventsPPI
, MemEventsErrorCallback
))
566 return Error::success();
568 if (const std::error_code EC
= parseMemEvents())
569 errs() << "PERF2BOLT: failed to parse memory events: " << EC
.message()
574 return Error::success();
/// Run convertBranchData() on every binary function in \p BC. When
/// aggregate-only mode is combined with the fdata profile format, also write
/// the aggregated file to opts::OutputFilename and report a creation failure.
/// \returns Error::success() on the visible return path.
577 Error
DataAggregator::readProfile(BinaryContext
&BC
) {
580 for (auto &BFI
: BC
.getBinaryFunctions()) {
581 BinaryFunction
&Function
= BFI
.second
;
582 convertBranchData(Function
);
585 if (opts::AggregateOnly
&&
586 opts::ProfileFormat
== opts::ProfileFormatKind::PF_Fdata
) {
587 if (std::error_code EC
= writeAggregatedFile(opts::OutputFilename
))
588 report_error("cannot create output data file", EC
);
591 return Error::success();
/// \returns the result of \p Function's hasProfileAvailable().
594 bool DataAggregator::mayHaveProfileData(const BinaryFunction
&Function
) {
595 return Function
.hasProfileAvailable();
/// Turn parsed events into per-function profile data. Picks the
/// pre-aggregated, basic-sample or branch-event path, marks functions that
/// received data as profiled, stable-sorts the collected branch and memory
/// records, and releases intermediate containers.
598 void DataAggregator::processProfile(BinaryContext
&BC
) {
599 if (opts::ReadPreAggregated
)
600 processPreAggregated();
601 else if (opts::BasicAggregation
)
602 processBasicEvents();
604 processBranchEvents();
608 // Mark all functions with registered events as having a valid profile.
609 const auto Flags
= opts::BasicAggregation
? BinaryFunction::PF_SAMPLE
610 : BinaryFunction::PF_LBR
;
611 for (auto &BFI
: BC
.getBinaryFunctions()) {
612 BinaryFunction
&BF
= BFI
.second
;
613 if (getBranchData(BF
) || getFuncSampleData(BF
.getNames()))
614 BF
.markProfiled(Flags
);
617 for (auto &FuncBranches
: NamesToBranches
)
618 llvm::stable_sort(FuncBranches
.second
.Data
);
620 for (auto &MemEvents
: NamesToMemEvents
)
621 llvm::stable_sort(MemEvents
.second
.Data
);
623 // Release intermediate storage.
625 clear(FallthroughLBRs
);
626 clear(AggregatedLBRs
);
/// Look up the function covering \p Address by forwarding to the
/// BinaryContext with CheckPastEnd=false and UseMaxSize=true.
/// NOTE(review): the result when BC does not contain the address is not
/// visible here.
632 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address
) const {
633 if (!BC
->containsAddress(Address
))
636 return BC
->getBinaryFunctionContainingAddress(Address
, /*CheckPastEnd=*/false,
637 /*UseMaxSize=*/true);
/// Choose the name under which \p Func is recorded in the profile. One path
/// returns Func.getOneName() directly. Otherwise BAT is asked for a parent
/// ("hot") address, Count is added to NumColdSamples when one exists, and the
/// first name containing at least two '/' separators is preferred.
/// NOTE(review): the conditions selecting the early return, and how HotFunc
/// is used afterwards, are not visible here.
640 StringRef
DataAggregator::getLocationName(BinaryFunction
&Func
,
643 return Func
.getOneName();
645 const BinaryFunction
*OrigFunc
= &Func
;
646 if (const uint64_t HotAddr
= BAT
->fetchParentAddress(Func
.getAddress())) {
647 NumColdSamples
+= Count
;
648 BinaryFunction
*HotFunc
= getBinaryFunctionContainingAddress(HotAddr
);
652 // If it is a local function, prefer the name containing the file name where
653 // the local function was declared
654 for (StringRef AlternativeName
: OrigFunc
->getNames()) {
655 size_t FileNameIdx
= AlternativeName
.find('/');
656 // Confirm the alternative name has the pattern Symbol/FileName/1 before
658 if (FileNameIdx
== StringRef::npos
||
659 AlternativeName
.find('/', FileNameIdx
+ 1) == StringRef::npos
)
661 return AlternativeName
;
// No name matched the pattern: fall back to the function's one name.
663 return OrigFunc
->getOneName();
/// Record Count hits for a sample at \p Address inside \p Func. The
/// per-function entry in NamesToSamples is created on first use and keyed by
/// Func.getOneName(). The address is converted to a function-relative offset
/// and, on the BAT path, translated before the count is bumped.
/// NOTE(review): the remaining parameters, the BAT condition and the return
/// value are not visible here.
666 bool DataAggregator::doSample(BinaryFunction
&Func
, uint64_t Address
,
668 auto I
= NamesToSamples
.find(Func
.getOneName());
669 if (I
== NamesToSamples
.end()) {
671 StringRef LocName
= getLocationName(Func
, Count
);
672 std::tie(I
, Success
) = NamesToSamples
.insert(
673 std::make_pair(Func
.getOneName(),
674 FuncSampleData(LocName
, FuncSampleData::ContainerTy())));
// From here on Address is an offset from the function start.
677 Address
-= Func
.getAddress();
679 Address
= BAT
->translate(Func
.getAddress(), Address
, /*IsBranchSrc=*/false);
681 I
->second
.bumpCount(Address
, Count
);
/// Record a branch whose source and destination both lie in \p Func. The
/// function's FuncBranchData is fetched (one path creates it in
/// NamesToBranches and registers it), From/To become function-relative
/// offsets, are translated on the BAT path, and the branch count is bumped.
/// NOTE(review): the last parameter, the guarding conditions and the return
/// value are not visible here.
685 bool DataAggregator::doIntraBranch(BinaryFunction
&Func
, uint64_t From
,
686 uint64_t To
, uint64_t Count
,
688 FuncBranchData
*AggrData
= getBranchData(Func
);
690 AggrData
= &NamesToBranches
[Func
.getOneName()];
691 AggrData
->Name
= getLocationName(Func
, Count
);
692 setBranchData(Func
, AggrData
);
// Convert absolute addresses to offsets from the function start.
695 From
-= Func
.getAddress();
696 To
-= Func
.getAddress();
697 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
698 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func
, From
, To
));
700 From
= BAT
->translate(Func
.getAddress(), From
, /*IsBranchSrc=*/true);
701 To
= BAT
->translate(Func
.getAddress(), To
, /*IsBranchSrc=*/false);
703 dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
704 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func
, From
, To
));
707 AggrData
->bumpBranchCount(From
, To
, Count
, Mispreds
);
/// Record a control transfer between two functions (or between a function
/// and an unknown location). The source side gets a location name, branch
/// data, a function-relative From offset and a recordExit() call; the
/// destination side mirrors this with recordEntry(). Call and entry counts
/// are then bumped on the respective branch data.
/// NOTE(review): the null checks guarding each side, the BAT conditions and
/// the return value are not visible here.
711 bool DataAggregator::doInterBranch(BinaryFunction
*FromFunc
,
712 BinaryFunction
*ToFunc
, uint64_t From
,
713 uint64_t To
, uint64_t Count
,
715 FuncBranchData
*FromAggrData
= nullptr;
716 FuncBranchData
*ToAggrData
= nullptr;
// Source side.
720 SrcFunc
= getLocationName(*FromFunc
, Count
);
721 FromAggrData
= getBranchData(*FromFunc
);
723 FromAggrData
= &NamesToBranches
[FromFunc
->getOneName()];
724 FromAggrData
->Name
= SrcFunc
;
725 setBranchData(*FromFunc
, FromAggrData
);
727 From
-= FromFunc
->getAddress();
729 From
= BAT
->translate(FromFunc
->getAddress(), From
, /*IsBranchSrc=*/true);
731 recordExit(*FromFunc
, From
, Mispreds
, Count
);
// Destination side. A count of 0 is passed to getLocationName() here, while
// the source side passes Count.
734 DstFunc
= getLocationName(*ToFunc
, 0);
735 ToAggrData
= getBranchData(*ToFunc
);
737 ToAggrData
= &NamesToBranches
[ToFunc
->getOneName()];
738 ToAggrData
->Name
= DstFunc
;
739 setBranchData(*ToFunc
, ToAggrData
);
741 To
-= ToFunc
->getAddress();
743 To
= BAT
->translate(ToFunc
->getAddress(), To
, /*IsBranchSrc=*/false);
745 recordEntry(*ToFunc
, To
, Mispreds
, Count
);
// The Location's first argument is true only when the peer name is non-empty.
749 FromAggrData
->bumpCallCount(From
, Location(!DstFunc
.empty(), DstFunc
, To
),
752 ToAggrData
->bumpEntryCount(Location(!SrcFunc
.empty(), SrcFunc
, From
), To
,
/// Dispatch one branch record. Both endpoints are resolved to functions. A
/// transfer that stays in one function and does not land on that function's
/// start address is recorded as an intra-function branch; everything else
/// goes to doInterBranch().
/// NOTE(review): the result when neither endpoint resolves is not visible
/// here.
757 bool DataAggregator::doBranch(uint64_t From
, uint64_t To
, uint64_t Count
,
759 BinaryFunction
*FromFunc
= getBinaryFunctionContainingAddress(From
);
760 BinaryFunction
*ToFunc
= getBinaryFunctionContainingAddress(To
);
761 if (!FromFunc
&& !ToFunc
)
764 // Treat recursive control transfers as inter-branches.
// After the check above at least one pointer is non-null, so equal pointers
// are both non-null and ToFunc can be dereferenced.
765 if (FromFunc
== ToFunc
&& (To
!= ToFunc
->getAddress())) {
766 recordBranch(*FromFunc
, From
- FromFunc
->getAddress(),
767 To
- FromFunc
->getAddress(), Count
, Mispreds
);
768 return doIntraBranch(*FromFunc
, From
, To
, Count
, Mispreds
);
771 return doInterBranch(FromFunc
, ToFunc
, From
, To
, Count
, Mispreds
);
/// Process the fall-through trace between two consecutive LBR entries, from
/// First.To to Second.From. Traces with an unresolved endpoint add Count to
/// NumLongRangeTraces; traces spanning two functions, or for which no
/// fall-through list is obtained, add Count to NumInvalidTraces. Otherwise
/// each fall-through pair is recorded through doIntraBranch() with a
/// mispredicted argument of false.
/// NOTE(review): the last parameter, the return values and some branch heads
/// are not visible here.
774 bool DataAggregator::doTrace(const LBREntry
&First
, const LBREntry
&Second
,
776 BinaryFunction
*FromFunc
= getBinaryFunctionContainingAddress(First
.To
);
777 BinaryFunction
*ToFunc
= getBinaryFunctionContainingAddress(Second
.From
);
778 if (!FromFunc
|| !ToFunc
) {
// NOTE(review): at least one of FromFunc/ToFunc is null in this branch, yet
// both are dereferenced to build the message below — confirm this output is
// guarded, or fix it.
780 dbgs() << "Out of range trace starting in " << FromFunc
->getPrintName()
781 << formatv(" @ {0:x}", First
.To
- FromFunc
->getAddress())
782 << " and ending in " << ToFunc
->getPrintName()
783 << formatv(" @ {0:x}\n", Second
.From
- ToFunc
->getAddress());
785 NumLongRangeTraces
+= Count
;
788 if (FromFunc
!= ToFunc
) {
789 NumInvalidTraces
+= Count
;
791 dbgs() << "Invalid trace starting in " << FromFunc
->getPrintName()
792 << formatv(" @ {0:x}", First
.To
- FromFunc
->getAddress())
793 << " and ending in " << ToFunc
->getPrintName()
794 << formatv(" @ {0:x}\n", Second
.From
- ToFunc
->getAddress());
// With BAT the fall-throughs come from the translation table; otherwise from
// getFallthroughsInTrace().
799 std::optional
<BoltAddressTranslation::FallthroughListTy
> FTs
=
800 BAT
? BAT
->getFallthroughsInTrace(FromFunc
->getAddress(), First
.To
,
802 : getFallthroughsInTrace(*FromFunc
, First
, Second
, Count
);
// NOTE(review): ToFunc's print name and " @ " are streamed twice in a row in
// this message; looks like a duplicated line.
805 dbgs() << "Invalid trace starting in " << FromFunc
->getPrintName()
806 << " @ " << Twine::utohexstr(First
.To
- FromFunc
->getAddress())
807 << " and ending in " << ToFunc
->getPrintName() << " @ "
808 << ToFunc
->getPrintName() << " @ "
809 << Twine::utohexstr(Second
.From
- ToFunc
->getAddress()) << '\n');
810 NumInvalidTraces
+= Count
;
814 LLVM_DEBUG(dbgs() << "Processing " << FTs
->size() << " fallthroughs for "
815 << FromFunc
->getPrintName() << ":"
816 << Twine::utohexstr(First
.To
) << " to "
817 << Twine::utohexstr(Second
.From
) << ".\n");
// The pairs are function-relative; doIntraBranch() takes absolute addresses.
818 for (const std::pair
<uint64_t, uint64_t> &Pair
: *FTs
)
819 doIntraBranch(*FromFunc
, Pair
.first
+ FromFunc
->getAddress(),
820 Pair
.second
+ FromFunc
->getAddress(), Count
, false);
/// Walk the blocks of \p BF between FirstLBR.To and SecondLBR.From in layout
/// order. One (source offset, next block offset) pair is appended to
/// \p Branches per fall-through edge, and the branch info of each recorded
/// edge is then looked up. Requires \p BF to have a CFG (asserted).
/// NOTE(review): the loop head, the count updates and all return statements
/// are not visible here.
825 bool DataAggregator::recordTrace(
826 BinaryFunction
&BF
, const LBREntry
&FirstLBR
, const LBREntry
&SecondLBR
,
828 SmallVector
<std::pair
<uint64_t, uint64_t>, 16> &Branches
) const {
829 BinaryContext
&BC
= BF
.getBinaryContext();
834 assert(BF
.hasCFG() && "can only record traces in CFG state");
836 // Offsets of the trace within this function.
837 const uint64_t From
= FirstLBR
.To
- BF
.getAddress();
838 const uint64_t To
= SecondLBR
.From
- BF
.getAddress();
843 const BinaryBasicBlock
*FromBB
= BF
.getBasicBlockContainingOffset(From
);
844 const BinaryBasicBlock
*ToBB
= BF
.getBasicBlockContainingOffset(To
);
846 if (!FromBB
|| !ToBB
)
849 // Adjust FromBB if the first LBR is a return from the last instruction in
850 // the previous block (that instruction should be a call).
// Applies only when the trace starts exactly at a block start, the LBR source
// lies outside BF, and the block is neither an entry point nor a landing pad.
851 if (From
== FromBB
->getOffset() && !BF
.containsAddress(FirstLBR
.From
) &&
852 !FromBB
->isEntryPoint() && !FromBB
->isLandingPad()) {
// NOTE(review): assumes FromBB->getIndex() > 0 here — confirm that the
// non-entry-point check guarantees a preceding block in the layout.
853 const BinaryBasicBlock
*PrevBB
=
854 BF
.getLayout().getBlock(FromBB
->getIndex() - 1);
855 if (PrevBB
->getSuccessor(FromBB
->getLabel())) {
856 const MCInst
*Instr
= PrevBB
->getLastNonPseudoInstr();
857 if (Instr
&& BC
.MIB
->isCall(*Instr
))
860 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
863 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR
<< '\n');
867 // Fill out information for fall-through edges. The From and To could be
868 // within the same basic block, e.g. when two call instructions are in the
869 // same block. In this case we skip the processing.
873 // Process blocks in the original layout order.
874 BinaryBasicBlock
*BB
= BF
.getLayout().getBlock(FromBB
->getIndex());
875 assert(BB
== FromBB
&& "index mismatch");
877 BinaryBasicBlock
*NextBB
= BF
.getLayout().getBlock(BB
->getIndex() + 1);
878 assert((NextBB
&& NextBB
->getOffset() > BB
->getOffset()) && "bad layout");
880 // Check for bad LBRs.
881 if (!BB
->getSuccessor(NextBB
->getLabel())) {
882 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
883 << " " << FirstLBR
<< '\n'
884 << " " << SecondLBR
<< '\n');
// The edge's source offset comes from the block's last non-pseudo
// instruction on one path and from the block's own offset on the other.
888 const MCInst
*Instr
= BB
->getLastNonPseudoInstr();
891 Offset
= BC
.MIB
->getOffsetWithDefault(*Instr
, 0);
893 Offset
= BB
->getOffset();
895 Branches
.emplace_back(Offset
, NextBB
->getOffset());
900 // Record fall-through jumps
901 for (const auto &[FromOffset
, ToOffset
] : Branches
) {
902 BinaryBasicBlock
*FromBB
= BF
.getBasicBlockContainingOffset(FromOffset
);
903 BinaryBasicBlock
*ToBB
= BF
.getBasicBlockAtOffset(ToOffset
);
904 assert(FromBB
&& ToBB
);
905 BinaryBasicBlock::BinaryBranchInfo
&BI
= FromBB
->getBranchInfo(*ToBB
);
/// Collect the fall-through (from, to) offset pairs for the trace between
/// \p FirstLBR and \p SecondLBR by running recordTrace() into a local vector.
/// NOTE(review): the two return statements are not visible here; presumably
/// std::nullopt when recordTrace() fails and the vector otherwise — confirm.
912 std::optional
<SmallVector
<std::pair
<uint64_t, uint64_t>, 16>>
913 DataAggregator::getFallthroughsInTrace(BinaryFunction
&BF
,
914 const LBREntry
&FirstLBR
,
915 const LBREntry
&SecondLBR
,
916 uint64_t Count
) const {
917 SmallVector
<std::pair
<uint64_t, uint64_t>, 16> Res
;
919 if (!recordTrace(BF
, FirstLBR
, SecondLBR
, Count
, Res
))
/// Account for Count entries into \p BF at function-relative offset \p To.
/// The execution count is reset to 0 when the function has no profile yet.
/// On one path the function's count is raised and its front block selected;
/// a block starting exactly at \p To is also examined as an entry point. The
/// chosen block's execution count is then raised by Count.
/// NOTE(review): the branch conditions, the null handling for EntryBB and the
/// return values are not visible here.
925 bool DataAggregator::recordEntry(BinaryFunction
&BF
, uint64_t To
, bool Mispred
,
926 uint64_t Count
) const {
// An offset equal to the function size passes this check; only larger ones
// are rejected.
927 if (To
> BF
.getSize())
930 if (!BF
.hasProfile())
931 BF
.ExecutionCount
= 0;
933 BinaryBasicBlock
*EntryBB
= nullptr;
935 BF
.ExecutionCount
+= Count
;
937 EntryBB
= &BF
.front();
938 } else if (BinaryBasicBlock
*BB
= BF
.getBasicBlockAtOffset(To
)) {
939 if (BB
->isEntryPoint())
944 EntryBB
->setExecutionCount(EntryBB
->getKnownExecutionCount() + Count
);
/// Handle an exit from \p BF at function-relative offset \p From. Non-simple
/// functions and offsets beyond the function size are rejected; a function
/// without a profile gets its execution count reset to 0.
/// NOTE(review): the return values are not visible here.
949 bool DataAggregator::recordExit(BinaryFunction
&BF
, uint64_t From
, bool Mispred
,
950 uint64_t Count
) const {
951 if (!BF
.isSimple() || From
> BF
.getSize())
954 if (!BF
.hasProfile())
955 BF
.ExecutionCount
= 0;
/// Parse one LBR entry made of '/'-separated fields: the From address, the
/// To address, a one-character misprediction flag ('P', 'M' or '-'), and a
/// remainder that must be at least 5 characters long. Malformed fields are
/// reported through reportError()/Diag and yield errc::io_error.
/// NOTE(review): the declaration of Res, the propagation of parseString()
/// errors and the final return are not visible here.
960 ErrorOr
<LBREntry
> DataAggregator::parseLBREntry() {
962 ErrorOr
<StringRef
> FromStrRes
= parseString('/');
963 if (std::error_code EC
= FromStrRes
.getError())
965 StringRef OffsetStr
= FromStrRes
.get();
// Radix 0 asks getAsInteger() to auto-detect the base from the prefix.
966 if (OffsetStr
.getAsInteger(0, Res
.From
)) {
967 reportError("expected hexadecimal number with From address");
968 Diag
<< "Found: " << OffsetStr
<< "\n";
969 return make_error_code(llvm::errc::io_error
);
972 ErrorOr
<StringRef
> ToStrRes
= parseString('/');
973 if (std::error_code EC
= ToStrRes
.getError())
975 OffsetStr
= ToStrRes
.get();
976 if (OffsetStr
.getAsInteger(0, Res
.To
)) {
977 reportError("expected hexadecimal number with To address");
978 Diag
<< "Found: " << OffsetStr
<< "\n";
979 return make_error_code(llvm::errc::io_error
);
982 ErrorOr
<StringRef
> MispredStrRes
= parseString('/');
983 if (std::error_code EC
= MispredStrRes
.getError())
985 StringRef MispredStr
= MispredStrRes
.get();
986 if (MispredStr
.size() != 1 ||
987 (MispredStr
[0] != 'P' && MispredStr
[0] != 'M' && MispredStr
[0] != '-')) {
988 reportError("expected single char for mispred bit");
989 Diag
<< "Found: " << MispredStr
<< "\n";
990 return make_error_code(llvm::errc::io_error
);
// Only 'M' marks a misprediction; 'P' and '-' both yield false.
992 Res
.Mispred
= MispredStr
[0] == 'M';
// Function-local static: the missing-bit warning is printed at most once.
994 static bool MispredWarning
= true;
995 if (MispredStr
[0] == '-' && MispredWarning
) {
996 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
997 MispredWarning
= false;
// The remainder of the entry is length-checked only.
1000 ErrorOr
<StringRef
> Rest
= parseString(FieldSeparator
, true);
1001 if (std::error_code EC
= Rest
.getError())
1003 if (Rest
.get().size() < 5) {
1004 reportError("expected rest of LBR entry");
1005 Diag
<< "Found: " << Rest
.get() << "\n";
1006 return make_error_code(llvm::errc::io_error
);
/// Test whether ParsingBuf starts with FieldSeparator; the visible success
/// path drops that one character from the buffer.
/// NOTE(review): ParsingBuf[0] is read with no visible emptiness check —
/// confirm callers never reach this with an empty buffer. Return values are
/// not visible here.
1011 bool DataAggregator::checkAndConsumeFS() {
1012 if (ParsingBuf
[0] != FieldSeparator
)
1015 ParsingBuf
= ParsingBuf
.drop_front(1);
/// Advance ParsingBuf past the next '\n'. If there is no newline left, the
/// buffer is replaced by an empty StringRef.
/// NOTE(review): any bookkeeping between these statements (for example line
/// or column counters) is not visible here.
1020 void DataAggregator::consumeRestOfLine() {
1021 size_t LineEnd
= ParsingBuf
.find_first_of('\n');
1022 if (LineEnd
== StringRef::npos
) {
1023 ParsingBuf
= StringRef();
// Drop everything up to and including the newline.
1028 ParsingBuf
= ParsingBuf
.drop_front(LineEnd
+ 1);
/// \returns true when the first character of ParsingBuf is '\n'; nothing is
/// consumed.
/// NOTE(review): ParsingBuf[0] is read with no emptiness check — confirm
/// callers guarantee a non-empty buffer.
1033 bool DataAggregator::checkNewLine() {
1034 return ParsingBuf
[0] == '\n';
/// Parse one line of branch-sample output: a PID, a PC, then LBR entries up
/// to the end of the line. Outside Linux kernel mode, a PID with no entry in
/// BinaryMMapInfo makes the rest of the line be skipped and yields
/// errc::no_such_process. Each accepted LBR entry is appended to Res.LBR,
/// after adjustLBR() when the binary has no fixed load address.
/// NOTE(review): loop bodies, error propagation and the final return are not
/// visible here.
1037 ErrorOr
<DataAggregator::PerfBranchSample
> DataAggregator::parseBranchSample() {
1038 PerfBranchSample Res
;
// Skip leading field separators.
1040 while (checkAndConsumeFS()) {
1043 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1044 if (std::error_code EC
= PIDRes
.getError())
1046 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1047 if (!opts::LinuxKernelMode
&& MMapInfoIter
== BinaryMMapInfo
.end()) {
1048 consumeRestOfLine();
1049 return make_error_code(errc::no_such_process
);
1052 while (checkAndConsumeFS()) {
1055 ErrorOr
<uint64_t> PCRes
= parseHexField(FieldSeparator
, true);
1056 if (std::error_code EC
= PCRes
.getError())
1058 Res
.PC
= PCRes
.get();
1060 if (checkAndConsumeNewLine())
1063 while (!checkAndConsumeNewLine()) {
1064 checkAndConsumeFS();
1066 ErrorOr
<LBREntry
> LBRRes
= parseLBREntry();
1067 if (std::error_code EC
= LBRRes
.getError())
1069 LBREntry LBR
= LBRRes
.get();
1070 if (ignoreKernelInterrupt(LBR
))
// NOTE(review): in Linux kernel mode MMapInfoIter may equal end() (the check
// above is skipped); presumably HasFixedLoadAddress is always set in that
// mode so the dereference below is never reached — confirm.
1072 if (!BC
->HasFixedLoadAddress
)
1073 adjustLBR(LBR
, MMapInfoIter
->second
);
1074 Res
.LBR
.push_back(LBR
);
/// Parse one line of basic-sample output: PID, event name, address. A PID
/// with no entry in BinaryMMapInfo makes the rest of the line be skipped and
/// yields a sample with an empty event name and address 0. Trailing content
/// after the address is an io_error. The address goes through adjustAddress()
/// when the binary has no fixed load address.
/// NOTE(review): loop bodies and error propagation are not visible here.
1080 ErrorOr
<DataAggregator::PerfBasicSample
> DataAggregator::parseBasicSample() {
1081 while (checkAndConsumeFS()) {
1084 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1085 if (std::error_code EC
= PIDRes
.getError())
1088 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1089 if (MMapInfoIter
== BinaryMMapInfo
.end()) {
1090 consumeRestOfLine();
1091 return PerfBasicSample
{StringRef(), 0};
1094 while (checkAndConsumeFS()) {
1097 ErrorOr
<StringRef
> Event
= parseString(FieldSeparator
);
1098 if (std::error_code EC
= Event
.getError())
1101 while (checkAndConsumeFS()) {
1104 ErrorOr
<uint64_t> AddrRes
= parseHexField(FieldSeparator
, true);
1105 if (std::error_code EC
= AddrRes
.getError())
1108 if (!checkAndConsumeNewLine()) {
1109 reportError("expected end of line");
1110 return make_error_code(llvm::errc::io_error
);
1113 uint64_t Address
= *AddrRes
;
1114 if (!BC
->HasFixedLoadAddress
)
1115 adjustAddress(Address
, MMapInfoIter
->second
);
1117 return PerfBasicSample
{Event
.get(), Address
};
/// Parse one line of memory-event output: PID, event name, data address, PC.
/// Lines whose PID has no entry in BinaryMMapInfo, or whose event name does
/// not contain "mem-loads", have the rest of the line skipped. The data
/// address goes through adjustAddress() when the binary has no fixed load
/// address; the PC is returned as parsed.
/// NOTE(review): the values returned on the skip paths and the error
/// propagation are not visible here.
1120 ErrorOr
<DataAggregator::PerfMemSample
> DataAggregator::parseMemSample() {
1121 PerfMemSample Res
{0, 0};
1123 while (checkAndConsumeFS()) {
1126 ErrorOr
<int64_t> PIDRes
= parseNumberField(FieldSeparator
, true);
1127 if (std::error_code EC
= PIDRes
.getError())
1130 auto MMapInfoIter
= BinaryMMapInfo
.find(*PIDRes
);
1131 if (MMapInfoIter
== BinaryMMapInfo
.end()) {
1132 consumeRestOfLine();
1136 while (checkAndConsumeFS()) {
1139 ErrorOr
<StringRef
> Event
= parseString(FieldSeparator
);
1140 if (std::error_code EC
= Event
.getError())
1142 if (!Event
.get().contains("mem-loads")) {
1143 consumeRestOfLine();
1147 while (checkAndConsumeFS()) {
1150 ErrorOr
<uint64_t> AddrRes
= parseHexField(FieldSeparator
);
1151 if (std::error_code EC
= AddrRes
.getError())
1154 while (checkAndConsumeFS()) {
// Unlike the other fields, a PC parse failure also skips the rest of the line.
1157 ErrorOr
<uint64_t> PCRes
= parseHexField(FieldSeparator
, true);
1158 if (std::error_code EC
= PCRes
.getError()) {
1159 consumeRestOfLine();
1163 if (!checkAndConsumeNewLine()) {
1164 reportError("expected end of line");
1165 return make_error_code(llvm::errc::io_error
);
1168 uint64_t Address
= *AddrRes
;
1169 if (!BC
->HasFixedLoadAddress
)
1170 adjustAddress(Address
, MMapInfoIter
->second
);
1172 return PerfMemSample
{PCRes
.get(), Address
};
1175 ErrorOr
<Location
> DataAggregator::parseLocationOrOffset() {
1176 auto parseOffset
= [this]() -> ErrorOr
<Location
> {
1177 ErrorOr
<uint64_t> Res
= parseHexField(FieldSeparator
);
1178 if (std::error_code EC
= Res
.getError())
1180 return Location(Res
.get());
1183 size_t Sep
= ParsingBuf
.find_first_of(" \n");
1184 if (Sep
== StringRef::npos
)
1185 return parseOffset();
1186 StringRef LookAhead
= ParsingBuf
.substr(0, Sep
);
1187 if (LookAhead
.find_first_of(":") == StringRef::npos
)
1188 return parseOffset();
1190 ErrorOr
<StringRef
> BuildID
= parseString(':');
1191 if (std::error_code EC
= BuildID
.getError())
1193 ErrorOr
<uint64_t> Offset
= parseHexField(FieldSeparator
);
1194 if (std::error_code EC
= Offset
.getError())
1196 return Location(true, BuildID
.get(), Offset
.get());
1199 ErrorOr
<DataAggregator::AggregatedLBREntry
>
1200 DataAggregator::parseAggregatedLBREntry() {
1201 while (checkAndConsumeFS()) {
1204 ErrorOr
<StringRef
> TypeOrErr
= parseString(FieldSeparator
);
1205 if (std::error_code EC
= TypeOrErr
.getError())
1207 auto Type
= AggregatedLBREntry::BRANCH
;
1208 if (TypeOrErr
.get() == "B") {
1209 Type
= AggregatedLBREntry::BRANCH
;
1210 } else if (TypeOrErr
.get() == "F") {
1211 Type
= AggregatedLBREntry::FT
;
1212 } else if (TypeOrErr
.get() == "f") {
1213 Type
= AggregatedLBREntry::FT_EXTERNAL_ORIGIN
;
1215 reportError("expected B, F or f");
1216 return make_error_code(llvm::errc::io_error
);
1219 while (checkAndConsumeFS()) {
1221 ErrorOr
<Location
> From
= parseLocationOrOffset();
1222 if (std::error_code EC
= From
.getError())
1225 while (checkAndConsumeFS()) {
1227 ErrorOr
<Location
> To
= parseLocationOrOffset();
1228 if (std::error_code EC
= To
.getError())
1231 while (checkAndConsumeFS()) {
1233 ErrorOr
<int64_t> Frequency
=
1234 parseNumberField(FieldSeparator
, Type
!= AggregatedLBREntry::BRANCH
);
1235 if (std::error_code EC
= Frequency
.getError())
1238 uint64_t Mispreds
= 0;
1239 if (Type
== AggregatedLBREntry::BRANCH
) {
1240 while (checkAndConsumeFS()) {
1242 ErrorOr
<int64_t> MispredsOrErr
= parseNumberField(FieldSeparator
, true);
1243 if (std::error_code EC
= MispredsOrErr
.getError())
1245 Mispreds
= static_cast<uint64_t>(MispredsOrErr
.get());
1248 if (!checkAndConsumeNewLine()) {
1249 reportError("expected end of line");
1250 return make_error_code(llvm::errc::io_error
);
1253 return AggregatedLBREntry
{From
.get(), To
.get(),
1254 static_cast<uint64_t>(Frequency
.get()), Mispreds
,
1258 bool DataAggregator::ignoreKernelInterrupt(LBREntry
&LBR
) const {
1259 return opts::IgnoreInterruptLBR
&&
1260 (LBR
.From
>= KernelBaseAddr
|| LBR
.To
>= KernelBaseAddr
);
1263 std::error_code
DataAggregator::printLBRHeatMap() {
1264 outs() << "PERF2BOLT: parse branch events...\n";
1265 NamedRegionTimer
T("parseBranch", "Parsing branch events", TimerGroupName
,
1266 TimerGroupDesc
, opts::TimeAggregator
);
1268 if (opts::LinuxKernelMode
) {
1269 opts::HeatmapMaxAddress
= 0xffffffffffffffff;
1270 opts::HeatmapMinAddress
= KernelBaseAddr
;
1272 Heatmap
HM(opts::HeatmapBlock
, opts::HeatmapMinAddress
,
1273 opts::HeatmapMaxAddress
, getTextSections(BC
));
1274 uint64_t NumTotalSamples
= 0;
1276 if (opts::BasicAggregation
) {
1278 ErrorOr
<PerfBasicSample
> SampleRes
= parseBasicSample();
1279 if (std::error_code EC
= SampleRes
.getError()) {
1280 if (EC
== errc::no_such_process
)
1284 PerfBasicSample
&Sample
= SampleRes
.get();
1285 HM
.registerAddress(Sample
.PC
);
1288 outs() << "HEATMAP: read " << NumTotalSamples
<< " basic samples\n";
1291 ErrorOr
<PerfBranchSample
> SampleRes
= parseBranchSample();
1292 if (std::error_code EC
= SampleRes
.getError()) {
1293 if (EC
== errc::no_such_process
)
1298 PerfBranchSample
&Sample
= SampleRes
.get();
1300 // LBRs are stored in reverse execution order. NextLBR refers to the next
1301 // executed branch record.
1302 const LBREntry
*NextLBR
= nullptr;
1303 for (const LBREntry
&LBR
: Sample
.LBR
) {
1305 // Record fall-through trace.
1306 const uint64_t TraceFrom
= LBR
.To
;
1307 const uint64_t TraceTo
= NextLBR
->From
;
1308 ++FallthroughLBRs
[Trace(TraceFrom
, TraceTo
)].InternCount
;
1312 if (!Sample
.LBR
.empty()) {
1313 HM
.registerAddress(Sample
.LBR
.front().To
);
1314 HM
.registerAddress(Sample
.LBR
.back().From
);
1316 NumTotalSamples
+= Sample
.LBR
.size();
1318 outs() << "HEATMAP: read " << NumTotalSamples
<< " LBR samples\n";
1319 outs() << "HEATMAP: " << FallthroughLBRs
.size() << " unique traces\n";
1322 if (!NumTotalSamples
) {
1323 if (opts::BasicAggregation
) {
1324 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1325 "Cannot build heatmap.";
1327 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1328 "Cannot build heatmap. Use -nl for building heatmap from "
1334 outs() << "HEATMAP: building heat map...\n";
1336 for (const auto &LBR
: FallthroughLBRs
) {
1337 const Trace
&Trace
= LBR
.first
;
1338 const FTInfo
&Info
= LBR
.second
;
1339 HM
.registerAddressRange(Trace
.From
, Trace
.To
, Info
.InternCount
);
1342 if (HM
.getNumInvalidRanges())
1343 outs() << "HEATMAP: invalid traces: " << HM
.getNumInvalidRanges() << '\n';
1346 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1350 HM
.print(opts::OutputFilename
);
1351 if (opts::OutputFilename
== "-")
1352 HM
.printCDF(opts::OutputFilename
);
1354 HM
.printCDF(opts::OutputFilename
+ ".csv");
1355 if (opts::OutputFilename
== "-")
1356 HM
.printSectionHotness(opts::OutputFilename
);
1358 HM
.printSectionHotness(opts::OutputFilename
+ "-section-hotness.csv");
1360 return std::error_code();
1363 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample
&Sample
,
1364 bool NeedsSkylakeFix
) {
1365 uint64_t NumTraces
{0};
1366 // LBRs are stored in reverse execution order. NextPC refers to the next
1367 // recorded executed PC.
1368 uint64_t NextPC
= opts::UseEventPC
? Sample
.PC
: 0;
1369 uint32_t NumEntry
= 0;
1370 for (const LBREntry
&LBR
: Sample
.LBR
) {
1372 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1373 // sometimes record entry 32 as an exact copy of entry 31. This will cause
1374 // us to likely record an invalid trace and generate a stale function for
1375 // BAT mode (non BAT disassembles the function and is able to ignore this
1376 // trace at aggregation time). Drop first 2 entries (last two, in
1377 // chronological order)
1378 if (NeedsSkylakeFix
&& NumEntry
<= 2)
1381 // Record fall-through trace.
1382 const uint64_t TraceFrom
= LBR
.To
;
1383 const uint64_t TraceTo
= NextPC
;
1384 const BinaryFunction
*TraceBF
=
1385 getBinaryFunctionContainingAddress(TraceFrom
);
1386 if (TraceBF
&& TraceBF
->containsAddress(TraceTo
)) {
1387 FTInfo
&Info
= FallthroughLBRs
[Trace(TraceFrom
, TraceTo
)];
1388 if (TraceBF
->containsAddress(LBR
.From
))
1393 const BinaryFunction
*ToFunc
=
1394 getBinaryFunctionContainingAddress(TraceTo
);
1395 if (TraceBF
&& ToFunc
) {
1397 dbgs() << "Invalid trace starting in " << TraceBF
->getPrintName()
1398 << formatv(" @ {0:x}", TraceFrom
- TraceBF
->getAddress())
1399 << formatv(" and ending @ {0:x}\n", TraceTo
);
1404 dbgs() << "Out of range trace starting in "
1405 << (TraceBF
? TraceBF
->getPrintName() : "None")
1406 << formatv(" @ {0:x}",
1407 TraceFrom
- (TraceBF
? TraceBF
->getAddress() : 0))
1408 << " and ending in "
1409 << (ToFunc
? ToFunc
->getPrintName() : "None")
1410 << formatv(" @ {0:x}\n",
1411 TraceTo
- (ToFunc
? ToFunc
->getAddress() : 0));
1413 ++NumLongRangeTraces
;
1420 uint64_t From
= getBinaryFunctionContainingAddress(LBR
.From
) ? LBR
.From
: 0;
1421 uint64_t To
= getBinaryFunctionContainingAddress(LBR
.To
) ? LBR
.To
: 0;
1424 BranchInfo
&Info
= BranchLBRs
[Trace(From
, To
)];
1426 Info
.MispredCount
+= LBR
.Mispred
;
1431 std::error_code
DataAggregator::parseBranchEvents() {
1432 outs() << "PERF2BOLT: parse branch events...\n";
1433 NamedRegionTimer
T("parseBranch", "Parsing branch events", TimerGroupName
,
1434 TimerGroupDesc
, opts::TimeAggregator
);
1436 uint64_t NumTotalSamples
= 0;
1437 uint64_t NumEntries
= 0;
1438 uint64_t NumSamples
= 0;
1439 uint64_t NumSamplesNoLBR
= 0;
1440 uint64_t NumTraces
= 0;
1441 bool NeedsSkylakeFix
= false;
1443 while (hasData() && NumTotalSamples
< opts::MaxSamples
) {
1446 ErrorOr
<PerfBranchSample
> SampleRes
= parseBranchSample();
1447 if (std::error_code EC
= SampleRes
.getError()) {
1448 if (EC
== errc::no_such_process
)
1454 PerfBranchSample
&Sample
= SampleRes
.get();
1455 if (opts::WriteAutoFDOData
)
1456 ++BasicSamples
[Sample
.PC
];
1458 if (Sample
.LBR
.empty()) {
1463 NumEntries
+= Sample
.LBR
.size();
1464 if (BAT
&& Sample
.LBR
.size() == 32 && !NeedsSkylakeFix
) {
1465 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1466 NeedsSkylakeFix
= true;
1469 NumTraces
+= parseLBRSample(Sample
, NeedsSkylakeFix
);
1472 for (const Trace
&Trace
: llvm::make_first_range(BranchLBRs
))
1473 for (const uint64_t Addr
: {Trace
.From
, Trace
.To
})
1474 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Addr
))
1475 BF
->setHasProfileAvailable();
1477 auto printColored
= [](raw_ostream
&OS
, float Percent
, float T1
, float T2
) {
1479 if (OS
.has_colors()) {
1481 OS
.changeColor(raw_ostream::RED
);
1482 else if (Percent
> T1
)
1483 OS
.changeColor(raw_ostream::YELLOW
);
1485 OS
.changeColor(raw_ostream::GREEN
);
1487 OS
<< format("%.1f%%", Percent
);
1488 if (OS
.has_colors())
1493 outs() << "PERF2BOLT: read " << NumSamples
<< " samples and " << NumEntries
1494 << " LBR entries\n";
1495 if (NumTotalSamples
) {
1496 if (NumSamples
&& NumSamplesNoLBR
== NumSamples
) {
1497 // Note: we don't know if perf2bolt is being used to parse memory samples
1498 // at this point. In this case, it is OK to parse zero LBRs.
1499 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1500 "LBR. Record profile with perf record -j any or run perf2bolt "
1501 "in no-LBR mode with -nl (the performance improvement in -nl "
1502 "mode may be limited)\n";
1504 const uint64_t IgnoredSamples
= NumTotalSamples
- NumSamples
;
1505 const float PercentIgnored
= 100.0f
* IgnoredSamples
/ NumTotalSamples
;
1506 outs() << "PERF2BOLT: " << IgnoredSamples
<< " samples";
1507 printColored(outs(), PercentIgnored
, 20, 50);
1508 outs() << " were ignored\n";
1509 if (PercentIgnored
> 50.0f
)
1510 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1511 "were attributed to the input binary\n";
1514 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1515 << NumInvalidTraces
;
1517 if (NumTraces
> 0) {
1518 Perc
= NumInvalidTraces
* 100.0f
/ NumTraces
;
1519 printColored(outs(), Perc
, 5, 10);
1523 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1524 "binary is probably not the same binary used during profiling "
1525 "collection. The generated data may be ineffective for improving "
1528 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1529 << NumLongRangeTraces
;
1531 outs() << format(" (%.1f%%)", NumLongRangeTraces
* 100.0f
/ NumTraces
);
1534 if (NumColdSamples
> 0) {
1535 const float ColdSamples
= NumColdSamples
* 100.0f
/ NumTotalSamples
;
1536 outs() << "PERF2BOLT: " << NumColdSamples
1537 << format(" (%.1f%%)", ColdSamples
)
1538 << " samples recorded in cold regions of split functions.\n";
1539 if (ColdSamples
> 5.0f
)
1541 << "WARNING: The BOLT-processed binary where samples were collected "
1542 "likely used bad data or your service observed a large shift in "
1543 "profile. You may want to audit this.\n";
1546 return std::error_code();
1549 void DataAggregator::processBranchEvents() {
1550 outs() << "PERF2BOLT: processing branch events...\n";
1551 NamedRegionTimer
T("processBranch", "Processing branch events",
1552 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1554 for (const auto &AggrLBR
: FallthroughLBRs
) {
1555 const Trace
&Loc
= AggrLBR
.first
;
1556 const FTInfo
&Info
= AggrLBR
.second
;
1557 LBREntry First
{Loc
.From
, Loc
.From
, false};
1558 LBREntry Second
{Loc
.To
, Loc
.To
, false};
1559 if (Info
.InternCount
)
1560 doTrace(First
, Second
, Info
.InternCount
);
1561 if (Info
.ExternCount
) {
1563 doTrace(First
, Second
, Info
.ExternCount
);
1567 for (const auto &AggrLBR
: BranchLBRs
) {
1568 const Trace
&Loc
= AggrLBR
.first
;
1569 const BranchInfo
&Info
= AggrLBR
.second
;
1570 doBranch(Loc
.From
, Loc
.To
, Info
.TakenCount
, Info
.MispredCount
);
1574 std::error_code
DataAggregator::parseBasicEvents() {
1575 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1576 NamedRegionTimer
T("parseBasic", "Parsing basic events", TimerGroupName
,
1577 TimerGroupDesc
, opts::TimeAggregator
);
1579 ErrorOr
<PerfBasicSample
> Sample
= parseBasicSample();
1580 if (std::error_code EC
= Sample
.getError())
1586 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Sample
->PC
))
1587 BF
->setHasProfileAvailable();
1589 ++BasicSamples
[Sample
->PC
];
1590 EventNames
.insert(Sample
->EventName
);
1593 return std::error_code();
1596 void DataAggregator::processBasicEvents() {
1597 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1598 NamedRegionTimer
T("processBasic", "Processing basic events", TimerGroupName
,
1599 TimerGroupDesc
, opts::TimeAggregator
);
1600 uint64_t OutOfRangeSamples
= 0;
1601 uint64_t NumSamples
= 0;
1602 for (auto &Sample
: BasicSamples
) {
1603 const uint64_t PC
= Sample
.first
;
1604 const uint64_t HitCount
= Sample
.second
;
1605 NumSamples
+= HitCount
;
1606 BinaryFunction
*Func
= getBinaryFunctionContainingAddress(PC
);
1608 OutOfRangeSamples
+= HitCount
;
1612 doSample(*Func
, PC
, HitCount
);
1614 outs() << "PERF2BOLT: read " << NumSamples
<< " samples\n";
1616 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1617 << OutOfRangeSamples
;
1619 if (NumSamples
> 0) {
1621 Perc
= OutOfRangeSamples
* 100.0f
/ NumSamples
;
1622 if (outs().has_colors()) {
1624 outs().changeColor(raw_ostream::RED
);
1625 else if (Perc
> 40.0f
)
1626 outs().changeColor(raw_ostream::YELLOW
);
1628 outs().changeColor(raw_ostream::GREEN
);
1630 outs() << format("%.1f%%", Perc
);
1631 if (outs().has_colors())
1632 outs().resetColor();
1637 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1638 "binary is probably not the same binary used during profiling "
1639 "collection. The generated data may be ineffective for improving "
1643 std::error_code
DataAggregator::parseMemEvents() {
1644 outs() << "PERF2BOLT: parsing memory events...\n";
1645 NamedRegionTimer
T("parseMemEvents", "Parsing mem events", TimerGroupName
,
1646 TimerGroupDesc
, opts::TimeAggregator
);
1648 ErrorOr
<PerfMemSample
> Sample
= parseMemSample();
1649 if (std::error_code EC
= Sample
.getError())
1652 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Sample
->PC
))
1653 BF
->setHasProfileAvailable();
1655 MemSamples
.emplace_back(std::move(Sample
.get()));
1658 return std::error_code();
1661 void DataAggregator::processMemEvents() {
1662 NamedRegionTimer
T("ProcessMemEvents", "Processing mem events",
1663 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1664 for (const PerfMemSample
&Sample
: MemSamples
) {
1665 uint64_t PC
= Sample
.PC
;
1666 uint64_t Addr
= Sample
.Addr
;
1670 // Try to resolve symbol for PC
1671 BinaryFunction
*Func
= getBinaryFunctionContainingAddress(PC
);
1673 LLVM_DEBUG(if (PC
!= 0) {
1674 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC
, Addr
);
1679 FuncName
= Func
->getOneName();
1680 PC
-= Func
->getAddress();
1682 // Try to resolve symbol for memory load
1683 if (BinaryData
*BD
= BC
->getBinaryDataContainingAddress(Addr
)) {
1684 MemName
= BD
->getName();
1685 Addr
-= BD
->getAddress();
1686 } else if (opts::FilterMemProfile
) {
1687 // Filter out heap/stack accesses
1691 const Location
FuncLoc(!FuncName
.empty(), FuncName
, PC
);
1692 const Location
AddrLoc(!MemName
.empty(), MemName
, Addr
);
1694 FuncMemData
*MemData
= &NamesToMemEvents
[FuncName
];
1695 MemData
->Name
= FuncName
;
1696 setMemData(*Func
, MemData
);
1697 MemData
->update(FuncLoc
, AddrLoc
);
1698 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc
<< " = " << AddrLoc
<< "\n");
1702 std::error_code
DataAggregator::parsePreAggregatedLBRSamples() {
1703 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1704 NamedRegionTimer
T("parseAggregated", "Parsing aggregated branch events",
1705 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1707 ErrorOr
<AggregatedLBREntry
> AggrEntry
= parseAggregatedLBREntry();
1708 if (std::error_code EC
= AggrEntry
.getError())
1711 for (const uint64_t Addr
: {AggrEntry
->From
.Offset
, AggrEntry
->To
.Offset
})
1712 if (BinaryFunction
*BF
= getBinaryFunctionContainingAddress(Addr
))
1713 BF
->setHasProfileAvailable();
1715 AggregatedLBRs
.emplace_back(std::move(AggrEntry
.get()));
1718 return std::error_code();
1721 void DataAggregator::processPreAggregated() {
1722 outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1723 NamedRegionTimer
T("processAggregated", "Processing aggregated branch events",
1724 TimerGroupName
, TimerGroupDesc
, opts::TimeAggregator
);
1726 uint64_t NumTraces
= 0;
1727 for (const AggregatedLBREntry
&AggrEntry
: AggregatedLBRs
) {
1728 switch (AggrEntry
.EntryType
) {
1729 case AggregatedLBREntry::BRANCH
:
1730 doBranch(AggrEntry
.From
.Offset
, AggrEntry
.To
.Offset
, AggrEntry
.Count
,
1731 AggrEntry
.Mispreds
);
1733 case AggregatedLBREntry::FT
:
1734 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN
: {
1735 LBREntry First
{AggrEntry
.EntryType
== AggregatedLBREntry::FT
1736 ? AggrEntry
.From
.Offset
1738 AggrEntry
.From
.Offset
, false};
1739 LBREntry Second
{AggrEntry
.To
.Offset
, AggrEntry
.To
.Offset
, false};
1740 doTrace(First
, Second
, AggrEntry
.Count
);
1741 NumTraces
+= AggrEntry
.Count
;
1747 outs() << "PERF2BOLT: read " << AggregatedLBRs
.size()
1748 << " aggregated LBR entries\n";
1749 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1750 << NumInvalidTraces
;
1752 if (NumTraces
> 0) {
1754 Perc
= NumInvalidTraces
* 100.0f
/ NumTraces
;
1755 if (outs().has_colors()) {
1757 outs().changeColor(raw_ostream::RED
);
1758 else if (Perc
> 5.0f
)
1759 outs().changeColor(raw_ostream::YELLOW
);
1761 outs().changeColor(raw_ostream::GREEN
);
1763 outs() << format("%.1f%%", Perc
);
1764 if (outs().has_colors())
1765 outs().resetColor();
1770 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1771 "binary is probably not the same binary used during profiling "
1772 "collection. The generated data may be ineffective for improving "
1775 outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1776 << NumLongRangeTraces
;
1778 outs() << format(" (%.1f%%)", NumLongRangeTraces
* 100.0f
/ NumTraces
);
1782 std::optional
<int32_t> DataAggregator::parseCommExecEvent() {
1783 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1784 if (LineEnd
== StringRef::npos
) {
1785 reportError("expected rest of line");
1786 Diag
<< "Found: " << ParsingBuf
<< "\n";
1787 return std::nullopt
;
1789 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1791 size_t Pos
= Line
.find("PERF_RECORD_COMM exec");
1792 if (Pos
== StringRef::npos
)
1793 return std::nullopt
;
1794 Line
= Line
.drop_front(Pos
);
1797 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1798 StringRef PIDStr
= Line
.rsplit(':').second
.split('/').first
;
1800 if (PIDStr
.getAsInteger(10, PID
)) {
1801 reportError("expected PID");
1802 Diag
<< "Found: " << PIDStr
<< "in '" << Line
<< "'\n";
1803 return std::nullopt
;
1810 std::optional
<uint64_t> parsePerfTime(const StringRef TimeStr
) {
1811 const StringRef SecTimeStr
= TimeStr
.split('.').first
;
1812 const StringRef USecTimeStr
= TimeStr
.split('.').second
;
1815 if (SecTimeStr
.getAsInteger(10, SecTime
) ||
1816 USecTimeStr
.getAsInteger(10, USecTime
))
1817 return std::nullopt
;
1818 return SecTime
* 1000000ULL + USecTime
;
1822 std::optional
<DataAggregator::ForkInfo
> DataAggregator::parseForkEvent() {
1823 while (checkAndConsumeFS()) {
1826 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1827 if (LineEnd
== StringRef::npos
) {
1828 reportError("expected rest of line");
1829 Diag
<< "Found: " << ParsingBuf
<< "\n";
1830 return std::nullopt
;
1832 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1834 size_t Pos
= Line
.find("PERF_RECORD_FORK");
1835 if (Pos
== StringRef::npos
) {
1836 consumeRestOfLine();
1837 return std::nullopt
;
1842 const StringRef TimeStr
=
1843 Line
.substr(0, Pos
).rsplit(':').first
.rsplit(FieldSeparator
).second
;
1844 if (std::optional
<uint64_t> TimeRes
= parsePerfTime(TimeStr
)) {
1848 Line
= Line
.drop_front(Pos
);
1851 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1852 const StringRef ChildPIDStr
= Line
.split('(').second
.split(':').first
;
1853 if (ChildPIDStr
.getAsInteger(10, FI
.ChildPID
)) {
1854 reportError("expected PID");
1855 Diag
<< "Found: " << ChildPIDStr
<< "in '" << Line
<< "'\n";
1856 return std::nullopt
;
1859 const StringRef ParentPIDStr
= Line
.rsplit('(').second
.split(':').first
;
1860 if (ParentPIDStr
.getAsInteger(10, FI
.ParentPID
)) {
1861 reportError("expected PID");
1862 Diag
<< "Found: " << ParentPIDStr
<< "in '" << Line
<< "'\n";
1863 return std::nullopt
;
1866 consumeRestOfLine();
1871 ErrorOr
<std::pair
<StringRef
, DataAggregator::MMapInfo
>>
1872 DataAggregator::parseMMapEvent() {
1873 while (checkAndConsumeFS()) {
1876 MMapInfo ParsedInfo
;
1878 size_t LineEnd
= ParsingBuf
.find_first_of("\n");
1879 if (LineEnd
== StringRef::npos
) {
1880 reportError("expected rest of line");
1881 Diag
<< "Found: " << ParsingBuf
<< "\n";
1882 return make_error_code(llvm::errc::io_error
);
1884 StringRef Line
= ParsingBuf
.substr(0, LineEnd
);
1886 size_t Pos
= Line
.find("PERF_RECORD_MMAP2");
1887 if (Pos
== StringRef::npos
) {
1888 consumeRestOfLine();
1889 return std::make_pair(StringRef(), ParsedInfo
);
1893 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1895 const StringRef TimeStr
=
1896 Line
.substr(0, Pos
).rsplit(':').first
.rsplit(FieldSeparator
).second
;
1897 if (std::optional
<uint64_t> TimeRes
= parsePerfTime(TimeStr
))
1898 ParsedInfo
.Time
= *TimeRes
;
1900 Line
= Line
.drop_front(Pos
);
1903 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1905 StringRef FileName
= Line
.rsplit(FieldSeparator
).second
;
1906 if (FileName
.startswith("//") || FileName
.startswith("[")) {
1907 consumeRestOfLine();
1908 return std::make_pair(StringRef(), ParsedInfo
);
1910 FileName
= sys::path::filename(FileName
);
1912 const StringRef PIDStr
= Line
.split(FieldSeparator
).second
.split('/').first
;
1913 if (PIDStr
.getAsInteger(10, ParsedInfo
.PID
)) {
1914 reportError("expected PID");
1915 Diag
<< "Found: " << PIDStr
<< "in '" << Line
<< "'\n";
1916 return make_error_code(llvm::errc::io_error
);
1919 const StringRef BaseAddressStr
= Line
.split('[').second
.split('(').first
;
1920 if (BaseAddressStr
.getAsInteger(0, ParsedInfo
.MMapAddress
)) {
1921 reportError("expected base address");
1922 Diag
<< "Found: " << BaseAddressStr
<< "in '" << Line
<< "'\n";
1923 return make_error_code(llvm::errc::io_error
);
1926 const StringRef SizeStr
= Line
.split('(').second
.split(')').first
;
1927 if (SizeStr
.getAsInteger(0, ParsedInfo
.Size
)) {
1928 reportError("expected mmaped size");
1929 Diag
<< "Found: " << SizeStr
<< "in '" << Line
<< "'\n";
1930 return make_error_code(llvm::errc::io_error
);
1933 const StringRef OffsetStr
=
1934 Line
.split('@').second
.ltrim().split(FieldSeparator
).first
;
1935 if (OffsetStr
.getAsInteger(0, ParsedInfo
.Offset
)) {
1936 reportError("expected mmaped page-aligned offset");
1937 Diag
<< "Found: " << OffsetStr
<< "in '" << Line
<< "'\n";
1938 return make_error_code(llvm::errc::io_error
);
1941 consumeRestOfLine();
1943 return std::make_pair(FileName
, ParsedInfo
);
1946 std::error_code
DataAggregator::parseMMapEvents() {
1947 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1948 NamedRegionTimer
T("parseMMapEvents", "Parsing mmap events", TimerGroupName
,
1949 TimerGroupDesc
, opts::TimeAggregator
);
1951 std::multimap
<StringRef
, MMapInfo
> GlobalMMapInfo
;
1953 ErrorOr
<std::pair
<StringRef
, MMapInfo
>> FileMMapInfoRes
= parseMMapEvent();
1954 if (std::error_code EC
= FileMMapInfoRes
.getError())
1957 std::pair
<StringRef
, MMapInfo
> FileMMapInfo
= FileMMapInfoRes
.get();
1958 if (FileMMapInfo
.second
.PID
== -1)
1960 if (FileMMapInfo
.first
.equals("(deleted)"))
1963 // Consider only the first mapping of the file for any given PID
1964 auto Range
= GlobalMMapInfo
.equal_range(FileMMapInfo
.first
);
1965 bool PIDExists
= llvm::any_of(make_range(Range
), [&](const auto &MI
) {
1966 return MI
.second
.PID
== FileMMapInfo
.second
.PID
;
1972 GlobalMMapInfo
.insert(FileMMapInfo
);
1976 dbgs() << "FileName -> mmap info:\n"
1977 << " Filename : PID [MMapAddr, Size, Offset]\n";
1978 for (const auto &[Name
, MMap
] : GlobalMMapInfo
)
1979 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name
, MMap
.PID
,
1980 MMap
.MMapAddress
, MMap
.Size
, MMap
.Offset
);
1983 StringRef NameToUse
= llvm::sys::path::filename(BC
->getFilename());
1984 if (GlobalMMapInfo
.count(NameToUse
) == 0 && !BuildIDBinaryName
.empty()) {
1985 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
1986 << "\" for profile matching\n";
1987 NameToUse
= BuildIDBinaryName
;
1990 auto Range
= GlobalMMapInfo
.equal_range(NameToUse
);
1991 for (MMapInfo
&MMapInfo
: llvm::make_second_range(make_range(Range
))) {
1992 if (BC
->HasFixedLoadAddress
&& MMapInfo
.MMapAddress
) {
1993 // Check that the binary mapping matches one of the segments.
1994 bool MatchFound
= llvm::any_of(
1995 llvm::make_second_range(BC
->SegmentMapInfo
),
1996 [&](SegmentInfo
&SegInfo
) {
1997 // The mapping is page-aligned and hence the MMapAddress could be
1998 // different from the segment start address. We cannot know the page
1999 // size of the mapping, but we know it should not exceed the segment
2000 // alignment value. Hence we are performing an approximate check.
2001 return SegInfo
.Address
>= MMapInfo
.MMapAddress
&&
2002 SegInfo
.Address
- MMapInfo
.MMapAddress
< SegInfo
.Alignment
;
2005 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2006 << " at 0x" << Twine::utohexstr(MMapInfo
.MMapAddress
) << '\n';
2011 // Set base address for shared objects.
2012 if (!BC
->HasFixedLoadAddress
) {
2013 std::optional
<uint64_t> BaseAddress
=
2014 BC
->getBaseAddressForMapping(MMapInfo
.MMapAddress
, MMapInfo
.Offset
);
2016 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2017 "binary when memory mapped at 0x"
2018 << Twine::utohexstr(MMapInfo
.MMapAddress
)
2019 << " using file offset 0x" << Twine::utohexstr(MMapInfo
.Offset
)
2020 << ". Ignoring profile data for this mapping\n";
2023 MMapInfo
.BaseAddress
= *BaseAddress
;
2027 BinaryMMapInfo
.insert(std::make_pair(MMapInfo
.PID
, MMapInfo
));
2030 if (BinaryMMapInfo
.empty()) {
2031 if (errs().has_colors())
2032 errs().changeColor(raw_ostream::RED
);
2033 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2034 << BC
->getFilename() << "\".";
2035 if (!GlobalMMapInfo
.empty()) {
2036 errs() << " Profile for the following binary name(s) is available:\n";
2037 for (auto I
= GlobalMMapInfo
.begin(), IE
= GlobalMMapInfo
.end(); I
!= IE
;
2038 I
= GlobalMMapInfo
.upper_bound(I
->first
))
2039 errs() << " " << I
->first
<< '\n';
2040 errs() << "Please rename the input binary.\n";
2042 errs() << " Failed to extract any binary name from a profile.\n";
2044 if (errs().has_colors())
2045 errs().resetColor();
2050 return std::error_code();
2053 std::error_code
DataAggregator::parseTaskEvents() {
2054 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2055 NamedRegionTimer
T("parseTaskEvents", "Parsing task events", TimerGroupName
,
2056 TimerGroupDesc
, opts::TimeAggregator
);
2059 if (std::optional
<int32_t> CommInfo
= parseCommExecEvent()) {
2060 // Remove forked child that ran execve
2061 auto MMapInfoIter
= BinaryMMapInfo
.find(*CommInfo
);
2062 if (MMapInfoIter
!= BinaryMMapInfo
.end() && MMapInfoIter
->second
.Forked
)
2063 BinaryMMapInfo
.erase(MMapInfoIter
);
2064 consumeRestOfLine();
2068 std::optional
<ForkInfo
> ForkInfo
= parseForkEvent();
2072 if (ForkInfo
->ParentPID
== ForkInfo
->ChildPID
)
2075 if (ForkInfo
->Time
== 0) {
2076 // Process was forked and mmaped before perf ran. In this case the child
2077 // should have its own mmap entry unless it was execve'd.
2081 auto MMapInfoIter
= BinaryMMapInfo
.find(ForkInfo
->ParentPID
);
2082 if (MMapInfoIter
== BinaryMMapInfo
.end())
2085 MMapInfo MMapInfo
= MMapInfoIter
->second
;
2086 MMapInfo
.PID
= ForkInfo
->ChildPID
;
2087 MMapInfo
.Forked
= true;
2088 BinaryMMapInfo
.insert(std::make_pair(MMapInfo
.PID
, MMapInfo
));
2091 outs() << "PERF2BOLT: input binary is associated with "
2092 << BinaryMMapInfo
.size() << " PID(s)\n";
2095 for (const MMapInfo
&MMI
: llvm::make_second_range(BinaryMMapInfo
))
2096 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI
.PID
,
2097 (MMI
.Forked
? " (forked)" : ""), MMI
.MMapAddress
,
2101 return std::error_code();
2104 std::optional
<std::pair
<StringRef
, StringRef
>>
2105 DataAggregator::parseNameBuildIDPair() {
2106 while (checkAndConsumeFS()) {
2109 ErrorOr
<StringRef
> BuildIDStr
= parseString(FieldSeparator
, true);
2110 if (std::error_code EC
= BuildIDStr
.getError())
2111 return std::nullopt
;
2113 // If one of the strings is missing, don't issue a parsing error, but still
2114 // do not return a value.
2115 consumeAllRemainingFS();
2117 return std::nullopt
;
2119 ErrorOr
<StringRef
> NameStr
= parseString(FieldSeparator
, true);
2120 if (std::error_code EC
= NameStr
.getError())
2121 return std::nullopt
;
2123 consumeRestOfLine();
2124 return std::make_pair(NameStr
.get(), BuildIDStr
.get());
2127 bool DataAggregator::hasAllBuildIDs() {
2128 const StringRef SavedParsingBuf
= ParsingBuf
;
2133 bool HasInvalidEntries
= false;
2135 if (!parseNameBuildIDPair()) {
2136 HasInvalidEntries
= true;
2141 ParsingBuf
= SavedParsingBuf
;
2143 return !HasInvalidEntries
;
2146 std::optional
<StringRef
>
2147 DataAggregator::getFileNameForBuildID(StringRef FileBuildID
) {
2148 const StringRef SavedParsingBuf
= ParsingBuf
;
2152 std::optional
<std::pair
<StringRef
, StringRef
>> IDPair
=
2153 parseNameBuildIDPair();
2155 consumeRestOfLine();
2159 if (IDPair
->second
.startswith(FileBuildID
)) {
2160 FileName
= sys::path::filename(IDPair
->first
);
2165 ParsingBuf
= SavedParsingBuf
;
2167 if (!FileName
.empty())
2170 return std::nullopt
;
/// Write the aggregated profile as text to the file named OutputFilename.
///
/// The records visible here are, in order: a "no_lbr" header listing the
/// event names plus per-function sample records (under
/// opts::BasicAggregation), per-function branch records, and per-function
/// memory-event records. Every record ends with a newline. A summary line is
/// printed to outs() and a default-constructed std::error_code is returned at
/// the end.
2174 DataAggregator::writeAggregatedFile(StringRef OutputFilename
) const {
2176 raw_fd_ostream
OutFile(OutputFilename
, EC
, sys::fs::OpenFlags::OF_None
);
// Read by writeLocation through its by-reference capture; it is set to true
// further down, just before the memory-event records are written.
2180 bool WriteMemLocs
= false;
// Writes one Location as: a tag derived from Loc.IsSymbol ("4 "/"3 " or
// "1 "/"0 "), the escaped name (or "[unknown]" when the name is empty), a
// space, the offset in hex, and finally FieldSeparator.
// NOTE(review): which tag pair is used presumably depends on WriteMemLocs --
// confirm against the full lambda body.
2182 auto writeLocation
= [&OutFile
, &WriteMemLocs
](const Location
&Loc
) {
2184 OutFile
<< (Loc
.IsSymbol
? "4 " : "3 ");
2186 OutFile
<< (Loc
.IsSymbol
? "1 " : "0 ");
2187 OutFile
<< (Loc
.Name
.empty() ? "[unknown]" : getEscapedName(Loc
.Name
))
2188 << " " << Twine::utohexstr(Loc
.Offset
) << FieldSeparator
;
// Record counters reported in the summary message at the end.
2191 uint64_t BranchValues
= 0;
2192 uint64_t MemValues
= 0;
2195 OutFile
<< "boltedcollection\n";
// Basic aggregation: a "no_lbr" header followed by the space-separated event
// names, then one record per sample.
2196 if (opts::BasicAggregation
) {
2197 OutFile
<< "no_lbr";
2198 for (const StringMapEntry
<std::nullopt_t
> &Entry
: EventNames
)
2199 OutFile
<< " " << Entry
.getKey();
// Sample record: location followed by the hit count.
2202 for (const auto &KV
: NamesToSamples
) {
2203 const FuncSampleData
&FSD
= KV
.second
;
2204 for (const SampleInfo
&SI
: FSD
.Data
) {
2205 writeLocation(SI
.Loc
);
2206 OutFile
<< SI
.Hits
<< "\n";
// Branch record: source location, destination location, then the
// misprediction count and the branch count separated by a space.
2211 for (const auto &KV
: NamesToBranches
) {
2212 const FuncBranchData
&FBD
= KV
.second
;
2213 for (const llvm::bolt::BranchInfo
&BI
: FBD
.Data
) {
2214 writeLocation(BI
.From
);
2215 writeLocation(BI
.To
);
2216 OutFile
<< BI
.Mispreds
<< " " << BI
.Branches
<< "\n";
// Entry records use the same layout as the branch records above.
2219 for (const llvm::bolt::BranchInfo
&BI
: FBD
.EntryData
) {
2220 // Do not output if source is a known symbol, since this was already
2221 // accounted for in the source function
2222 if (BI
.From
.IsSymbol
)
2224 writeLocation(BI
.From
);
2225 writeLocation(BI
.To
);
2226 OutFile
<< BI
.Mispreds
<< " " << BI
.Branches
<< "\n";
// writeLocation captured WriteMemLocs by reference, so it observes this
// change for the memory-event records below.
2231 WriteMemLocs
= true;
// Memory-event record: the Offset location, the Addr location, then the count.
2232 for (const auto &KV
: NamesToMemEvents
) {
2233 const FuncMemData
&FMD
= KV
.second
;
2234 for (const MemInfo
&MemEvent
: FMD
.Data
) {
2235 writeLocation(MemEvent
.Offset
);
2236 writeLocation(MemEvent
.Addr
);
2237 OutFile
<< MemEvent
.Count
<< "\n";
// Summarize what was written on standard output.
2243 outs() << "PERF2BOLT: wrote " << BranchValues
<< " objects and " << MemValues
2244 << " memory objects to " << OutputFilename
<< "\n";
2246 return std::error_code();
2249 void DataAggregator::dump() const { DataReader::dump(); }
/// Print one LBR entry to Diag: its From and To values in hexadecimal,
/// followed by its Mispred field.
2251 void DataAggregator::dump(const LBREntry
&LBR
) const {
2252 Diag
<< "From: " << Twine::utohexstr(LBR
.From
)
2253 << " To: " << Twine::utohexstr(LBR
.To
) << " Mispred? " << LBR
.Mispred
/// Print the number of LBR entries held by Sample to Diag, then visit each
/// entry in Sample.LBR in order.
2257 void DataAggregator::dump(const PerfBranchSample
&Sample
) const {
2258 Diag
<< "Sample LBR entries: " << Sample
.LBR
.size() << "\n";
2259 for (const LBREntry
&LBR
: Sample
.LBR
)
/// Print the PC and Addr fields of a memory sample to Diag on a single line.
2263 void DataAggregator::dump(const PerfMemSample
&Sample
) const {
2264 Diag
<< "Sample mem entries: " << Sample
.PC
<< ": " << Sample
.Addr
<< "\n";