[RISCV] Make EmitRISCVCpuSupports accept multiple features (#104917)
[llvm-project.git] / bolt / lib / Profile / DataAggregator.cpp
bloba300e5b2b1dabd3799574a33d3433c7bde9ec79f
1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by perf record,
10 // aggregates it and then writes it back to an output file.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Passes/BinaryPasses.h"
18 #include "bolt/Profile/BoltAddressTranslation.h"
19 #include "bolt/Profile/Heatmap.h"
20 #include "bolt/Profile/YAMLProfileWriter.h"
21 #include "bolt/Utils/CommandLineOpts.h"
22 #include "bolt/Utils/Utils.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/ScopeExit.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Compiler.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/Errc.h"
29 #include "llvm/Support/FileSystem.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Program.h"
32 #include "llvm/Support/Regex.h"
33 #include "llvm/Support/Timer.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include <map>
36 #include <optional>
37 #include <unordered_map>
38 #include <utility>
40 #define DEBUG_TYPE "aggregator"
42 using namespace llvm;
43 using namespace bolt;
45 namespace opts {
47 static cl::opt<bool>
48 BasicAggregation("nl",
49 cl::desc("aggregate basic samples (without LBR info)"),
50 cl::cat(AggregatorCategory));
52 static cl::opt<std::string>
53 ITraceAggregation("itrace",
54 cl::desc("Generate LBR info with perf itrace argument"),
55 cl::cat(AggregatorCategory));
57 static cl::opt<bool>
58 FilterMemProfile("filter-mem-profile",
59 cl::desc("if processing a memory profile, filter out stack or heap accesses "
60 "that won't be useful for BOLT to reduce profile file size"),
61 cl::init(true),
62 cl::cat(AggregatorCategory));
64 static cl::opt<unsigned long long>
65 FilterPID("pid",
66 cl::desc("only use samples from process with specified PID"),
67 cl::init(0),
68 cl::Optional,
69 cl::cat(AggregatorCategory));
71 static cl::opt<bool>
72 IgnoreBuildID("ignore-build-id",
73 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
74 cl::init(false),
75 cl::cat(AggregatorCategory));
77 static cl::opt<bool> IgnoreInterruptLBR(
78 "ignore-interrupt-lbr",
79 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
80 cl::init(true), cl::cat(AggregatorCategory));
82 static cl::opt<unsigned long long>
83 MaxSamples("max-samples",
84 cl::init(-1ULL),
85 cl::desc("maximum number of samples to read from LBR profile"),
86 cl::Optional,
87 cl::Hidden,
88 cl::cat(AggregatorCategory));
90 extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
91 extern cl::opt<bool> ProfileUsePseudoProbes;
92 extern cl::opt<std::string> SaveProfile;
94 cl::opt<bool> ReadPreAggregated(
95 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
96 cl::cat(AggregatorCategory));
98 static cl::opt<bool>
99 TimeAggregator("time-aggr",
100 cl::desc("time BOLT aggregator"),
101 cl::init(false),
102 cl::ZeroOrMore,
103 cl::cat(AggregatorCategory));
105 static cl::opt<bool>
106 UseEventPC("use-event-pc",
107 cl::desc("use event PC in combination with LBR sampling"),
108 cl::cat(AggregatorCategory));
110 static cl::opt<bool> WriteAutoFDOData(
111 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
112 cl::cat(AggregatorCategory));
114 } // namespace opts
116 namespace {
118 const char TimerGroupName[] = "aggregator";
119 const char TimerGroupDesc[] = "Aggregator";
121 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
122 std::vector<SectionNameAndRange> sections;
123 for (BinarySection &Section : BC->sections()) {
124 if (!Section.isText())
125 continue;
126 if (Section.getSize() == 0)
127 continue;
128 sections.push_back(
129 {Section.getName(), Section.getAddress(), Section.getEndAddress()});
131 llvm::sort(sections,
132 [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
133 return A.BeginAddress < B.BeginAddress;
135 return sections;
139 constexpr uint64_t DataAggregator::KernelBaseAddr;
141 DataAggregator::~DataAggregator() { deleteTempFiles(); }
143 namespace {
144 void deleteTempFile(const std::string &FileName) {
145 if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
146 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
147 << " with error " << Errc.message() << "\n";
151 void DataAggregator::deleteTempFiles() {
152 for (std::string &FileName : TempFiles)
153 deleteTempFile(FileName);
154 TempFiles.clear();
157 void DataAggregator::findPerfExecutable() {
158 std::optional<std::string> PerfExecutable =
159 sys::Process::FindInEnvPath("PATH", "perf");
160 if (!PerfExecutable) {
161 outs() << "PERF2BOLT: No perf executable found!\n";
162 exit(1);
164 PerfPath = *PerfExecutable;
167 void DataAggregator::start() {
168 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
170 // Don't launch perf for pre-aggregated files
171 if (opts::ReadPreAggregated)
172 return;
174 findPerfExecutable();
176 if (opts::BasicAggregation) {
177 launchPerfProcess("events without LBR",
178 MainEventsPPI,
179 "script -F pid,event,ip",
180 /*Wait = */false);
181 } else if (!opts::ITraceAggregation.empty()) {
182 std::string ItracePerfScriptArgs = llvm::formatv(
183 "script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation);
184 launchPerfProcess("branch events with itrace", MainEventsPPI,
185 ItracePerfScriptArgs.c_str(),
186 /*Wait = */ false);
187 } else {
188 launchPerfProcess("branch events",
189 MainEventsPPI,
190 "script -F pid,ip,brstack",
191 /*Wait = */false);
194 // Note: we launch script for mem events regardless of the option, as the
195 // command fails fairly fast if mem events were not collected.
196 launchPerfProcess("mem events",
197 MemEventsPPI,
198 "script -F pid,event,addr,ip",
199 /*Wait = */false);
201 launchPerfProcess("process events", MMapEventsPPI,
202 "script --show-mmap-events --no-itrace",
203 /*Wait = */ false);
205 launchPerfProcess("task events", TaskEventsPPI,
206 "script --show-task-events --no-itrace",
207 /*Wait = */ false);
210 void DataAggregator::abort() {
211 if (opts::ReadPreAggregated)
212 return;
214 std::string Error;
216 // Kill subprocesses in case they are not finished
217 sys::Wait(TaskEventsPPI.PI, 1, &Error);
218 sys::Wait(MMapEventsPPI.PI, 1, &Error);
219 sys::Wait(MainEventsPPI.PI, 1, &Error);
220 sys::Wait(MemEventsPPI.PI, 1, &Error);
222 deleteTempFiles();
224 exit(1);
227 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
228 const char *ArgsString, bool Wait) {
229 SmallVector<StringRef, 4> Argv;
231 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
232 Argv.push_back(PerfPath.data());
234 StringRef(ArgsString).split(Argv, ' ');
235 Argv.push_back("-f");
236 Argv.push_back("-i");
237 Argv.push_back(Filename.c_str());
239 if (std::error_code Errc =
240 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
241 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
242 << " with error " << Errc.message() << "\n";
243 exit(1);
245 TempFiles.push_back(PPI.StdoutPath.data());
247 if (std::error_code Errc =
248 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
249 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
250 << " with error " << Errc.message() << "\n";
251 exit(1);
253 TempFiles.push_back(PPI.StderrPath.data());
255 std::optional<StringRef> Redirects[] = {
256 std::nullopt, // Stdin
257 StringRef(PPI.StdoutPath.data()), // Stdout
258 StringRef(PPI.StderrPath.data())}; // Stderr
260 LLVM_DEBUG({
261 dbgs() << "Launching perf: ";
262 for (StringRef Arg : Argv)
263 dbgs() << Arg << " ";
264 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
265 << "\n";
268 if (Wait)
269 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
270 /*envp*/ std::nullopt, Redirects);
271 else
272 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ std::nullopt,
273 Redirects);
276 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
277 PerfProcessInfo BuildIDProcessInfo;
278 launchPerfProcess("buildid list",
279 BuildIDProcessInfo,
280 "buildid-list",
281 /*Wait = */true);
283 if (BuildIDProcessInfo.PI.ReturnCode != 0) {
284 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
285 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
286 StringRef ErrBuf = (*MB)->getBuffer();
288 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
289 << '\n';
290 errs() << ErrBuf;
291 return;
294 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
295 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
296 if (std::error_code EC = MB.getError()) {
297 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
298 << EC.message() << "\n";
299 return;
302 FileBuf = std::move(*MB);
303 ParsingBuf = FileBuf->getBuffer();
305 std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
306 if (!FileName) {
307 if (hasAllBuildIDs()) {
308 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
309 "This indicates the input binary supplied for data aggregation "
310 "is not the same recorded by perf when collecting profiling "
311 "data, or there were no samples recorded for the binary. "
312 "Use -ignore-build-id option to override.\n";
313 if (!opts::IgnoreBuildID)
314 abort();
315 } else {
316 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
317 "data was recorded without it\n";
318 return;
320 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
321 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
322 BuildIDBinaryName = std::string(*FileName);
323 } else {
324 outs() << "PERF2BOLT: matched build-id and file name\n";
328 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
329 if (opts::ReadPreAggregated)
330 return true;
332 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
333 if (!FD) {
334 consumeError(FD.takeError());
335 return false;
338 char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
340 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
341 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
342 *FD, MutableArrayRef(Buf, sizeof(Buf)), 0);
343 if (!BytesRead) {
344 consumeError(BytesRead.takeError());
345 return false;
348 if (*BytesRead != 7)
349 return false;
351 if (strncmp(Buf, "PERFILE", 7) == 0)
352 return true;
353 return false;
356 void DataAggregator::parsePreAggregated() {
357 std::string Error;
359 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
360 MemoryBuffer::getFileOrSTDIN(Filename);
361 if (std::error_code EC = MB.getError()) {
362 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
363 << EC.message() << "\n";
364 exit(1);
367 FileBuf = std::move(*MB);
368 ParsingBuf = FileBuf->getBuffer();
369 Col = 0;
370 Line = 1;
371 if (parsePreAggregatedLBRSamples()) {
372 errs() << "PERF2BOLT: failed to parse samples\n";
373 exit(1);
377 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
378 outs() << "PERF2BOLT: writing data for autofdo tools...\n";
379 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
380 TimerGroupDesc, opts::TimeAggregator);
382 std::error_code EC;
383 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
384 if (EC)
385 return EC;
387 // Format:
388 // number of unique traces
389 // from_1-to_1:count_1
390 // from_2-to_2:count_2
391 // ......
392 // from_n-to_n:count_n
393 // number of unique sample addresses
394 // addr_1:count_1
395 // addr_2:count_2
396 // ......
397 // addr_n:count_n
398 // number of unique LBR entries
399 // src_1->dst_1:count_1
400 // src_2->dst_2:count_2
401 // ......
402 // src_n->dst_n:count_n
404 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
406 // AutoFDO addresses are relative to the first allocated loadable program
407 // segment
408 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
409 if (Address < FirstAllocAddress)
410 return 0;
411 return Address - FirstAllocAddress;
414 OutFile << FallthroughLBRs.size() << "\n";
415 for (const auto &[Trace, Info] : FallthroughLBRs) {
416 OutFile << formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace.From),
417 filterAddress(Trace.To),
418 Info.InternCount + Info.ExternCount);
421 OutFile << BasicSamples.size() << "\n";
422 for (const auto [PC, HitCount] : BasicSamples)
423 OutFile << formatv("{0:x-}:{1}\n", filterAddress(PC), HitCount);
425 OutFile << BranchLBRs.size() << "\n";
426 for (const auto &[Trace, Info] : BranchLBRs) {
427 OutFile << formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace.From),
428 filterAddress(Trace.To), Info.TakenCount);
431 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
432 << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
433 << " unique branches to " << OutputFilename << "\n";
435 return std::error_code();
438 void DataAggregator::filterBinaryMMapInfo() {
439 if (opts::FilterPID) {
440 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
441 if (MMapInfoIter != BinaryMMapInfo.end()) {
442 MMapInfo MMap = MMapInfoIter->second;
443 BinaryMMapInfo.clear();
444 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
445 } else {
446 if (errs().has_colors())
447 errs().changeColor(raw_ostream::RED);
448 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
449 << opts::FilterPID << "\""
450 << " for binary \"" << BC->getFilename() << "\".";
451 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
452 errs() << " Profile for the following process is available:\n";
453 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
454 outs() << " " << MMI.second.PID
455 << (MMI.second.Forked ? " (forked)\n" : "\n");
457 if (errs().has_colors())
458 errs().resetColor();
460 exit(1);
465 int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
466 PerfProcessErrorCallbackTy Callback) {
467 std::string Error;
468 outs() << "PERF2BOLT: waiting for perf " << Name
469 << " collection to finish...\n";
470 sys::ProcessInfo PI = sys::Wait(Process.PI, std::nullopt, &Error);
472 if (!Error.empty()) {
473 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
474 deleteTempFiles();
475 exit(1);
478 if (PI.ReturnCode != 0) {
479 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
480 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
481 StringRef ErrBuf = (*ErrorMB)->getBuffer();
483 deleteTempFiles();
484 Callback(PI.ReturnCode, ErrBuf);
485 return PI.ReturnCode;
488 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
489 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
490 if (std::error_code EC = MB.getError()) {
491 errs() << "Cannot open " << Process.StdoutPath.data() << ": "
492 << EC.message() << "\n";
493 deleteTempFiles();
494 exit(1);
497 FileBuf = std::move(*MB);
498 ParsingBuf = FileBuf->getBuffer();
499 Col = 0;
500 Line = 1;
501 return PI.ReturnCode;
504 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
505 this->BC = &BC;
507 if (opts::ReadPreAggregated) {
508 parsePreAggregated();
509 return Error::success();
512 if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
513 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
514 processFileBuildID(*FileBuildID);
515 } else {
516 errs() << "BOLT-WARNING: build-id will not be checked because we could "
517 "not read one from input binary\n";
520 auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
521 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
522 exit(1);
525 auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
526 Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
527 "Cannot print 'addr' field.");
528 if (!NoData.match(ErrBuf))
529 ErrorCallback(ReturnCode, ErrBuf);
532 if (BC.IsLinuxKernel) {
533 // Current MMap parsing logic does not work with linux kernel.
534 // MMap entries for linux kernel uses PERF_RECORD_MMAP
535 // format instead of typical PERF_RECORD_MMAP2 format.
536 // Since linux kernel address mapping is absolute (same as
537 // in the ELF file), we avoid parsing MMap in linux kernel mode.
538 // While generating optimized linux kernel binary, we may need
539 // to parse MMap entries.
541 // In linux kernel mode, we analyze and optimize
542 // all linux kernel binary instructions, irrespective
543 // of whether they are due to system calls or due to
544 // interrupts. Therefore, we cannot ignore interrupt
545 // in Linux kernel mode.
546 opts::IgnoreInterruptLBR = false;
547 } else {
548 prepareToParse("mmap events", MMapEventsPPI, ErrorCallback);
549 if (parseMMapEvents())
550 errs() << "PERF2BOLT: failed to parse mmap events\n";
553 prepareToParse("task events", TaskEventsPPI, ErrorCallback);
554 if (parseTaskEvents())
555 errs() << "PERF2BOLT: failed to parse task events\n";
557 filterBinaryMMapInfo();
558 prepareToParse("events", MainEventsPPI, ErrorCallback);
560 if (opts::HeatmapMode) {
561 if (std::error_code EC = printLBRHeatMap()) {
562 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
563 exit(1);
565 exit(0);
568 if ((!opts::BasicAggregation && parseBranchEvents()) ||
569 (opts::BasicAggregation && parseBasicEvents()))
570 errs() << "PERF2BOLT: failed to parse samples\n";
572 // We can finish early if the goal is just to generate data for autofdo
573 if (opts::WriteAutoFDOData) {
574 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
575 errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
577 deleteTempFiles();
578 exit(0);
581 // Special handling for memory events
582 if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
583 return Error::success();
585 if (const std::error_code EC = parseMemEvents())
586 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
587 << '\n';
589 deleteTempFiles();
591 return Error::success();
594 Error DataAggregator::readProfile(BinaryContext &BC) {
595 processProfile(BC);
597 for (auto &BFI : BC.getBinaryFunctions()) {
598 BinaryFunction &Function = BFI.second;
599 convertBranchData(Function);
602 if (opts::AggregateOnly) {
603 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata)
604 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
605 report_error("cannot create output data file", EC);
607 // BAT YAML is handled by DataAggregator since normal YAML output requires
608 // CFG which is not available in BAT mode.
609 if (usesBAT()) {
610 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
611 if (std::error_code EC = writeBATYAML(BC, opts::OutputFilename))
612 report_error("cannot create output data file", EC);
613 if (!opts::SaveProfile.empty())
614 if (std::error_code EC = writeBATYAML(BC, opts::SaveProfile))
615 report_error("cannot create output data file", EC);
619 return Error::success();
622 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
623 return Function.hasProfileAvailable();
626 void DataAggregator::processProfile(BinaryContext &BC) {
627 if (opts::ReadPreAggregated)
628 processPreAggregated();
629 else if (opts::BasicAggregation)
630 processBasicEvents();
631 else
632 processBranchEvents();
634 processMemEvents();
636 // Mark all functions with registered events as having a valid profile.
637 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
638 : BinaryFunction::PF_LBR;
639 for (auto &BFI : BC.getBinaryFunctions()) {
640 BinaryFunction &BF = BFI.second;
641 if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
642 BF.markProfiled(Flags);
645 for (auto &FuncBranches : NamesToBranches)
646 llvm::stable_sort(FuncBranches.second.Data);
648 for (auto &MemEvents : NamesToMemEvents)
649 llvm::stable_sort(MemEvents.second.Data);
651 // Release intermediate storage.
652 clear(BranchLBRs);
653 clear(FallthroughLBRs);
654 clear(AggregatedLBRs);
655 clear(BasicSamples);
656 clear(MemSamples);
659 BinaryFunction *
660 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
661 if (!BC->containsAddress(Address))
662 return nullptr;
664 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
665 /*UseMaxSize=*/true);
668 BinaryFunction *
669 DataAggregator::getBATParentFunction(const BinaryFunction &Func) const {
670 if (BAT)
671 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress()))
672 return getBinaryFunctionContainingAddress(HotAddr);
673 return nullptr;
676 StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
677 bool BAT) {
678 if (!BAT)
679 return Func.getOneName();
681 const BinaryFunction *OrigFunc = &Func;
682 // If it is a local function, prefer the name containing the file name where
683 // the local function was declared
684 for (StringRef AlternativeName : OrigFunc->getNames()) {
685 size_t FileNameIdx = AlternativeName.find('/');
686 // Confirm the alternative name has the pattern Symbol/FileName/1 before
687 // using it
688 if (FileNameIdx == StringRef::npos ||
689 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
690 continue;
691 return AlternativeName;
693 return OrigFunc->getOneName();
696 bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
697 uint64_t Count) {
698 BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
699 BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
700 if (ParentFunc)
701 NumColdSamples += Count;
703 auto I = NamesToSamples.find(Func.getOneName());
704 if (I == NamesToSamples.end()) {
705 bool Success;
706 StringRef LocName = getLocationName(Func, BAT);
707 std::tie(I, Success) = NamesToSamples.insert(
708 std::make_pair(Func.getOneName(),
709 FuncSampleData(LocName, FuncSampleData::ContainerTy())));
712 Address -= Func.getAddress();
713 if (BAT)
714 Address = BAT->translate(Func.getAddress(), Address, /*IsBranchSrc=*/false);
716 I->second.bumpCount(Address, Count);
717 return true;
720 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
721 uint64_t To, uint64_t Count,
722 uint64_t Mispreds) {
723 FuncBranchData *AggrData = getBranchData(Func);
724 if (!AggrData) {
725 AggrData = &NamesToBranches[Func.getOneName()];
726 AggrData->Name = getLocationName(Func, BAT);
727 setBranchData(Func, AggrData);
730 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
731 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
732 AggrData->bumpBranchCount(From, To, Count, Mispreds);
733 return true;
736 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
737 BinaryFunction *ToFunc, uint64_t From,
738 uint64_t To, uint64_t Count,
739 uint64_t Mispreds) {
740 FuncBranchData *FromAggrData = nullptr;
741 FuncBranchData *ToAggrData = nullptr;
742 StringRef SrcFunc;
743 StringRef DstFunc;
744 if (FromFunc) {
745 SrcFunc = getLocationName(*FromFunc, BAT);
746 FromAggrData = getBranchData(*FromFunc);
747 if (!FromAggrData) {
748 FromAggrData = &NamesToBranches[FromFunc->getOneName()];
749 FromAggrData->Name = SrcFunc;
750 setBranchData(*FromFunc, FromAggrData);
753 recordExit(*FromFunc, From, Mispreds, Count);
755 if (ToFunc) {
756 DstFunc = getLocationName(*ToFunc, BAT);
757 ToAggrData = getBranchData(*ToFunc);
758 if (!ToAggrData) {
759 ToAggrData = &NamesToBranches[ToFunc->getOneName()];
760 ToAggrData->Name = DstFunc;
761 setBranchData(*ToFunc, ToAggrData);
764 recordEntry(*ToFunc, To, Mispreds, Count);
767 if (FromAggrData)
768 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
769 Count, Mispreds);
770 if (ToAggrData)
771 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
772 Count, Mispreds);
773 return true;
776 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
777 uint64_t Mispreds) {
778 bool IsReturn = false;
779 auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
780 if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
781 Addr -= Func->getAddress();
782 if (IsFrom) {
783 auto checkReturn = [&](auto MaybeInst) {
784 IsReturn = MaybeInst && BC->MIB->isReturn(*MaybeInst);
786 if (Func->hasInstructions())
787 checkReturn(Func->getInstructionAtOffset(Addr));
788 else
789 checkReturn(Func->disassembleInstructionAtOffset(Addr));
792 if (BAT)
793 Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
795 if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) {
796 Func = ParentFunc;
797 if (IsFrom)
798 NumColdSamples += Count;
801 return Func;
803 return nullptr;
806 BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
807 // Ignore returns.
808 if (IsReturn)
809 return true;
810 BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
811 if (!FromFunc && !ToFunc)
812 return false;
814 // Treat recursive control transfers as inter-branches.
815 if (FromFunc == ToFunc && To != 0) {
816 recordBranch(*FromFunc, From, To, Count, Mispreds);
817 return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
820 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
823 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
824 uint64_t Count) {
825 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
826 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
827 if (!FromFunc || !ToFunc) {
828 LLVM_DEBUG({
829 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
830 << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
831 << " and ending in " << ToFunc->getPrintName()
832 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
834 NumLongRangeTraces += Count;
835 return false;
837 if (FromFunc != ToFunc) {
838 NumInvalidTraces += Count;
839 LLVM_DEBUG({
840 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
841 << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
842 << " and ending in " << ToFunc->getPrintName()
843 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
845 return false;
848 std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
849 BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
850 Second.From)
851 : getFallthroughsInTrace(*FromFunc, First, Second, Count);
852 if (!FTs) {
853 LLVM_DEBUG(
854 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
855 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
856 << " and ending in " << ToFunc->getPrintName() << " @ "
857 << ToFunc->getPrintName() << " @ "
858 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
859 NumInvalidTraces += Count;
860 return false;
863 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
864 << FromFunc->getPrintName() << ":"
865 << Twine::utohexstr(First.To) << " to "
866 << Twine::utohexstr(Second.From) << ".\n");
867 BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
868 for (auto [From, To] : *FTs) {
869 if (BAT) {
870 From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
871 To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
873 doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
876 return true;
879 std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
880 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
881 const LBREntry &FirstLBR,
882 const LBREntry &SecondLBR,
883 uint64_t Count) const {
884 SmallVector<std::pair<uint64_t, uint64_t>, 16> Branches;
886 BinaryContext &BC = BF.getBinaryContext();
888 if (!BF.isSimple())
889 return std::nullopt;
891 assert(BF.hasCFG() && "can only record traces in CFG state");
893 // Offsets of the trace within this function.
894 const uint64_t From = FirstLBR.To - BF.getAddress();
895 const uint64_t To = SecondLBR.From - BF.getAddress();
897 if (From > To)
898 return std::nullopt;
900 const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
901 const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
903 if (!FromBB || !ToBB)
904 return std::nullopt;
906 // Adjust FromBB if the first LBR is a return from the last instruction in
907 // the previous block (that instruction should be a call).
908 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
909 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
910 const BinaryBasicBlock *PrevBB =
911 BF.getLayout().getBlock(FromBB->getIndex() - 1);
912 if (PrevBB->getSuccessor(FromBB->getLabel())) {
913 const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
914 if (Instr && BC.MIB->isCall(*Instr))
915 FromBB = PrevBB;
916 else
917 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
918 << '\n');
919 } else {
920 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
924 // Fill out information for fall-through edges. The From and To could be
925 // within the same basic block, e.g. when two call instructions are in the
926 // same block. In this case we skip the processing.
927 if (FromBB == ToBB)
928 return Branches;
930 // Process blocks in the original layout order.
931 BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex());
932 assert(BB == FromBB && "index mismatch");
933 while (BB != ToBB) {
934 BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1);
935 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
937 // Check for bad LBRs.
938 if (!BB->getSuccessor(NextBB->getLabel())) {
939 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
940 << " " << FirstLBR << '\n'
941 << " " << SecondLBR << '\n');
942 return std::nullopt;
945 const MCInst *Instr = BB->getLastNonPseudoInstr();
946 uint64_t Offset = 0;
947 if (Instr)
948 Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
949 else
950 Offset = BB->getOffset();
952 Branches.emplace_back(Offset, NextBB->getOffset());
954 BB = NextBB;
957 // Record fall-through jumps
958 for (const auto &[FromOffset, ToOffset] : Branches) {
959 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(FromOffset);
960 BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(ToOffset);
961 assert(FromBB && ToBB);
962 BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
963 BI.Count += Count;
966 return Branches;
969 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
970 uint64_t Count) const {
971 if (To > BF.getSize())
972 return false;
974 if (!BF.hasProfile())
975 BF.ExecutionCount = 0;
977 BinaryBasicBlock *EntryBB = nullptr;
978 if (To == 0) {
979 BF.ExecutionCount += Count;
980 if (!BF.empty())
981 EntryBB = &BF.front();
982 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
983 if (BB->isEntryPoint())
984 EntryBB = BB;
987 if (EntryBB)
988 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
990 return true;
993 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
994 uint64_t Count) const {
995 if (!BF.isSimple() || From > BF.getSize())
996 return false;
998 if (!BF.hasProfile())
999 BF.ExecutionCount = 0;
1001 return true;
1004 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
1005 LBREntry Res;
1006 ErrorOr<StringRef> FromStrRes = parseString('/');
1007 if (std::error_code EC = FromStrRes.getError())
1008 return EC;
1009 StringRef OffsetStr = FromStrRes.get();
1010 if (OffsetStr.getAsInteger(0, Res.From)) {
1011 reportError("expected hexadecimal number with From address");
1012 Diag << "Found: " << OffsetStr << "\n";
1013 return make_error_code(llvm::errc::io_error);
1016 ErrorOr<StringRef> ToStrRes = parseString('/');
1017 if (std::error_code EC = ToStrRes.getError())
1018 return EC;
1019 OffsetStr = ToStrRes.get();
1020 if (OffsetStr.getAsInteger(0, Res.To)) {
1021 reportError("expected hexadecimal number with To address");
1022 Diag << "Found: " << OffsetStr << "\n";
1023 return make_error_code(llvm::errc::io_error);
1026 ErrorOr<StringRef> MispredStrRes = parseString('/');
1027 if (std::error_code EC = MispredStrRes.getError())
1028 return EC;
1029 StringRef MispredStr = MispredStrRes.get();
1030 if (MispredStr.size() != 1 ||
1031 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1032 reportError("expected single char for mispred bit");
1033 Diag << "Found: " << MispredStr << "\n";
1034 return make_error_code(llvm::errc::io_error);
1036 Res.Mispred = MispredStr[0] == 'M';
1038 static bool MispredWarning = true;
1039 if (MispredStr[0] == '-' && MispredWarning) {
1040 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1041 MispredWarning = false;
1044 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1045 if (std::error_code EC = Rest.getError())
1046 return EC;
1047 if (Rest.get().size() < 5) {
1048 reportError("expected rest of LBR entry");
1049 Diag << "Found: " << Rest.get() << "\n";
1050 return make_error_code(llvm::errc::io_error);
1052 return Res;
1055 bool DataAggregator::checkAndConsumeFS() {
1056 if (ParsingBuf[0] != FieldSeparator)
1057 return false;
1059 ParsingBuf = ParsingBuf.drop_front(1);
1060 Col += 1;
1061 return true;
1064 void DataAggregator::consumeRestOfLine() {
1065 size_t LineEnd = ParsingBuf.find_first_of('\n');
1066 if (LineEnd == StringRef::npos) {
1067 ParsingBuf = StringRef();
1068 Col = 0;
1069 Line += 1;
1070 return;
1072 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1073 Col = 0;
1074 Line += 1;
1077 bool DataAggregator::checkNewLine() {
1078 return ParsingBuf[0] == '\n';
1081 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1082 PerfBranchSample Res;
1084 while (checkAndConsumeFS()) {
1087 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1088 if (std::error_code EC = PIDRes.getError())
1089 return EC;
1090 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1091 if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) {
1092 consumeRestOfLine();
1093 return make_error_code(errc::no_such_process);
1096 while (checkAndConsumeFS()) {
1099 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1100 if (std::error_code EC = PCRes.getError())
1101 return EC;
1102 Res.PC = PCRes.get();
1104 if (checkAndConsumeNewLine())
1105 return Res;
1107 while (!checkAndConsumeNewLine()) {
1108 checkAndConsumeFS();
1110 ErrorOr<LBREntry> LBRRes = parseLBREntry();
1111 if (std::error_code EC = LBRRes.getError())
1112 return EC;
1113 LBREntry LBR = LBRRes.get();
1114 if (ignoreKernelInterrupt(LBR))
1115 continue;
1116 if (!BC->HasFixedLoadAddress)
1117 adjustLBR(LBR, MMapInfoIter->second);
1118 Res.LBR.push_back(LBR);
1121 return Res;
1124 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1125 while (checkAndConsumeFS()) {
1128 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1129 if (std::error_code EC = PIDRes.getError())
1130 return EC;
1132 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1133 if (MMapInfoIter == BinaryMMapInfo.end()) {
1134 consumeRestOfLine();
1135 return PerfBasicSample{StringRef(), 0};
1138 while (checkAndConsumeFS()) {
1141 ErrorOr<StringRef> Event = parseString(FieldSeparator);
1142 if (std::error_code EC = Event.getError())
1143 return EC;
1145 while (checkAndConsumeFS()) {
1148 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1149 if (std::error_code EC = AddrRes.getError())
1150 return EC;
1152 if (!checkAndConsumeNewLine()) {
1153 reportError("expected end of line");
1154 return make_error_code(llvm::errc::io_error);
1157 uint64_t Address = *AddrRes;
1158 if (!BC->HasFixedLoadAddress)
1159 adjustAddress(Address, MMapInfoIter->second);
1161 return PerfBasicSample{Event.get(), Address};
1164 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1165 PerfMemSample Res{0, 0};
1167 while (checkAndConsumeFS()) {
1170 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1171 if (std::error_code EC = PIDRes.getError())
1172 return EC;
1174 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1175 if (MMapInfoIter == BinaryMMapInfo.end()) {
1176 consumeRestOfLine();
1177 return Res;
1180 while (checkAndConsumeFS()) {
1183 ErrorOr<StringRef> Event = parseString(FieldSeparator);
1184 if (std::error_code EC = Event.getError())
1185 return EC;
1186 if (!Event.get().contains("mem-loads")) {
1187 consumeRestOfLine();
1188 return Res;
1191 while (checkAndConsumeFS()) {
1194 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1195 if (std::error_code EC = AddrRes.getError())
1196 return EC;
1198 while (checkAndConsumeFS()) {
1201 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1202 if (std::error_code EC = PCRes.getError()) {
1203 consumeRestOfLine();
1204 return EC;
1207 if (!checkAndConsumeNewLine()) {
1208 reportError("expected end of line");
1209 return make_error_code(llvm::errc::io_error);
1212 uint64_t Address = *AddrRes;
1213 if (!BC->HasFixedLoadAddress)
1214 adjustAddress(Address, MMapInfoIter->second);
1216 return PerfMemSample{PCRes.get(), Address};
1219 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1220 auto parseOffset = [this]() -> ErrorOr<Location> {
1221 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1222 if (std::error_code EC = Res.getError())
1223 return EC;
1224 return Location(Res.get());
1227 size_t Sep = ParsingBuf.find_first_of(" \n");
1228 if (Sep == StringRef::npos)
1229 return parseOffset();
1230 StringRef LookAhead = ParsingBuf.substr(0, Sep);
1231 if (!LookAhead.contains(':'))
1232 return parseOffset();
1234 ErrorOr<StringRef> BuildID = parseString(':');
1235 if (std::error_code EC = BuildID.getError())
1236 return EC;
1237 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1238 if (std::error_code EC = Offset.getError())
1239 return EC;
1240 return Location(true, BuildID.get(), Offset.get());
1243 ErrorOr<DataAggregator::AggregatedLBREntry>
1244 DataAggregator::parseAggregatedLBREntry() {
1245 while (checkAndConsumeFS()) {
1248 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1249 if (std::error_code EC = TypeOrErr.getError())
1250 return EC;
1251 auto Type = AggregatedLBREntry::BRANCH;
1252 if (TypeOrErr.get() == "B") {
1253 Type = AggregatedLBREntry::BRANCH;
1254 } else if (TypeOrErr.get() == "F") {
1255 Type = AggregatedLBREntry::FT;
1256 } else if (TypeOrErr.get() == "f") {
1257 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1258 } else {
1259 reportError("expected B, F or f");
1260 return make_error_code(llvm::errc::io_error);
1263 while (checkAndConsumeFS()) {
1265 ErrorOr<Location> From = parseLocationOrOffset();
1266 if (std::error_code EC = From.getError())
1267 return EC;
1269 while (checkAndConsumeFS()) {
1271 ErrorOr<Location> To = parseLocationOrOffset();
1272 if (std::error_code EC = To.getError())
1273 return EC;
1275 while (checkAndConsumeFS()) {
1277 ErrorOr<int64_t> Frequency =
1278 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1279 if (std::error_code EC = Frequency.getError())
1280 return EC;
1282 uint64_t Mispreds = 0;
1283 if (Type == AggregatedLBREntry::BRANCH) {
1284 while (checkAndConsumeFS()) {
1286 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1287 if (std::error_code EC = MispredsOrErr.getError())
1288 return EC;
1289 Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1292 if (!checkAndConsumeNewLine()) {
1293 reportError("expected end of line");
1294 return make_error_code(llvm::errc::io_error);
1297 return AggregatedLBREntry{From.get(), To.get(),
1298 static_cast<uint64_t>(Frequency.get()), Mispreds,
1299 Type};
1302 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1303 return opts::IgnoreInterruptLBR &&
1304 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1307 std::error_code DataAggregator::printLBRHeatMap() {
1308 outs() << "PERF2BOLT: parse branch events...\n";
1309 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1310 TimerGroupDesc, opts::TimeAggregator);
1312 if (BC->IsLinuxKernel) {
1313 opts::HeatmapMaxAddress = 0xffffffffffffffff;
1314 opts::HeatmapMinAddress = KernelBaseAddr;
1316 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1317 opts::HeatmapMaxAddress, getTextSections(BC));
1318 uint64_t NumTotalSamples = 0;
1320 if (opts::BasicAggregation) {
1321 while (hasData()) {
1322 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1323 if (std::error_code EC = SampleRes.getError()) {
1324 if (EC == errc::no_such_process)
1325 continue;
1326 return EC;
1328 PerfBasicSample &Sample = SampleRes.get();
1329 HM.registerAddress(Sample.PC);
1330 NumTotalSamples++;
1332 outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1333 } else {
1334 while (hasData()) {
1335 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1336 if (std::error_code EC = SampleRes.getError()) {
1337 if (EC == errc::no_such_process)
1338 continue;
1339 return EC;
1342 PerfBranchSample &Sample = SampleRes.get();
1344 // LBRs are stored in reverse execution order. NextLBR refers to the next
1345 // executed branch record.
1346 const LBREntry *NextLBR = nullptr;
1347 for (const LBREntry &LBR : Sample.LBR) {
1348 if (NextLBR) {
1349 // Record fall-through trace.
1350 const uint64_t TraceFrom = LBR.To;
1351 const uint64_t TraceTo = NextLBR->From;
1352 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1354 NextLBR = &LBR;
1356 if (!Sample.LBR.empty()) {
1357 HM.registerAddress(Sample.LBR.front().To);
1358 HM.registerAddress(Sample.LBR.back().From);
1360 NumTotalSamples += Sample.LBR.size();
1362 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1363 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1366 if (!NumTotalSamples) {
1367 if (opts::BasicAggregation) {
1368 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1369 "Cannot build heatmap.";
1370 } else {
1371 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1372 "Cannot build heatmap. Use -nl for building heatmap from "
1373 "basic events.\n";
1375 exit(1);
1378 outs() << "HEATMAP: building heat map...\n";
1380 for (const auto &LBR : FallthroughLBRs) {
1381 const Trace &Trace = LBR.first;
1382 const FTInfo &Info = LBR.second;
1383 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1386 if (HM.getNumInvalidRanges())
1387 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1389 if (!HM.size()) {
1390 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1391 exit(1);
1394 HM.print(opts::OutputFilename);
1395 if (opts::OutputFilename == "-")
1396 HM.printCDF(opts::OutputFilename);
1397 else
1398 HM.printCDF(opts::OutputFilename + ".csv");
1399 if (opts::OutputFilename == "-")
1400 HM.printSectionHotness(opts::OutputFilename);
1401 else
1402 HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
1404 return std::error_code();
1407 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1408 bool NeedsSkylakeFix) {
1409 uint64_t NumTraces{0};
1410 // LBRs are stored in reverse execution order. NextPC refers to the next
1411 // recorded executed PC.
1412 uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
1413 uint32_t NumEntry = 0;
1414 for (const LBREntry &LBR : Sample.LBR) {
1415 ++NumEntry;
1416 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1417 // sometimes record entry 32 as an exact copy of entry 31. This will cause
1418 // us to likely record an invalid trace and generate a stale function for
1419 // BAT mode (non BAT disassembles the function and is able to ignore this
1420 // trace at aggregation time). Drop first 2 entries (last two, in
1421 // chronological order)
1422 if (NeedsSkylakeFix && NumEntry <= 2)
1423 continue;
1424 if (NextPC) {
1425 // Record fall-through trace.
1426 const uint64_t TraceFrom = LBR.To;
1427 const uint64_t TraceTo = NextPC;
1428 const BinaryFunction *TraceBF =
1429 getBinaryFunctionContainingAddress(TraceFrom);
1430 if (TraceBF && TraceBF->containsAddress(TraceTo)) {
1431 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1432 if (TraceBF->containsAddress(LBR.From))
1433 ++Info.InternCount;
1434 else
1435 ++Info.ExternCount;
1436 } else {
1437 const BinaryFunction *ToFunc =
1438 getBinaryFunctionContainingAddress(TraceTo);
1439 if (TraceBF && ToFunc) {
1440 LLVM_DEBUG({
1441 dbgs() << "Invalid trace starting in " << TraceBF->getPrintName()
1442 << formatv(" @ {0:x}", TraceFrom - TraceBF->getAddress())
1443 << formatv(" and ending @ {0:x}\n", TraceTo);
1445 ++NumInvalidTraces;
1446 } else {
1447 LLVM_DEBUG({
1448 dbgs() << "Out of range trace starting in "
1449 << (TraceBF ? TraceBF->getPrintName() : "None")
1450 << formatv(" @ {0:x}",
1451 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
1452 << " and ending in "
1453 << (ToFunc ? ToFunc->getPrintName() : "None")
1454 << formatv(" @ {0:x}\n",
1455 TraceTo - (ToFunc ? ToFunc->getAddress() : 0));
1457 ++NumLongRangeTraces;
1460 ++NumTraces;
1462 NextPC = LBR.From;
1464 uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
1465 uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
1466 if (!From && !To)
1467 continue;
1468 TakenBranchInfo &Info = BranchLBRs[Trace(From, To)];
1469 ++Info.TakenCount;
1470 Info.MispredCount += LBR.Mispred;
1472 return NumTraces;
1475 std::error_code DataAggregator::parseBranchEvents() {
1476 outs() << "PERF2BOLT: parse branch events...\n";
1477 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1478 TimerGroupDesc, opts::TimeAggregator);
1480 uint64_t NumTotalSamples = 0;
1481 uint64_t NumEntries = 0;
1482 uint64_t NumSamples = 0;
1483 uint64_t NumSamplesNoLBR = 0;
1484 uint64_t NumTraces = 0;
1485 bool NeedsSkylakeFix = false;
1487 while (hasData() && NumTotalSamples < opts::MaxSamples) {
1488 ++NumTotalSamples;
1490 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1491 if (std::error_code EC = SampleRes.getError()) {
1492 if (EC == errc::no_such_process)
1493 continue;
1494 return EC;
1496 ++NumSamples;
1498 PerfBranchSample &Sample = SampleRes.get();
1499 if (opts::WriteAutoFDOData)
1500 ++BasicSamples[Sample.PC];
1502 if (Sample.LBR.empty()) {
1503 ++NumSamplesNoLBR;
1504 continue;
1507 NumEntries += Sample.LBR.size();
1508 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1509 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1510 NeedsSkylakeFix = true;
1513 NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
1516 for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
1517 for (const uint64_t Addr : {Trace.From, Trace.To})
1518 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1519 BF->setHasProfileAvailable();
1521 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1522 OS << " (";
1523 if (OS.has_colors()) {
1524 if (Percent > T2)
1525 OS.changeColor(raw_ostream::RED);
1526 else if (Percent > T1)
1527 OS.changeColor(raw_ostream::YELLOW);
1528 else
1529 OS.changeColor(raw_ostream::GREEN);
1531 OS << format("%.1f%%", Percent);
1532 if (OS.has_colors())
1533 OS.resetColor();
1534 OS << ")";
1537 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1538 << " LBR entries\n";
1539 if (NumTotalSamples) {
1540 if (NumSamples && NumSamplesNoLBR == NumSamples) {
1541 // Note: we don't know if perf2bolt is being used to parse memory samples
1542 // at this point. In this case, it is OK to parse zero LBRs.
1543 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1544 "LBR. Record profile with perf record -j any or run perf2bolt "
1545 "in no-LBR mode with -nl (the performance improvement in -nl "
1546 "mode may be limited)\n";
1547 } else {
1548 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1549 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1550 outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1551 printColored(outs(), PercentIgnored, 20, 50);
1552 outs() << " were ignored\n";
1553 if (PercentIgnored > 50.0f)
1554 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1555 "were attributed to the input binary\n";
1558 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1559 << NumInvalidTraces;
1560 float Perc = 0.0f;
1561 if (NumTraces > 0) {
1562 Perc = NumInvalidTraces * 100.0f / NumTraces;
1563 printColored(outs(), Perc, 5, 10);
1565 outs() << "\n";
1566 if (Perc > 10.0f)
1567 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1568 "binary is probably not the same binary used during profiling "
1569 "collection. The generated data may be ineffective for improving "
1570 "performance.\n\n";
1572 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1573 << NumLongRangeTraces;
1574 if (NumTraces > 0)
1575 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1576 outs() << "\n";
1578 if (NumColdSamples > 0) {
1579 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1580 outs() << "PERF2BOLT: " << NumColdSamples
1581 << format(" (%.1f%%)", ColdSamples)
1582 << " samples recorded in cold regions of split functions.\n";
1583 if (ColdSamples > 5.0f)
1584 outs()
1585 << "WARNING: The BOLT-processed binary where samples were collected "
1586 "likely used bad data or your service observed a large shift in "
1587 "profile. You may want to audit this.\n";
1590 return std::error_code();
1593 void DataAggregator::processBranchEvents() {
1594 outs() << "PERF2BOLT: processing branch events...\n";
1595 NamedRegionTimer T("processBranch", "Processing branch events",
1596 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1598 for (const auto &AggrLBR : FallthroughLBRs) {
1599 const Trace &Loc = AggrLBR.first;
1600 const FTInfo &Info = AggrLBR.second;
1601 LBREntry First{Loc.From, Loc.From, false};
1602 LBREntry Second{Loc.To, Loc.To, false};
1603 if (Info.InternCount)
1604 doTrace(First, Second, Info.InternCount);
1605 if (Info.ExternCount) {
1606 First.From = 0;
1607 doTrace(First, Second, Info.ExternCount);
1611 for (const auto &AggrLBR : BranchLBRs) {
1612 const Trace &Loc = AggrLBR.first;
1613 const TakenBranchInfo &Info = AggrLBR.second;
1614 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1618 std::error_code DataAggregator::parseBasicEvents() {
1619 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1620 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1621 TimerGroupDesc, opts::TimeAggregator);
1622 while (hasData()) {
1623 ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1624 if (std::error_code EC = Sample.getError())
1625 return EC;
1627 if (!Sample->PC)
1628 continue;
1630 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1631 BF->setHasProfileAvailable();
1633 ++BasicSamples[Sample->PC];
1634 EventNames.insert(Sample->EventName);
1637 return std::error_code();
1640 void DataAggregator::processBasicEvents() {
1641 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1642 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1643 TimerGroupDesc, opts::TimeAggregator);
1644 uint64_t OutOfRangeSamples = 0;
1645 uint64_t NumSamples = 0;
1646 for (auto &Sample : BasicSamples) {
1647 const uint64_t PC = Sample.first;
1648 const uint64_t HitCount = Sample.second;
1649 NumSamples += HitCount;
1650 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1651 if (!Func) {
1652 OutOfRangeSamples += HitCount;
1653 continue;
1656 doSample(*Func, PC, HitCount);
1658 outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1660 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1661 << OutOfRangeSamples;
1662 float Perc = 0.0f;
1663 if (NumSamples > 0) {
1664 outs() << " (";
1665 Perc = OutOfRangeSamples * 100.0f / NumSamples;
1666 if (outs().has_colors()) {
1667 if (Perc > 60.0f)
1668 outs().changeColor(raw_ostream::RED);
1669 else if (Perc > 40.0f)
1670 outs().changeColor(raw_ostream::YELLOW);
1671 else
1672 outs().changeColor(raw_ostream::GREEN);
1674 outs() << format("%.1f%%", Perc);
1675 if (outs().has_colors())
1676 outs().resetColor();
1677 outs() << ")";
1679 outs() << "\n";
1680 if (Perc > 80.0f)
1681 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1682 "binary is probably not the same binary used during profiling "
1683 "collection. The generated data may be ineffective for improving "
1684 "performance.\n\n";
1687 std::error_code DataAggregator::parseMemEvents() {
1688 outs() << "PERF2BOLT: parsing memory events...\n";
1689 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1690 TimerGroupDesc, opts::TimeAggregator);
1691 while (hasData()) {
1692 ErrorOr<PerfMemSample> Sample = parseMemSample();
1693 if (std::error_code EC = Sample.getError())
1694 return EC;
1696 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1697 BF->setHasProfileAvailable();
1699 MemSamples.emplace_back(std::move(Sample.get()));
1702 return std::error_code();
1705 void DataAggregator::processMemEvents() {
1706 NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1707 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1708 for (const PerfMemSample &Sample : MemSamples) {
1709 uint64_t PC = Sample.PC;
1710 uint64_t Addr = Sample.Addr;
1711 StringRef FuncName;
1712 StringRef MemName;
1714 // Try to resolve symbol for PC
1715 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1716 if (!Func) {
1717 LLVM_DEBUG(if (PC != 0) {
1718 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr);
1720 continue;
1723 FuncName = Func->getOneName();
1724 PC -= Func->getAddress();
1726 // Try to resolve symbol for memory load
1727 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1728 MemName = BD->getName();
1729 Addr -= BD->getAddress();
1730 } else if (opts::FilterMemProfile) {
1731 // Filter out heap/stack accesses
1732 continue;
1735 const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1736 const Location AddrLoc(!MemName.empty(), MemName, Addr);
1738 FuncMemData *MemData = &NamesToMemEvents[FuncName];
1739 MemData->Name = FuncName;
1740 setMemData(*Func, MemData);
1741 MemData->update(FuncLoc, AddrLoc);
1742 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1746 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1747 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1748 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1749 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1750 while (hasData()) {
1751 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1752 if (std::error_code EC = AggrEntry.getError())
1753 return EC;
1755 for (const uint64_t Addr : {AggrEntry->From.Offset, AggrEntry->To.Offset})
1756 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1757 BF->setHasProfileAvailable();
1759 AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1762 return std::error_code();
1765 void DataAggregator::processPreAggregated() {
1766 outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1767 NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1768 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1770 uint64_t NumTraces = 0;
1771 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1772 switch (AggrEntry.EntryType) {
1773 case AggregatedLBREntry::BRANCH:
1774 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1775 AggrEntry.Mispreds);
1776 break;
1777 case AggregatedLBREntry::FT:
1778 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1779 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1780 ? AggrEntry.From.Offset
1781 : 0,
1782 AggrEntry.From.Offset, false};
1783 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1784 doTrace(First, Second, AggrEntry.Count);
1785 NumTraces += AggrEntry.Count;
1786 break;
1791 outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1792 << " aggregated LBR entries\n";
1793 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1794 << NumInvalidTraces;
1795 float Perc = 0.0f;
1796 if (NumTraces > 0) {
1797 outs() << " (";
1798 Perc = NumInvalidTraces * 100.0f / NumTraces;
1799 if (outs().has_colors()) {
1800 if (Perc > 10.0f)
1801 outs().changeColor(raw_ostream::RED);
1802 else if (Perc > 5.0f)
1803 outs().changeColor(raw_ostream::YELLOW);
1804 else
1805 outs().changeColor(raw_ostream::GREEN);
1807 outs() << format("%.1f%%", Perc);
1808 if (outs().has_colors())
1809 outs().resetColor();
1810 outs() << ")";
1812 outs() << "\n";
1813 if (Perc > 10.0f)
1814 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1815 "binary is probably not the same binary used during profiling "
1816 "collection. The generated data may be ineffective for improving "
1817 "performance.\n\n";
1819 outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1820 << NumLongRangeTraces;
1821 if (NumTraces > 0)
1822 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1823 outs() << "\n";
1826 std::optional<int32_t> DataAggregator::parseCommExecEvent() {
1827 size_t LineEnd = ParsingBuf.find_first_of("\n");
1828 if (LineEnd == StringRef::npos) {
1829 reportError("expected rest of line");
1830 Diag << "Found: " << ParsingBuf << "\n";
1831 return std::nullopt;
1833 StringRef Line = ParsingBuf.substr(0, LineEnd);
1835 size_t Pos = Line.find("PERF_RECORD_COMM exec");
1836 if (Pos == StringRef::npos)
1837 return std::nullopt;
1838 Line = Line.drop_front(Pos);
1840 // Line:
1841 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1842 StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1843 int32_t PID;
1844 if (PIDStr.getAsInteger(10, PID)) {
1845 reportError("expected PID");
1846 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1847 return std::nullopt;
1850 return PID;
1853 namespace {
1854 std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1855 const StringRef SecTimeStr = TimeStr.split('.').first;
1856 const StringRef USecTimeStr = TimeStr.split('.').second;
1857 uint64_t SecTime;
1858 uint64_t USecTime;
1859 if (SecTimeStr.getAsInteger(10, SecTime) ||
1860 USecTimeStr.getAsInteger(10, USecTime))
1861 return std::nullopt;
1862 return SecTime * 1000000ULL + USecTime;
1866 std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1867 while (checkAndConsumeFS()) {
1870 size_t LineEnd = ParsingBuf.find_first_of("\n");
1871 if (LineEnd == StringRef::npos) {
1872 reportError("expected rest of line");
1873 Diag << "Found: " << ParsingBuf << "\n";
1874 return std::nullopt;
1876 StringRef Line = ParsingBuf.substr(0, LineEnd);
1878 size_t Pos = Line.find("PERF_RECORD_FORK");
1879 if (Pos == StringRef::npos) {
1880 consumeRestOfLine();
1881 return std::nullopt;
1884 ForkInfo FI;
1886 const StringRef TimeStr =
1887 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1888 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1889 FI.Time = *TimeRes;
1892 Line = Line.drop_front(Pos);
1894 // Line:
1895 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1896 const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1897 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1898 reportError("expected PID");
1899 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1900 return std::nullopt;
1903 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1904 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1905 reportError("expected PID");
1906 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1907 return std::nullopt;
1910 consumeRestOfLine();
1912 return FI;
1915 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1916 DataAggregator::parseMMapEvent() {
1917 while (checkAndConsumeFS()) {
1920 MMapInfo ParsedInfo;
1922 size_t LineEnd = ParsingBuf.find_first_of("\n");
1923 if (LineEnd == StringRef::npos) {
1924 reportError("expected rest of line");
1925 Diag << "Found: " << ParsingBuf << "\n";
1926 return make_error_code(llvm::errc::io_error);
1928 StringRef Line = ParsingBuf.substr(0, LineEnd);
1930 size_t Pos = Line.find("PERF_RECORD_MMAP2");
1931 if (Pos == StringRef::npos) {
1932 consumeRestOfLine();
1933 return std::make_pair(StringRef(), ParsedInfo);
1936 // Line:
1937 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1939 const StringRef TimeStr =
1940 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1941 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1942 ParsedInfo.Time = *TimeRes;
1944 Line = Line.drop_front(Pos);
1946 // Line:
1947 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1949 StringRef FileName = Line.rsplit(FieldSeparator).second;
1950 if (FileName.starts_with("//") || FileName.starts_with("[")) {
1951 consumeRestOfLine();
1952 return std::make_pair(StringRef(), ParsedInfo);
1954 FileName = sys::path::filename(FileName);
1956 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1957 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1958 reportError("expected PID");
1959 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1960 return make_error_code(llvm::errc::io_error);
1963 const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1964 if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
1965 reportError("expected base address");
1966 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1967 return make_error_code(llvm::errc::io_error);
1970 const StringRef SizeStr = Line.split('(').second.split(')').first;
1971 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1972 reportError("expected mmaped size");
1973 Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1974 return make_error_code(llvm::errc::io_error);
1977 const StringRef OffsetStr =
1978 Line.split('@').second.ltrim().split(FieldSeparator).first;
1979 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1980 reportError("expected mmaped page-aligned offset");
1981 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1982 return make_error_code(llvm::errc::io_error);
1985 consumeRestOfLine();
1987 return std::make_pair(FileName, ParsedInfo);
1990 std::error_code DataAggregator::parseMMapEvents() {
1991 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1992 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1993 TimerGroupDesc, opts::TimeAggregator);
1995 std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1996 while (hasData()) {
1997 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1998 if (std::error_code EC = FileMMapInfoRes.getError())
1999 return EC;
2001 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
2002 if (FileMMapInfo.second.PID == -1)
2003 continue;
2004 if (FileMMapInfo.first == "(deleted)")
2005 continue;
2007 // Consider only the first mapping of the file for any given PID
2008 auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
2009 bool PIDExists = llvm::any_of(make_range(Range), [&](const auto &MI) {
2010 return MI.second.PID == FileMMapInfo.second.PID;
2013 if (PIDExists)
2014 continue;
2016 GlobalMMapInfo.insert(FileMMapInfo);
2019 LLVM_DEBUG({
2020 dbgs() << "FileName -> mmap info:\n"
2021 << " Filename : PID [MMapAddr, Size, Offset]\n";
2022 for (const auto &[Name, MMap] : GlobalMMapInfo)
2023 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID,
2024 MMap.MMapAddress, MMap.Size, MMap.Offset);
2027 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
2028 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
2029 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2030 << "\" for profile matching\n";
2031 NameToUse = BuildIDBinaryName;
2034 auto Range = GlobalMMapInfo.equal_range(NameToUse);
2035 for (MMapInfo &MMapInfo : llvm::make_second_range(make_range(Range))) {
2036 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2037 // Check that the binary mapping matches one of the segments.
2038 bool MatchFound = llvm::any_of(
2039 llvm::make_second_range(BC->SegmentMapInfo),
2040 [&](SegmentInfo &SegInfo) {
2041 // The mapping is page-aligned and hence the MMapAddress could be
2042 // different from the segment start address. We cannot know the page
2043 // size of the mapping, but we know it should not exceed the segment
2044 // alignment value. Hence we are performing an approximate check.
2045 return SegInfo.Address >= MMapInfo.MMapAddress &&
2046 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment;
2048 if (!MatchFound) {
2049 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2050 << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2051 continue;
2055 // Set base address for shared objects.
2056 if (!BC->HasFixedLoadAddress) {
2057 std::optional<uint64_t> BaseAddress =
2058 BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2059 if (!BaseAddress) {
2060 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2061 "binary when memory mapped at 0x"
2062 << Twine::utohexstr(MMapInfo.MMapAddress)
2063 << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2064 << ". Ignoring profile data for this mapping\n";
2065 continue;
2066 } else {
2067 MMapInfo.BaseAddress = *BaseAddress;
2071 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2074 if (BinaryMMapInfo.empty()) {
2075 if (errs().has_colors())
2076 errs().changeColor(raw_ostream::RED);
2077 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2078 << BC->getFilename() << "\".";
2079 if (!GlobalMMapInfo.empty()) {
2080 errs() << " Profile for the following binary name(s) is available:\n";
2081 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2082 I = GlobalMMapInfo.upper_bound(I->first))
2083 errs() << " " << I->first << '\n';
2084 errs() << "Please rename the input binary.\n";
2085 } else {
2086 errs() << " Failed to extract any binary name from a profile.\n";
2088 if (errs().has_colors())
2089 errs().resetColor();
2091 exit(1);
2094 return std::error_code();
2097 std::error_code DataAggregator::parseTaskEvents() {
2098 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2099 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2100 TimerGroupDesc, opts::TimeAggregator);
2102 while (hasData()) {
2103 if (std::optional<int32_t> CommInfo = parseCommExecEvent()) {
2104 // Remove forked child that ran execve
2105 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2106 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2107 BinaryMMapInfo.erase(MMapInfoIter);
2108 consumeRestOfLine();
2109 continue;
2112 std::optional<ForkInfo> ForkInfo = parseForkEvent();
2113 if (!ForkInfo)
2114 continue;
2116 if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2117 continue;
2119 if (ForkInfo->Time == 0) {
2120 // Process was forked and mmaped before perf ran. In this case the child
2121 // should have its own mmap entry unless it was execve'd.
2122 continue;
2125 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2126 if (MMapInfoIter == BinaryMMapInfo.end())
2127 continue;
2129 MMapInfo MMapInfo = MMapInfoIter->second;
2130 MMapInfo.PID = ForkInfo->ChildPID;
2131 MMapInfo.Forked = true;
2132 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2135 outs() << "PERF2BOLT: input binary is associated with "
2136 << BinaryMMapInfo.size() << " PID(s)\n";
2138 LLVM_DEBUG({
2139 for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo))
2140 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI.PID,
2141 (MMI.Forked ? " (forked)" : ""), MMI.MMapAddress,
2142 MMI.Size);
2145 return std::error_code();
2148 std::optional<std::pair<StringRef, StringRef>>
2149 DataAggregator::parseNameBuildIDPair() {
2150 while (checkAndConsumeFS()) {
2153 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2154 if (std::error_code EC = BuildIDStr.getError())
2155 return std::nullopt;
2157 // If one of the strings is missing, don't issue a parsing error, but still
2158 // do not return a value.
2159 consumeAllRemainingFS();
2160 if (checkNewLine())
2161 return std::nullopt;
2163 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2164 if (std::error_code EC = NameStr.getError())
2165 return std::nullopt;
2167 consumeRestOfLine();
2168 return std::make_pair(NameStr.get(), BuildIDStr.get());
2171 bool DataAggregator::hasAllBuildIDs() {
2172 const StringRef SavedParsingBuf = ParsingBuf;
2174 if (!hasData())
2175 return false;
2177 bool HasInvalidEntries = false;
2178 while (hasData()) {
2179 if (!parseNameBuildIDPair()) {
2180 HasInvalidEntries = true;
2181 break;
2185 ParsingBuf = SavedParsingBuf;
2187 return !HasInvalidEntries;
2190 std::optional<StringRef>
2191 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2192 const StringRef SavedParsingBuf = ParsingBuf;
2194 StringRef FileName;
2195 while (hasData()) {
2196 std::optional<std::pair<StringRef, StringRef>> IDPair =
2197 parseNameBuildIDPair();
2198 if (!IDPair) {
2199 consumeRestOfLine();
2200 continue;
2203 if (IDPair->second.starts_with(FileBuildID)) {
2204 FileName = sys::path::filename(IDPair->first);
2205 break;
2209 ParsingBuf = SavedParsingBuf;
2211 if (!FileName.empty())
2212 return FileName;
2214 return std::nullopt;
2217 std::error_code
2218 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2219 std::error_code EC;
2220 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2221 if (EC)
2222 return EC;
2224 bool WriteMemLocs = false;
2226 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2227 if (WriteMemLocs)
2228 OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2229 else
2230 OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2231 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2232 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2235 uint64_t BranchValues = 0;
2236 uint64_t MemValues = 0;
2238 if (BAT)
2239 OutFile << "boltedcollection\n";
2240 if (opts::BasicAggregation) {
2241 OutFile << "no_lbr";
2242 for (const StringMapEntry<std::nullopt_t> &Entry : EventNames)
2243 OutFile << " " << Entry.getKey();
2244 OutFile << "\n";
2246 for (const auto &KV : NamesToSamples) {
2247 const FuncSampleData &FSD = KV.second;
2248 for (const SampleInfo &SI : FSD.Data) {
2249 writeLocation(SI.Loc);
2250 OutFile << SI.Hits << "\n";
2251 ++BranchValues;
2254 } else {
2255 for (const auto &KV : NamesToBranches) {
2256 const FuncBranchData &FBD = KV.second;
2257 for (const BranchInfo &BI : FBD.Data) {
2258 writeLocation(BI.From);
2259 writeLocation(BI.To);
2260 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2261 ++BranchValues;
2263 for (const BranchInfo &BI : FBD.EntryData) {
2264 // Do not output if source is a known symbol, since this was already
2265 // accounted for in the source function
2266 if (BI.From.IsSymbol)
2267 continue;
2268 writeLocation(BI.From);
2269 writeLocation(BI.To);
2270 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2271 ++BranchValues;
2275 WriteMemLocs = true;
2276 for (const auto &KV : NamesToMemEvents) {
2277 const FuncMemData &FMD = KV.second;
2278 for (const MemInfo &MemEvent : FMD.Data) {
2279 writeLocation(MemEvent.Offset);
2280 writeLocation(MemEvent.Addr);
2281 OutFile << MemEvent.Count << "\n";
2282 ++MemValues;
2287 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2288 << " memory objects to " << OutputFilename << "\n";
2290 return std::error_code();
2293 std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
2294 StringRef OutputFilename) const {
2295 std::error_code EC;
2296 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2297 if (EC)
2298 return EC;
2300 yaml::bolt::BinaryProfile BP;
2302 const MCPseudoProbeDecoder *PseudoProbeDecoder =
2303 opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
2305 // Fill out the header info.
2306 BP.Header.Version = 1;
2307 BP.Header.FileName = std::string(BC.getFilename());
2308 std::optional<StringRef> BuildID = BC.getFileBuildID();
2309 BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
2310 BP.Header.Origin = std::string(getReaderName());
2311 // Only the input binary layout order is supported.
2312 BP.Header.IsDFSOrder = false;
2313 // FIXME: Need to match hash function used to produce BAT hashes.
2314 BP.Header.HashFunction = HashFunction::Default;
2316 ListSeparator LS(",");
2317 raw_string_ostream EventNamesOS(BP.Header.EventNames);
2318 for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames)
2319 EventNamesOS << LS << EventEntry.first().str();
2321 BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
2322 : BinaryFunction::PF_LBR;
2324 if (!opts::BasicAggregation) {
2325 // Convert profile for functions not covered by BAT
2326 for (auto &BFI : BC.getBinaryFunctions()) {
2327 BinaryFunction &Function = BFI.second;
2328 if (!Function.hasProfile())
2329 continue;
2330 if (BAT->isBATFunction(Function.getAddress()))
2331 continue;
2332 BP.Functions.emplace_back(
2333 YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT));
2336 for (const auto &KV : NamesToBranches) {
2337 const StringRef FuncName = KV.first;
2338 const FuncBranchData &Branches = KV.second;
2339 yaml::bolt::BinaryFunctionProfile YamlBF;
2340 BinaryData *BD = BC.getBinaryDataByName(FuncName);
2341 assert(BD);
2342 uint64_t FuncAddress = BD->getAddress();
2343 if (!BAT->isBATFunction(FuncAddress))
2344 continue;
2345 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress);
2346 assert(BF);
2347 YamlBF.Name = getLocationName(*BF, BAT);
2348 YamlBF.Id = BF->getFunctionNumber();
2349 YamlBF.Hash = BAT->getBFHash(FuncAddress);
2350 YamlBF.ExecCount = BF->getKnownExecutionCount();
2351 YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
2352 const BoltAddressTranslation::BBHashMapTy &BlockMap =
2353 BAT->getBBHashMap(FuncAddress);
2354 YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);
2356 for (auto &&[Entry, YamlBB] : llvm::zip(BlockMap, YamlBF.Blocks)) {
2357 const auto &Block = Entry.second;
2358 YamlBB.Hash = Block.Hash;
2359 YamlBB.Index = Block.Index;
2362 // Lookup containing basic block offset and index
2363 auto getBlock = [&BlockMap](uint32_t Offset) {
2364 auto BlockIt = BlockMap.upper_bound(Offset);
2365 if (LLVM_UNLIKELY(BlockIt == BlockMap.begin())) {
2366 errs() << "BOLT-ERROR: invalid BAT section\n";
2367 exit(1);
2369 --BlockIt;
2370 return std::pair(BlockIt->first, BlockIt->second.Index);
2373 for (const BranchInfo &BI : Branches.Data) {
2374 using namespace yaml::bolt;
2375 const auto &[BlockOffset, BlockIndex] = getBlock(BI.From.Offset);
2376 BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
2377 if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) {
2378 // Internal branch
2379 const unsigned SuccIndex = getBlock(BI.To.Offset).second;
2380 auto &SI = YamlBB.Successors.emplace_back(SuccessorInfo{SuccIndex});
2381 SI.Count = BI.Branches;
2382 SI.Mispreds = BI.Mispreds;
2383 } else {
2384 // Call
2385 const uint32_t Offset = BI.From.Offset - BlockOffset;
2386 auto &CSI = YamlBB.CallSites.emplace_back(CallSiteInfo{Offset});
2387 CSI.Count = BI.Branches;
2388 CSI.Mispreds = BI.Mispreds;
2389 if (const BinaryData *BD = BC.getBinaryDataByName(BI.To.Name))
2390 YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT,
2391 BI.To.Offset);
2394 // Set entry counts, similar to DataReader::readProfile.
2395 for (const BranchInfo &BI : Branches.EntryData) {
2396 if (!BlockMap.isInputBlock(BI.To.Offset)) {
2397 if (opts::Verbosity >= 1)
2398 errs() << "BOLT-WARNING: Unexpected EntryData in " << FuncName
2399 << " at 0x" << Twine::utohexstr(BI.To.Offset) << '\n';
2400 continue;
2402 const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset);
2403 YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
2405 if (PseudoProbeDecoder) {
2406 if ((YamlBF.GUID = BF->getGUID())) {
2407 const MCPseudoProbeFuncDesc *FuncDesc =
2408 PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
2409 YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
2411 // Fetch probes belonging to all fragments
2412 const AddressProbesMap &ProbeMap =
2413 PseudoProbeDecoder->getAddress2ProbesMap();
2414 BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
2415 Fragments.insert(BF);
2416 for (const BinaryFunction *F : Fragments) {
2417 const uint64_t FuncAddr = F->getAddress();
2418 const auto &FragmentProbes =
2419 llvm::make_range(ProbeMap.lower_bound(FuncAddr),
2420 ProbeMap.lower_bound(FuncAddr + F->getSize()));
2421 for (const auto &[OutputAddress, Probes] : FragmentProbes) {
2422 const uint32_t InputOffset = BAT->translate(
2423 FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
2424 const unsigned BlockIndex = getBlock(InputOffset).second;
2425 for (const MCDecodedPseudoProbe &Probe : Probes)
2426 YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
2427 yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
2428 Probe.getType()});
2432 // Drop blocks without a hash, won't be useful for stale matching.
2433 llvm::erase_if(YamlBF.Blocks,
2434 [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
2435 return YamlBB.Hash == (yaml::Hex64)0;
2437 BP.Functions.emplace_back(YamlBF);
2441 // Write the profile.
2442 yaml::Output Out(OutFile, nullptr, 0);
2443 Out << BP;
2444 return std::error_code();
2447 void DataAggregator::dump() const { DataReader::dump(); }
2449 void DataAggregator::dump(const LBREntry &LBR) const {
2450 Diag << "From: " << Twine::utohexstr(LBR.From)
2451 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2452 << "\n";
2455 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2456 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2457 for (const LBREntry &LBR : Sample.LBR)
2458 dump(LBR);
2461 void DataAggregator::dump(const PerfMemSample &Sample) const {
2462 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";