[llvm-shlib] Fix the version naming style of libLLVM for Windows (#85710)
[llvm-project.git] / bolt / lib / Profile / DataAggregator.cpp
blobbe1e348b338f0f025675bd0b844a68f3c412a4b9
1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by perf record,
10 // aggregates it and then writes it back to an output file.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/ScopeExit.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Errc.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/Process.h"
28 #include "llvm/Support/Program.h"
29 #include "llvm/Support/Regex.h"
30 #include "llvm/Support/Timer.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include <map>
33 #include <optional>
34 #include <unordered_map>
35 #include <utility>
37 #define DEBUG_TYPE "aggregator"
39 using namespace llvm;
40 using namespace bolt;
42 namespace opts {
44 static cl::opt<bool>
45 BasicAggregation("nl",
46 cl::desc("aggregate basic samples (without LBR info)"),
47 cl::cat(AggregatorCategory));
49 static cl::opt<std::string>
50 ITraceAggregation("itrace",
51 cl::desc("Generate LBR info with perf itrace argument"),
52 cl::cat(AggregatorCategory));
54 static cl::opt<bool>
55 FilterMemProfile("filter-mem-profile",
56 cl::desc("if processing a memory profile, filter out stack or heap accesses "
57 "that won't be useful for BOLT to reduce profile file size"),
58 cl::init(true),
59 cl::cat(AggregatorCategory));
61 static cl::opt<unsigned long long>
62 FilterPID("pid",
63 cl::desc("only use samples from process with specified PID"),
64 cl::init(0),
65 cl::Optional,
66 cl::cat(AggregatorCategory));
68 static cl::opt<bool>
69 IgnoreBuildID("ignore-build-id",
70 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
71 cl::init(false),
72 cl::cat(AggregatorCategory));
74 static cl::opt<bool> IgnoreInterruptLBR(
75 "ignore-interrupt-lbr",
76 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
77 cl::init(true), cl::cat(AggregatorCategory));
79 static cl::opt<unsigned long long>
80 MaxSamples("max-samples",
81 cl::init(-1ULL),
82 cl::desc("maximum number of samples to read from LBR profile"),
83 cl::Optional,
84 cl::Hidden,
85 cl::cat(AggregatorCategory));
87 extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
89 cl::opt<bool> ReadPreAggregated(
90 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
91 cl::cat(AggregatorCategory));
93 static cl::opt<bool>
94 TimeAggregator("time-aggr",
95 cl::desc("time BOLT aggregator"),
96 cl::init(false),
97 cl::ZeroOrMore,
98 cl::cat(AggregatorCategory));
100 static cl::opt<bool>
101 UseEventPC("use-event-pc",
102 cl::desc("use event PC in combination with LBR sampling"),
103 cl::cat(AggregatorCategory));
105 static cl::opt<bool> WriteAutoFDOData(
106 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
107 cl::cat(AggregatorCategory));
109 } // namespace opts
111 namespace {
113 const char TimerGroupName[] = "aggregator";
114 const char TimerGroupDesc[] = "Aggregator";
116 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
117 std::vector<SectionNameAndRange> sections;
118 for (BinarySection &Section : BC->sections()) {
119 if (!Section.isText())
120 continue;
121 if (Section.getSize() == 0)
122 continue;
123 sections.push_back(
124 {Section.getName(), Section.getAddress(), Section.getEndAddress()});
126 llvm::sort(sections,
127 [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
128 return A.BeginAddress < B.BeginAddress;
130 return sections;
134 constexpr uint64_t DataAggregator::KernelBaseAddr;
136 DataAggregator::~DataAggregator() { deleteTempFiles(); }
138 namespace {
139 void deleteTempFile(const std::string &FileName) {
140 if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
141 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
142 << " with error " << Errc.message() << "\n";
146 void DataAggregator::deleteTempFiles() {
147 for (std::string &FileName : TempFiles)
148 deleteTempFile(FileName);
149 TempFiles.clear();
152 void DataAggregator::findPerfExecutable() {
153 std::optional<std::string> PerfExecutable =
154 sys::Process::FindInEnvPath("PATH", "perf");
155 if (!PerfExecutable) {
156 outs() << "PERF2BOLT: No perf executable found!\n";
157 exit(1);
159 PerfPath = *PerfExecutable;
162 void DataAggregator::start() {
163 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
165 // Don't launch perf for pre-aggregated files
166 if (opts::ReadPreAggregated)
167 return;
169 findPerfExecutable();
171 if (opts::BasicAggregation) {
172 launchPerfProcess("events without LBR",
173 MainEventsPPI,
174 "script -F pid,event,ip",
175 /*Wait = */false);
176 } else if (!opts::ITraceAggregation.empty()) {
177 std::string ItracePerfScriptArgs = llvm::formatv(
178 "script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation);
179 launchPerfProcess("branch events with itrace", MainEventsPPI,
180 ItracePerfScriptArgs.c_str(),
181 /*Wait = */ false);
182 } else {
183 launchPerfProcess("branch events",
184 MainEventsPPI,
185 "script -F pid,ip,brstack",
186 /*Wait = */false);
189 // Note: we launch script for mem events regardless of the option, as the
190 // command fails fairly fast if mem events were not collected.
191 launchPerfProcess("mem events",
192 MemEventsPPI,
193 "script -F pid,event,addr,ip",
194 /*Wait = */false);
196 launchPerfProcess("process events", MMapEventsPPI,
197 "script --show-mmap-events --no-itrace",
198 /*Wait = */ false);
200 launchPerfProcess("task events", TaskEventsPPI,
201 "script --show-task-events --no-itrace",
202 /*Wait = */ false);
205 void DataAggregator::abort() {
206 if (opts::ReadPreAggregated)
207 return;
209 std::string Error;
211 // Kill subprocesses in case they are not finished
212 sys::Wait(TaskEventsPPI.PI, 1, &Error);
213 sys::Wait(MMapEventsPPI.PI, 1, &Error);
214 sys::Wait(MainEventsPPI.PI, 1, &Error);
215 sys::Wait(MemEventsPPI.PI, 1, &Error);
217 deleteTempFiles();
219 exit(1);
222 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
223 const char *ArgsString, bool Wait) {
224 SmallVector<StringRef, 4> Argv;
226 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
227 Argv.push_back(PerfPath.data());
229 StringRef(ArgsString).split(Argv, ' ');
230 Argv.push_back("-f");
231 Argv.push_back("-i");
232 Argv.push_back(Filename.c_str());
234 if (std::error_code Errc =
235 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
236 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
237 << " with error " << Errc.message() << "\n";
238 exit(1);
240 TempFiles.push_back(PPI.StdoutPath.data());
242 if (std::error_code Errc =
243 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
244 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
245 << " with error " << Errc.message() << "\n";
246 exit(1);
248 TempFiles.push_back(PPI.StderrPath.data());
250 std::optional<StringRef> Redirects[] = {
251 std::nullopt, // Stdin
252 StringRef(PPI.StdoutPath.data()), // Stdout
253 StringRef(PPI.StderrPath.data())}; // Stderr
255 LLVM_DEBUG({
256 dbgs() << "Launching perf: ";
257 for (StringRef Arg : Argv)
258 dbgs() << Arg << " ";
259 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
260 << "\n";
263 if (Wait)
264 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
265 /*envp*/ std::nullopt, Redirects);
266 else
267 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ std::nullopt,
268 Redirects);
271 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
272 PerfProcessInfo BuildIDProcessInfo;
273 launchPerfProcess("buildid list",
274 BuildIDProcessInfo,
275 "buildid-list",
276 /*Wait = */true);
278 if (BuildIDProcessInfo.PI.ReturnCode != 0) {
279 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
280 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
281 StringRef ErrBuf = (*MB)->getBuffer();
283 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
284 << '\n';
285 errs() << ErrBuf;
286 return;
289 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
290 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
291 if (std::error_code EC = MB.getError()) {
292 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
293 << EC.message() << "\n";
294 return;
297 FileBuf = std::move(*MB);
298 ParsingBuf = FileBuf->getBuffer();
300 std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
301 if (!FileName) {
302 if (hasAllBuildIDs()) {
303 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
304 "This indicates the input binary supplied for data aggregation "
305 "is not the same recorded by perf when collecting profiling "
306 "data, or there were no samples recorded for the binary. "
307 "Use -ignore-build-id option to override.\n";
308 if (!opts::IgnoreBuildID)
309 abort();
310 } else {
311 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
312 "data was recorded without it\n";
313 return;
315 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
316 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
317 BuildIDBinaryName = std::string(*FileName);
318 } else {
319 outs() << "PERF2BOLT: matched build-id and file name\n";
323 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
324 if (opts::ReadPreAggregated)
325 return true;
327 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
328 if (!FD) {
329 consumeError(FD.takeError());
330 return false;
333 char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
335 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
336 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
337 *FD, MutableArrayRef(Buf, sizeof(Buf)), 0);
338 if (!BytesRead) {
339 consumeError(BytesRead.takeError());
340 return false;
343 if (*BytesRead != 7)
344 return false;
346 if (strncmp(Buf, "PERFILE", 7) == 0)
347 return true;
348 return false;
351 void DataAggregator::parsePreAggregated() {
352 std::string Error;
354 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
355 MemoryBuffer::getFileOrSTDIN(Filename);
356 if (std::error_code EC = MB.getError()) {
357 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
358 << EC.message() << "\n";
359 exit(1);
362 FileBuf = std::move(*MB);
363 ParsingBuf = FileBuf->getBuffer();
364 Col = 0;
365 Line = 1;
366 if (parsePreAggregatedLBRSamples()) {
367 errs() << "PERF2BOLT: failed to parse samples\n";
368 exit(1);
372 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
373 outs() << "PERF2BOLT: writing data for autofdo tools...\n";
374 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
375 TimerGroupDesc, opts::TimeAggregator);
377 std::error_code EC;
378 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
379 if (EC)
380 return EC;
382 // Format:
383 // number of unique traces
384 // from_1-to_1:count_1
385 // from_2-to_2:count_2
386 // ......
387 // from_n-to_n:count_n
388 // number of unique sample addresses
389 // addr_1:count_1
390 // addr_2:count_2
391 // ......
392 // addr_n:count_n
393 // number of unique LBR entries
394 // src_1->dst_1:count_1
395 // src_2->dst_2:count_2
396 // ......
397 // src_n->dst_n:count_n
399 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
401 // AutoFDO addresses are relative to the first allocated loadable program
402 // segment
403 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
404 if (Address < FirstAllocAddress)
405 return 0;
406 return Address - FirstAllocAddress;
409 OutFile << FallthroughLBRs.size() << "\n";
410 for (const auto &[Trace, Info] : FallthroughLBRs) {
411 OutFile << formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace.From),
412 filterAddress(Trace.To),
413 Info.InternCount + Info.ExternCount);
416 OutFile << BasicSamples.size() << "\n";
417 for (const auto [PC, HitCount] : BasicSamples)
418 OutFile << formatv("{0:x-}:{1}\n", filterAddress(PC), HitCount);
420 OutFile << BranchLBRs.size() << "\n";
421 for (const auto &[Trace, Info] : BranchLBRs) {
422 OutFile << formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace.From),
423 filterAddress(Trace.To), Info.TakenCount);
426 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
427 << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
428 << " unique branches to " << OutputFilename << "\n";
430 return std::error_code();
433 void DataAggregator::filterBinaryMMapInfo() {
434 if (opts::FilterPID) {
435 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
436 if (MMapInfoIter != BinaryMMapInfo.end()) {
437 MMapInfo MMap = MMapInfoIter->second;
438 BinaryMMapInfo.clear();
439 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
440 } else {
441 if (errs().has_colors())
442 errs().changeColor(raw_ostream::RED);
443 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
444 << opts::FilterPID << "\""
445 << " for binary \"" << BC->getFilename() << "\".";
446 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
447 errs() << " Profile for the following process is available:\n";
448 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
449 outs() << " " << MMI.second.PID
450 << (MMI.second.Forked ? " (forked)\n" : "\n");
452 if (errs().has_colors())
453 errs().resetColor();
455 exit(1);
460 int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
461 PerfProcessErrorCallbackTy Callback) {
462 std::string Error;
463 outs() << "PERF2BOLT: waiting for perf " << Name
464 << " collection to finish...\n";
465 sys::ProcessInfo PI = sys::Wait(Process.PI, std::nullopt, &Error);
467 if (!Error.empty()) {
468 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
469 deleteTempFiles();
470 exit(1);
473 if (PI.ReturnCode != 0) {
474 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
475 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
476 StringRef ErrBuf = (*ErrorMB)->getBuffer();
478 deleteTempFiles();
479 Callback(PI.ReturnCode, ErrBuf);
480 return PI.ReturnCode;
483 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
484 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
485 if (std::error_code EC = MB.getError()) {
486 errs() << "Cannot open " << Process.StdoutPath.data() << ": "
487 << EC.message() << "\n";
488 deleteTempFiles();
489 exit(1);
492 FileBuf = std::move(*MB);
493 ParsingBuf = FileBuf->getBuffer();
494 Col = 0;
495 Line = 1;
496 return PI.ReturnCode;
499 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
500 this->BC = &BC;
502 if (opts::ReadPreAggregated) {
503 parsePreAggregated();
504 return Error::success();
507 if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
508 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
509 processFileBuildID(*FileBuildID);
510 } else {
511 errs() << "BOLT-WARNING: build-id will not be checked because we could "
512 "not read one from input binary\n";
515 auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
516 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
517 exit(1);
520 auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
521 Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
522 "Cannot print 'addr' field.");
523 if (!NoData.match(ErrBuf))
524 ErrorCallback(ReturnCode, ErrBuf);
527 if (opts::LinuxKernelMode) {
528 // Current MMap parsing logic does not work with linux kernel.
529 // MMap entries for linux kernel uses PERF_RECORD_MMAP
530 // format instead of typical PERF_RECORD_MMAP2 format.
531 // Since linux kernel address mapping is absolute (same as
532 // in the ELF file), we avoid parsing MMap in linux kernel mode.
533 // While generating optimized linux kernel binary, we may need
534 // to parse MMap entries.
536 // In linux kernel mode, we analyze and optimize
537 // all linux kernel binary instructions, irrespective
538 // of whether they are due to system calls or due to
539 // interrupts. Therefore, we cannot ignore interrupt
540 // in Linux kernel mode.
541 opts::IgnoreInterruptLBR = false;
542 } else {
543 prepareToParse("mmap events", MMapEventsPPI, ErrorCallback);
544 if (parseMMapEvents())
545 errs() << "PERF2BOLT: failed to parse mmap events\n";
548 prepareToParse("task events", TaskEventsPPI, ErrorCallback);
549 if (parseTaskEvents())
550 errs() << "PERF2BOLT: failed to parse task events\n";
552 filterBinaryMMapInfo();
553 prepareToParse("events", MainEventsPPI, ErrorCallback);
555 if (opts::HeatmapMode) {
556 if (std::error_code EC = printLBRHeatMap()) {
557 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
558 exit(1);
560 exit(0);
563 if ((!opts::BasicAggregation && parseBranchEvents()) ||
564 (opts::BasicAggregation && parseBasicEvents()))
565 errs() << "PERF2BOLT: failed to parse samples\n";
567 // We can finish early if the goal is just to generate data for autofdo
568 if (opts::WriteAutoFDOData) {
569 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
570 errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
572 deleteTempFiles();
573 exit(0);
576 // Special handling for memory events
577 if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
578 return Error::success();
580 if (const std::error_code EC = parseMemEvents())
581 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
582 << '\n';
584 deleteTempFiles();
586 return Error::success();
589 Error DataAggregator::readProfile(BinaryContext &BC) {
590 processProfile(BC);
592 for (auto &BFI : BC.getBinaryFunctions()) {
593 BinaryFunction &Function = BFI.second;
594 convertBranchData(Function);
597 if (opts::AggregateOnly &&
598 opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata) {
599 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
600 report_error("cannot create output data file", EC);
603 return Error::success();
606 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
607 return Function.hasProfileAvailable();
610 void DataAggregator::processProfile(BinaryContext &BC) {
611 if (opts::ReadPreAggregated)
612 processPreAggregated();
613 else if (opts::BasicAggregation)
614 processBasicEvents();
615 else
616 processBranchEvents();
618 processMemEvents();
620 // Mark all functions with registered events as having a valid profile.
621 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
622 : BinaryFunction::PF_LBR;
623 for (auto &BFI : BC.getBinaryFunctions()) {
624 BinaryFunction &BF = BFI.second;
625 if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
626 BF.markProfiled(Flags);
629 for (auto &FuncBranches : NamesToBranches)
630 llvm::stable_sort(FuncBranches.second.Data);
632 for (auto &MemEvents : NamesToMemEvents)
633 llvm::stable_sort(MemEvents.second.Data);
635 // Release intermediate storage.
636 clear(BranchLBRs);
637 clear(FallthroughLBRs);
638 clear(AggregatedLBRs);
639 clear(BasicSamples);
640 clear(MemSamples);
643 BinaryFunction *
644 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
645 if (!BC->containsAddress(Address))
646 return nullptr;
648 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
649 /*UseMaxSize=*/true);
652 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
653 uint64_t Count) {
654 if (!BAT)
655 return Func.getOneName();
657 const BinaryFunction *OrigFunc = &Func;
658 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
659 NumColdSamples += Count;
660 BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
661 if (HotFunc)
662 OrigFunc = HotFunc;
664 // If it is a local function, prefer the name containing the file name where
665 // the local function was declared
666 for (StringRef AlternativeName : OrigFunc->getNames()) {
667 size_t FileNameIdx = AlternativeName.find('/');
668 // Confirm the alternative name has the pattern Symbol/FileName/1 before
669 // using it
670 if (FileNameIdx == StringRef::npos ||
671 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
672 continue;
673 return AlternativeName;
675 return OrigFunc->getOneName();
678 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
679 uint64_t Count) {
680 auto I = NamesToSamples.find(Func.getOneName());
681 if (I == NamesToSamples.end()) {
682 bool Success;
683 StringRef LocName = getLocationName(Func, Count);
684 std::tie(I, Success) = NamesToSamples.insert(
685 std::make_pair(Func.getOneName(),
686 FuncSampleData(LocName, FuncSampleData::ContainerTy())));
689 Address -= Func.getAddress();
690 if (BAT)
691 Address = BAT->translate(Func.getAddress(), Address, /*IsBranchSrc=*/false);
693 I->second.bumpCount(Address, Count);
694 return true;
697 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
698 uint64_t To, uint64_t Count,
699 uint64_t Mispreds) {
700 FuncBranchData *AggrData = getBranchData(Func);
701 if (!AggrData) {
702 AggrData = &NamesToBranches[Func.getOneName()];
703 AggrData->Name = getLocationName(Func, Count);
704 setBranchData(Func, AggrData);
707 From -= Func.getAddress();
708 To -= Func.getAddress();
709 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
710 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
711 if (BAT) {
712 From = BAT->translate(Func.getAddress(), From, /*IsBranchSrc=*/true);
713 To = BAT->translate(Func.getAddress(), To, /*IsBranchSrc=*/false);
714 LLVM_DEBUG(
715 dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
716 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
719 AggrData->bumpBranchCount(From, To, Count, Mispreds);
720 return true;
723 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
724 BinaryFunction *ToFunc, uint64_t From,
725 uint64_t To, uint64_t Count,
726 uint64_t Mispreds) {
727 FuncBranchData *FromAggrData = nullptr;
728 FuncBranchData *ToAggrData = nullptr;
729 StringRef SrcFunc;
730 StringRef DstFunc;
731 if (FromFunc) {
732 SrcFunc = getLocationName(*FromFunc, Count);
733 FromAggrData = getBranchData(*FromFunc);
734 if (!FromAggrData) {
735 FromAggrData = &NamesToBranches[FromFunc->getOneName()];
736 FromAggrData->Name = SrcFunc;
737 setBranchData(*FromFunc, FromAggrData);
739 From -= FromFunc->getAddress();
740 if (BAT)
741 From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
743 recordExit(*FromFunc, From, Mispreds, Count);
745 if (ToFunc) {
746 DstFunc = getLocationName(*ToFunc, 0);
747 ToAggrData = getBranchData(*ToFunc);
748 if (!ToAggrData) {
749 ToAggrData = &NamesToBranches[ToFunc->getOneName()];
750 ToAggrData->Name = DstFunc;
751 setBranchData(*ToFunc, ToAggrData);
753 To -= ToFunc->getAddress();
754 if (BAT)
755 To = BAT->translate(ToFunc->getAddress(), To, /*IsBranchSrc=*/false);
757 recordEntry(*ToFunc, To, Mispreds, Count);
760 if (FromAggrData)
761 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
762 Count, Mispreds);
763 if (ToAggrData)
764 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
765 Count, Mispreds);
766 return true;
769 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
770 uint64_t Mispreds) {
771 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
772 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
773 if (!FromFunc && !ToFunc)
774 return false;
776 // Treat recursive control transfers as inter-branches.
777 if (FromFunc == ToFunc && (To != ToFunc->getAddress())) {
778 recordBranch(*FromFunc, From - FromFunc->getAddress(),
779 To - FromFunc->getAddress(), Count, Mispreds);
780 return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
783 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
786 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
787 uint64_t Count) {
788 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
789 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
790 if (!FromFunc || !ToFunc) {
791 LLVM_DEBUG({
792 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
793 << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
794 << " and ending in " << ToFunc->getPrintName()
795 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
797 NumLongRangeTraces += Count;
798 return false;
800 if (FromFunc != ToFunc) {
801 NumInvalidTraces += Count;
802 LLVM_DEBUG({
803 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
804 << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
805 << " and ending in " << ToFunc->getPrintName()
806 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
808 return false;
811 std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
812 BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
813 Second.From)
814 : getFallthroughsInTrace(*FromFunc, First, Second, Count);
815 if (!FTs) {
816 LLVM_DEBUG(
817 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
818 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
819 << " and ending in " << ToFunc->getPrintName() << " @ "
820 << ToFunc->getPrintName() << " @ "
821 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
822 NumInvalidTraces += Count;
823 return false;
826 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
827 << FromFunc->getPrintName() << ":"
828 << Twine::utohexstr(First.To) << " to "
829 << Twine::utohexstr(Second.From) << ".\n");
830 for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
831 doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
832 Pair.second + FromFunc->getAddress(), Count, false);
834 return true;
837 bool DataAggregator::recordTrace(
838 BinaryFunction &BF, const LBREntry &FirstLBR, const LBREntry &SecondLBR,
839 uint64_t Count,
840 SmallVector<std::pair<uint64_t, uint64_t>, 16> &Branches) const {
841 BinaryContext &BC = BF.getBinaryContext();
843 if (!BF.isSimple())
844 return false;
846 assert(BF.hasCFG() && "can only record traces in CFG state");
848 // Offsets of the trace within this function.
849 const uint64_t From = FirstLBR.To - BF.getAddress();
850 const uint64_t To = SecondLBR.From - BF.getAddress();
852 if (From > To)
853 return false;
855 const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
856 const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
858 if (!FromBB || !ToBB)
859 return false;
861 // Adjust FromBB if the first LBR is a return from the last instruction in
862 // the previous block (that instruction should be a call).
863 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
864 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
865 const BinaryBasicBlock *PrevBB =
866 BF.getLayout().getBlock(FromBB->getIndex() - 1);
867 if (PrevBB->getSuccessor(FromBB->getLabel())) {
868 const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
869 if (Instr && BC.MIB->isCall(*Instr))
870 FromBB = PrevBB;
871 else
872 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
873 << '\n');
874 } else {
875 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
879 // Fill out information for fall-through edges. The From and To could be
880 // within the same basic block, e.g. when two call instructions are in the
881 // same block. In this case we skip the processing.
882 if (FromBB == ToBB)
883 return true;
885 // Process blocks in the original layout order.
886 BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex());
887 assert(BB == FromBB && "index mismatch");
888 while (BB != ToBB) {
889 BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1);
890 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
892 // Check for bad LBRs.
893 if (!BB->getSuccessor(NextBB->getLabel())) {
894 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
895 << " " << FirstLBR << '\n'
896 << " " << SecondLBR << '\n');
897 return false;
900 const MCInst *Instr = BB->getLastNonPseudoInstr();
901 uint64_t Offset = 0;
902 if (Instr)
903 Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
904 else
905 Offset = BB->getOffset();
907 Branches.emplace_back(Offset, NextBB->getOffset());
909 BB = NextBB;
912 // Record fall-through jumps
913 for (const auto &[FromOffset, ToOffset] : Branches) {
914 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(FromOffset);
915 BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(ToOffset);
916 assert(FromBB && ToBB);
917 BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
918 BI.Count += Count;
921 return true;
924 std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
925 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
926 const LBREntry &FirstLBR,
927 const LBREntry &SecondLBR,
928 uint64_t Count) const {
929 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
931 if (!recordTrace(BF, FirstLBR, SecondLBR, Count, Res))
932 return std::nullopt;
934 return Res;
937 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
938 uint64_t Count) const {
939 if (To > BF.getSize())
940 return false;
942 if (!BF.hasProfile())
943 BF.ExecutionCount = 0;
945 BinaryBasicBlock *EntryBB = nullptr;
946 if (To == 0) {
947 BF.ExecutionCount += Count;
948 if (!BF.empty())
949 EntryBB = &BF.front();
950 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
951 if (BB->isEntryPoint())
952 EntryBB = BB;
955 if (EntryBB)
956 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
958 return true;
961 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
962 uint64_t Count) const {
963 if (!BF.isSimple() || From > BF.getSize())
964 return false;
966 if (!BF.hasProfile())
967 BF.ExecutionCount = 0;
969 return true;
972 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
973 LBREntry Res;
974 ErrorOr<StringRef> FromStrRes = parseString('/');
975 if (std::error_code EC = FromStrRes.getError())
976 return EC;
977 StringRef OffsetStr = FromStrRes.get();
978 if (OffsetStr.getAsInteger(0, Res.From)) {
979 reportError("expected hexadecimal number with From address");
980 Diag << "Found: " << OffsetStr << "\n";
981 return make_error_code(llvm::errc::io_error);
984 ErrorOr<StringRef> ToStrRes = parseString('/');
985 if (std::error_code EC = ToStrRes.getError())
986 return EC;
987 OffsetStr = ToStrRes.get();
988 if (OffsetStr.getAsInteger(0, Res.To)) {
989 reportError("expected hexadecimal number with To address");
990 Diag << "Found: " << OffsetStr << "\n";
991 return make_error_code(llvm::errc::io_error);
994 ErrorOr<StringRef> MispredStrRes = parseString('/');
995 if (std::error_code EC = MispredStrRes.getError())
996 return EC;
997 StringRef MispredStr = MispredStrRes.get();
998 if (MispredStr.size() != 1 ||
999 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1000 reportError("expected single char for mispred bit");
1001 Diag << "Found: " << MispredStr << "\n";
1002 return make_error_code(llvm::errc::io_error);
1004 Res.Mispred = MispredStr[0] == 'M';
1006 static bool MispredWarning = true;
1007 if (MispredStr[0] == '-' && MispredWarning) {
1008 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1009 MispredWarning = false;
1012 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1013 if (std::error_code EC = Rest.getError())
1014 return EC;
1015 if (Rest.get().size() < 5) {
1016 reportError("expected rest of LBR entry");
1017 Diag << "Found: " << Rest.get() << "\n";
1018 return make_error_code(llvm::errc::io_error);
1020 return Res;
1023 bool DataAggregator::checkAndConsumeFS() {
1024 if (ParsingBuf[0] != FieldSeparator)
1025 return false;
1027 ParsingBuf = ParsingBuf.drop_front(1);
1028 Col += 1;
1029 return true;
1032 void DataAggregator::consumeRestOfLine() {
1033 size_t LineEnd = ParsingBuf.find_first_of('\n');
1034 if (LineEnd == StringRef::npos) {
1035 ParsingBuf = StringRef();
1036 Col = 0;
1037 Line += 1;
1038 return;
1040 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1041 Col = 0;
1042 Line += 1;
1045 bool DataAggregator::checkNewLine() {
1046 return ParsingBuf[0] == '\n';
1049 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1050 PerfBranchSample Res;
1052 while (checkAndConsumeFS()) {
1055 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1056 if (std::error_code EC = PIDRes.getError())
1057 return EC;
1058 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1059 if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1060 consumeRestOfLine();
1061 return make_error_code(errc::no_such_process);
1064 while (checkAndConsumeFS()) {
1067 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1068 if (std::error_code EC = PCRes.getError())
1069 return EC;
1070 Res.PC = PCRes.get();
1072 if (checkAndConsumeNewLine())
1073 return Res;
1075 while (!checkAndConsumeNewLine()) {
1076 checkAndConsumeFS();
1078 ErrorOr<LBREntry> LBRRes = parseLBREntry();
1079 if (std::error_code EC = LBRRes.getError())
1080 return EC;
1081 LBREntry LBR = LBRRes.get();
1082 if (ignoreKernelInterrupt(LBR))
1083 continue;
1084 if (!BC->HasFixedLoadAddress)
1085 adjustLBR(LBR, MMapInfoIter->second);
1086 Res.LBR.push_back(LBR);
1089 return Res;
1092 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1093 while (checkAndConsumeFS()) {
1096 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1097 if (std::error_code EC = PIDRes.getError())
1098 return EC;
1100 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1101 if (MMapInfoIter == BinaryMMapInfo.end()) {
1102 consumeRestOfLine();
1103 return PerfBasicSample{StringRef(), 0};
1106 while (checkAndConsumeFS()) {
1109 ErrorOr<StringRef> Event = parseString(FieldSeparator);
1110 if (std::error_code EC = Event.getError())
1111 return EC;
1113 while (checkAndConsumeFS()) {
1116 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1117 if (std::error_code EC = AddrRes.getError())
1118 return EC;
1120 if (!checkAndConsumeNewLine()) {
1121 reportError("expected end of line");
1122 return make_error_code(llvm::errc::io_error);
1125 uint64_t Address = *AddrRes;
1126 if (!BC->HasFixedLoadAddress)
1127 adjustAddress(Address, MMapInfoIter->second);
1129 return PerfBasicSample{Event.get(), Address};
1132 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1133 PerfMemSample Res{0, 0};
1135 while (checkAndConsumeFS()) {
1138 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1139 if (std::error_code EC = PIDRes.getError())
1140 return EC;
1142 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1143 if (MMapInfoIter == BinaryMMapInfo.end()) {
1144 consumeRestOfLine();
1145 return Res;
1148 while (checkAndConsumeFS()) {
1151 ErrorOr<StringRef> Event = parseString(FieldSeparator);
1152 if (std::error_code EC = Event.getError())
1153 return EC;
1154 if (!Event.get().contains("mem-loads")) {
1155 consumeRestOfLine();
1156 return Res;
1159 while (checkAndConsumeFS()) {
1162 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1163 if (std::error_code EC = AddrRes.getError())
1164 return EC;
1166 while (checkAndConsumeFS()) {
1169 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1170 if (std::error_code EC = PCRes.getError()) {
1171 consumeRestOfLine();
1172 return EC;
1175 if (!checkAndConsumeNewLine()) {
1176 reportError("expected end of line");
1177 return make_error_code(llvm::errc::io_error);
1180 uint64_t Address = *AddrRes;
1181 if (!BC->HasFixedLoadAddress)
1182 adjustAddress(Address, MMapInfoIter->second);
1184 return PerfMemSample{PCRes.get(), Address};
1187 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1188 auto parseOffset = [this]() -> ErrorOr<Location> {
1189 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1190 if (std::error_code EC = Res.getError())
1191 return EC;
1192 return Location(Res.get());
1195 size_t Sep = ParsingBuf.find_first_of(" \n");
1196 if (Sep == StringRef::npos)
1197 return parseOffset();
1198 StringRef LookAhead = ParsingBuf.substr(0, Sep);
1199 if (LookAhead.find_first_of(":") == StringRef::npos)
1200 return parseOffset();
1202 ErrorOr<StringRef> BuildID = parseString(':');
1203 if (std::error_code EC = BuildID.getError())
1204 return EC;
1205 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1206 if (std::error_code EC = Offset.getError())
1207 return EC;
1208 return Location(true, BuildID.get(), Offset.get());
1211 ErrorOr<DataAggregator::AggregatedLBREntry>
1212 DataAggregator::parseAggregatedLBREntry() {
1213 while (checkAndConsumeFS()) {
1216 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1217 if (std::error_code EC = TypeOrErr.getError())
1218 return EC;
1219 auto Type = AggregatedLBREntry::BRANCH;
1220 if (TypeOrErr.get() == "B") {
1221 Type = AggregatedLBREntry::BRANCH;
1222 } else if (TypeOrErr.get() == "F") {
1223 Type = AggregatedLBREntry::FT;
1224 } else if (TypeOrErr.get() == "f") {
1225 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1226 } else {
1227 reportError("expected B, F or f");
1228 return make_error_code(llvm::errc::io_error);
1231 while (checkAndConsumeFS()) {
1233 ErrorOr<Location> From = parseLocationOrOffset();
1234 if (std::error_code EC = From.getError())
1235 return EC;
1237 while (checkAndConsumeFS()) {
1239 ErrorOr<Location> To = parseLocationOrOffset();
1240 if (std::error_code EC = To.getError())
1241 return EC;
1243 while (checkAndConsumeFS()) {
1245 ErrorOr<int64_t> Frequency =
1246 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1247 if (std::error_code EC = Frequency.getError())
1248 return EC;
1250 uint64_t Mispreds = 0;
1251 if (Type == AggregatedLBREntry::BRANCH) {
1252 while (checkAndConsumeFS()) {
1254 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1255 if (std::error_code EC = MispredsOrErr.getError())
1256 return EC;
1257 Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1260 if (!checkAndConsumeNewLine()) {
1261 reportError("expected end of line");
1262 return make_error_code(llvm::errc::io_error);
1265 return AggregatedLBREntry{From.get(), To.get(),
1266 static_cast<uint64_t>(Frequency.get()), Mispreds,
1267 Type};
1270 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1271 return opts::IgnoreInterruptLBR &&
1272 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1275 std::error_code DataAggregator::printLBRHeatMap() {
1276 outs() << "PERF2BOLT: parse branch events...\n";
1277 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1278 TimerGroupDesc, opts::TimeAggregator);
1280 if (opts::LinuxKernelMode) {
1281 opts::HeatmapMaxAddress = 0xffffffffffffffff;
1282 opts::HeatmapMinAddress = KernelBaseAddr;
1284 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1285 opts::HeatmapMaxAddress, getTextSections(BC));
1286 uint64_t NumTotalSamples = 0;
1288 if (opts::BasicAggregation) {
1289 while (hasData()) {
1290 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1291 if (std::error_code EC = SampleRes.getError()) {
1292 if (EC == errc::no_such_process)
1293 continue;
1294 return EC;
1296 PerfBasicSample &Sample = SampleRes.get();
1297 HM.registerAddress(Sample.PC);
1298 NumTotalSamples++;
1300 outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1301 } else {
1302 while (hasData()) {
1303 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1304 if (std::error_code EC = SampleRes.getError()) {
1305 if (EC == errc::no_such_process)
1306 continue;
1307 return EC;
1310 PerfBranchSample &Sample = SampleRes.get();
1312 // LBRs are stored in reverse execution order. NextLBR refers to the next
1313 // executed branch record.
1314 const LBREntry *NextLBR = nullptr;
1315 for (const LBREntry &LBR : Sample.LBR) {
1316 if (NextLBR) {
1317 // Record fall-through trace.
1318 const uint64_t TraceFrom = LBR.To;
1319 const uint64_t TraceTo = NextLBR->From;
1320 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1322 NextLBR = &LBR;
1324 if (!Sample.LBR.empty()) {
1325 HM.registerAddress(Sample.LBR.front().To);
1326 HM.registerAddress(Sample.LBR.back().From);
1328 NumTotalSamples += Sample.LBR.size();
1330 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1331 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1334 if (!NumTotalSamples) {
1335 if (opts::BasicAggregation) {
1336 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1337 "Cannot build heatmap.";
1338 } else {
1339 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1340 "Cannot build heatmap. Use -nl for building heatmap from "
1341 "basic events.\n";
1343 exit(1);
1346 outs() << "HEATMAP: building heat map...\n";
1348 for (const auto &LBR : FallthroughLBRs) {
1349 const Trace &Trace = LBR.first;
1350 const FTInfo &Info = LBR.second;
1351 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1354 if (HM.getNumInvalidRanges())
1355 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1357 if (!HM.size()) {
1358 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1359 exit(1);
1362 HM.print(opts::OutputFilename);
1363 if (opts::OutputFilename == "-")
1364 HM.printCDF(opts::OutputFilename);
1365 else
1366 HM.printCDF(opts::OutputFilename + ".csv");
1367 if (opts::OutputFilename == "-")
1368 HM.printSectionHotness(opts::OutputFilename);
1369 else
1370 HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
1372 return std::error_code();
1375 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1376 bool NeedsSkylakeFix) {
1377 uint64_t NumTraces{0};
1378 // LBRs are stored in reverse execution order. NextPC refers to the next
1379 // recorded executed PC.
1380 uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
1381 uint32_t NumEntry = 0;
1382 for (const LBREntry &LBR : Sample.LBR) {
1383 ++NumEntry;
1384 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1385 // sometimes record entry 32 as an exact copy of entry 31. This will cause
1386 // us to likely record an invalid trace and generate a stale function for
1387 // BAT mode (non BAT disassembles the function and is able to ignore this
1388 // trace at aggregation time). Drop first 2 entries (last two, in
1389 // chronological order)
1390 if (NeedsSkylakeFix && NumEntry <= 2)
1391 continue;
1392 if (NextPC) {
1393 // Record fall-through trace.
1394 const uint64_t TraceFrom = LBR.To;
1395 const uint64_t TraceTo = NextPC;
1396 const BinaryFunction *TraceBF =
1397 getBinaryFunctionContainingAddress(TraceFrom);
1398 if (TraceBF && TraceBF->containsAddress(TraceTo)) {
1399 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1400 if (TraceBF->containsAddress(LBR.From))
1401 ++Info.InternCount;
1402 else
1403 ++Info.ExternCount;
1404 } else {
1405 const BinaryFunction *ToFunc =
1406 getBinaryFunctionContainingAddress(TraceTo);
1407 if (TraceBF && ToFunc) {
1408 LLVM_DEBUG({
1409 dbgs() << "Invalid trace starting in " << TraceBF->getPrintName()
1410 << formatv(" @ {0:x}", TraceFrom - TraceBF->getAddress())
1411 << formatv(" and ending @ {0:x}\n", TraceTo);
1413 ++NumInvalidTraces;
1414 } else {
1415 LLVM_DEBUG({
1416 dbgs() << "Out of range trace starting in "
1417 << (TraceBF ? TraceBF->getPrintName() : "None")
1418 << formatv(" @ {0:x}",
1419 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
1420 << " and ending in "
1421 << (ToFunc ? ToFunc->getPrintName() : "None")
1422 << formatv(" @ {0:x}\n",
1423 TraceTo - (ToFunc ? ToFunc->getAddress() : 0));
1425 ++NumLongRangeTraces;
1428 ++NumTraces;
1430 NextPC = LBR.From;
1432 uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
1433 uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
1434 if (!From && !To)
1435 continue;
1436 BranchInfo &Info = BranchLBRs[Trace(From, To)];
1437 ++Info.TakenCount;
1438 Info.MispredCount += LBR.Mispred;
1440 return NumTraces;
1443 std::error_code DataAggregator::parseBranchEvents() {
1444 outs() << "PERF2BOLT: parse branch events...\n";
1445 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1446 TimerGroupDesc, opts::TimeAggregator);
1448 uint64_t NumTotalSamples = 0;
1449 uint64_t NumEntries = 0;
1450 uint64_t NumSamples = 0;
1451 uint64_t NumSamplesNoLBR = 0;
1452 uint64_t NumTraces = 0;
1453 bool NeedsSkylakeFix = false;
1455 while (hasData() && NumTotalSamples < opts::MaxSamples) {
1456 ++NumTotalSamples;
1458 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1459 if (std::error_code EC = SampleRes.getError()) {
1460 if (EC == errc::no_such_process)
1461 continue;
1462 return EC;
1464 ++NumSamples;
1466 PerfBranchSample &Sample = SampleRes.get();
1467 if (opts::WriteAutoFDOData)
1468 ++BasicSamples[Sample.PC];
1470 if (Sample.LBR.empty()) {
1471 ++NumSamplesNoLBR;
1472 continue;
1475 NumEntries += Sample.LBR.size();
1476 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1477 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1478 NeedsSkylakeFix = true;
1481 NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
1484 for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
1485 for (const uint64_t Addr : {Trace.From, Trace.To})
1486 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1487 BF->setHasProfileAvailable();
1489 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1490 OS << " (";
1491 if (OS.has_colors()) {
1492 if (Percent > T2)
1493 OS.changeColor(raw_ostream::RED);
1494 else if (Percent > T1)
1495 OS.changeColor(raw_ostream::YELLOW);
1496 else
1497 OS.changeColor(raw_ostream::GREEN);
1499 OS << format("%.1f%%", Percent);
1500 if (OS.has_colors())
1501 OS.resetColor();
1502 OS << ")";
1505 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1506 << " LBR entries\n";
1507 if (NumTotalSamples) {
1508 if (NumSamples && NumSamplesNoLBR == NumSamples) {
1509 // Note: we don't know if perf2bolt is being used to parse memory samples
1510 // at this point. In this case, it is OK to parse zero LBRs.
1511 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1512 "LBR. Record profile with perf record -j any or run perf2bolt "
1513 "in no-LBR mode with -nl (the performance improvement in -nl "
1514 "mode may be limited)\n";
1515 } else {
1516 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1517 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1518 outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1519 printColored(outs(), PercentIgnored, 20, 50);
1520 outs() << " were ignored\n";
1521 if (PercentIgnored > 50.0f)
1522 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1523 "were attributed to the input binary\n";
1526 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1527 << NumInvalidTraces;
1528 float Perc = 0.0f;
1529 if (NumTraces > 0) {
1530 Perc = NumInvalidTraces * 100.0f / NumTraces;
1531 printColored(outs(), Perc, 5, 10);
1533 outs() << "\n";
1534 if (Perc > 10.0f)
1535 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1536 "binary is probably not the same binary used during profiling "
1537 "collection. The generated data may be ineffective for improving "
1538 "performance.\n\n";
1540 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1541 << NumLongRangeTraces;
1542 if (NumTraces > 0)
1543 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1544 outs() << "\n";
1546 if (NumColdSamples > 0) {
1547 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1548 outs() << "PERF2BOLT: " << NumColdSamples
1549 << format(" (%.1f%%)", ColdSamples)
1550 << " samples recorded in cold regions of split functions.\n";
1551 if (ColdSamples > 5.0f)
1552 outs()
1553 << "WARNING: The BOLT-processed binary where samples were collected "
1554 "likely used bad data or your service observed a large shift in "
1555 "profile. You may want to audit this.\n";
1558 return std::error_code();
1561 void DataAggregator::processBranchEvents() {
1562 outs() << "PERF2BOLT: processing branch events...\n";
1563 NamedRegionTimer T("processBranch", "Processing branch events",
1564 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1566 for (const auto &AggrLBR : FallthroughLBRs) {
1567 const Trace &Loc = AggrLBR.first;
1568 const FTInfo &Info = AggrLBR.second;
1569 LBREntry First{Loc.From, Loc.From, false};
1570 LBREntry Second{Loc.To, Loc.To, false};
1571 if (Info.InternCount)
1572 doTrace(First, Second, Info.InternCount);
1573 if (Info.ExternCount) {
1574 First.From = 0;
1575 doTrace(First, Second, Info.ExternCount);
1579 for (const auto &AggrLBR : BranchLBRs) {
1580 const Trace &Loc = AggrLBR.first;
1581 const BranchInfo &Info = AggrLBR.second;
1582 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1586 std::error_code DataAggregator::parseBasicEvents() {
1587 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1588 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1589 TimerGroupDesc, opts::TimeAggregator);
1590 while (hasData()) {
1591 ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1592 if (std::error_code EC = Sample.getError())
1593 return EC;
1595 if (!Sample->PC)
1596 continue;
1598 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1599 BF->setHasProfileAvailable();
1601 ++BasicSamples[Sample->PC];
1602 EventNames.insert(Sample->EventName);
1605 return std::error_code();
1608 void DataAggregator::processBasicEvents() {
1609 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1610 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1611 TimerGroupDesc, opts::TimeAggregator);
1612 uint64_t OutOfRangeSamples = 0;
1613 uint64_t NumSamples = 0;
1614 for (auto &Sample : BasicSamples) {
1615 const uint64_t PC = Sample.first;
1616 const uint64_t HitCount = Sample.second;
1617 NumSamples += HitCount;
1618 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1619 if (!Func) {
1620 OutOfRangeSamples += HitCount;
1621 continue;
1624 doSample(*Func, PC, HitCount);
1626 outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1628 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1629 << OutOfRangeSamples;
1630 float Perc = 0.0f;
1631 if (NumSamples > 0) {
1632 outs() << " (";
1633 Perc = OutOfRangeSamples * 100.0f / NumSamples;
1634 if (outs().has_colors()) {
1635 if (Perc > 60.0f)
1636 outs().changeColor(raw_ostream::RED);
1637 else if (Perc > 40.0f)
1638 outs().changeColor(raw_ostream::YELLOW);
1639 else
1640 outs().changeColor(raw_ostream::GREEN);
1642 outs() << format("%.1f%%", Perc);
1643 if (outs().has_colors())
1644 outs().resetColor();
1645 outs() << ")";
1647 outs() << "\n";
1648 if (Perc > 80.0f)
1649 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1650 "binary is probably not the same binary used during profiling "
1651 "collection. The generated data may be ineffective for improving "
1652 "performance.\n\n";
1655 std::error_code DataAggregator::parseMemEvents() {
1656 outs() << "PERF2BOLT: parsing memory events...\n";
1657 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1658 TimerGroupDesc, opts::TimeAggregator);
1659 while (hasData()) {
1660 ErrorOr<PerfMemSample> Sample = parseMemSample();
1661 if (std::error_code EC = Sample.getError())
1662 return EC;
1664 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1665 BF->setHasProfileAvailable();
1667 MemSamples.emplace_back(std::move(Sample.get()));
1670 return std::error_code();
1673 void DataAggregator::processMemEvents() {
1674 NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1675 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1676 for (const PerfMemSample &Sample : MemSamples) {
1677 uint64_t PC = Sample.PC;
1678 uint64_t Addr = Sample.Addr;
1679 StringRef FuncName;
1680 StringRef MemName;
1682 // Try to resolve symbol for PC
1683 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1684 if (!Func) {
1685 LLVM_DEBUG(if (PC != 0) {
1686 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr);
1688 continue;
1691 FuncName = Func->getOneName();
1692 PC -= Func->getAddress();
1694 // Try to resolve symbol for memory load
1695 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1696 MemName = BD->getName();
1697 Addr -= BD->getAddress();
1698 } else if (opts::FilterMemProfile) {
1699 // Filter out heap/stack accesses
1700 continue;
1703 const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1704 const Location AddrLoc(!MemName.empty(), MemName, Addr);
1706 FuncMemData *MemData = &NamesToMemEvents[FuncName];
1707 MemData->Name = FuncName;
1708 setMemData(*Func, MemData);
1709 MemData->update(FuncLoc, AddrLoc);
1710 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1714 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1715 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1716 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1717 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1718 while (hasData()) {
1719 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1720 if (std::error_code EC = AggrEntry.getError())
1721 return EC;
1723 for (const uint64_t Addr : {AggrEntry->From.Offset, AggrEntry->To.Offset})
1724 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1725 BF->setHasProfileAvailable();
1727 AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1730 return std::error_code();
1733 void DataAggregator::processPreAggregated() {
1734 outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1735 NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1736 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1738 uint64_t NumTraces = 0;
1739 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1740 switch (AggrEntry.EntryType) {
1741 case AggregatedLBREntry::BRANCH:
1742 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1743 AggrEntry.Mispreds);
1744 break;
1745 case AggregatedLBREntry::FT:
1746 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1747 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1748 ? AggrEntry.From.Offset
1749 : 0,
1750 AggrEntry.From.Offset, false};
1751 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1752 doTrace(First, Second, AggrEntry.Count);
1753 NumTraces += AggrEntry.Count;
1754 break;
1759 outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1760 << " aggregated LBR entries\n";
1761 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1762 << NumInvalidTraces;
1763 float Perc = 0.0f;
1764 if (NumTraces > 0) {
1765 outs() << " (";
1766 Perc = NumInvalidTraces * 100.0f / NumTraces;
1767 if (outs().has_colors()) {
1768 if (Perc > 10.0f)
1769 outs().changeColor(raw_ostream::RED);
1770 else if (Perc > 5.0f)
1771 outs().changeColor(raw_ostream::YELLOW);
1772 else
1773 outs().changeColor(raw_ostream::GREEN);
1775 outs() << format("%.1f%%", Perc);
1776 if (outs().has_colors())
1777 outs().resetColor();
1778 outs() << ")";
1780 outs() << "\n";
1781 if (Perc > 10.0f)
1782 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1783 "binary is probably not the same binary used during profiling "
1784 "collection. The generated data may be ineffective for improving "
1785 "performance.\n\n";
1787 outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1788 << NumLongRangeTraces;
1789 if (NumTraces > 0)
1790 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1791 outs() << "\n";
1794 std::optional<int32_t> DataAggregator::parseCommExecEvent() {
1795 size_t LineEnd = ParsingBuf.find_first_of("\n");
1796 if (LineEnd == StringRef::npos) {
1797 reportError("expected rest of line");
1798 Diag << "Found: " << ParsingBuf << "\n";
1799 return std::nullopt;
1801 StringRef Line = ParsingBuf.substr(0, LineEnd);
1803 size_t Pos = Line.find("PERF_RECORD_COMM exec");
1804 if (Pos == StringRef::npos)
1805 return std::nullopt;
1806 Line = Line.drop_front(Pos);
1808 // Line:
1809 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1810 StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1811 int32_t PID;
1812 if (PIDStr.getAsInteger(10, PID)) {
1813 reportError("expected PID");
1814 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1815 return std::nullopt;
1818 return PID;
1821 namespace {
1822 std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1823 const StringRef SecTimeStr = TimeStr.split('.').first;
1824 const StringRef USecTimeStr = TimeStr.split('.').second;
1825 uint64_t SecTime;
1826 uint64_t USecTime;
1827 if (SecTimeStr.getAsInteger(10, SecTime) ||
1828 USecTimeStr.getAsInteger(10, USecTime))
1829 return std::nullopt;
1830 return SecTime * 1000000ULL + USecTime;
1834 std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1835 while (checkAndConsumeFS()) {
1838 size_t LineEnd = ParsingBuf.find_first_of("\n");
1839 if (LineEnd == StringRef::npos) {
1840 reportError("expected rest of line");
1841 Diag << "Found: " << ParsingBuf << "\n";
1842 return std::nullopt;
1844 StringRef Line = ParsingBuf.substr(0, LineEnd);
1846 size_t Pos = Line.find("PERF_RECORD_FORK");
1847 if (Pos == StringRef::npos) {
1848 consumeRestOfLine();
1849 return std::nullopt;
1852 ForkInfo FI;
1854 const StringRef TimeStr =
1855 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1856 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1857 FI.Time = *TimeRes;
1860 Line = Line.drop_front(Pos);
1862 // Line:
1863 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1864 const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1865 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1866 reportError("expected PID");
1867 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1868 return std::nullopt;
1871 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1872 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1873 reportError("expected PID");
1874 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1875 return std::nullopt;
1878 consumeRestOfLine();
1880 return FI;
1883 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1884 DataAggregator::parseMMapEvent() {
1885 while (checkAndConsumeFS()) {
1888 MMapInfo ParsedInfo;
1890 size_t LineEnd = ParsingBuf.find_first_of("\n");
1891 if (LineEnd == StringRef::npos) {
1892 reportError("expected rest of line");
1893 Diag << "Found: " << ParsingBuf << "\n";
1894 return make_error_code(llvm::errc::io_error);
1896 StringRef Line = ParsingBuf.substr(0, LineEnd);
1898 size_t Pos = Line.find("PERF_RECORD_MMAP2");
1899 if (Pos == StringRef::npos) {
1900 consumeRestOfLine();
1901 return std::make_pair(StringRef(), ParsedInfo);
1904 // Line:
1905 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1907 const StringRef TimeStr =
1908 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1909 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1910 ParsedInfo.Time = *TimeRes;
1912 Line = Line.drop_front(Pos);
1914 // Line:
1915 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1917 StringRef FileName = Line.rsplit(FieldSeparator).second;
1918 if (FileName.starts_with("//") || FileName.starts_with("[")) {
1919 consumeRestOfLine();
1920 return std::make_pair(StringRef(), ParsedInfo);
1922 FileName = sys::path::filename(FileName);
1924 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1925 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1926 reportError("expected PID");
1927 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1928 return make_error_code(llvm::errc::io_error);
1931 const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1932 if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
1933 reportError("expected base address");
1934 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1935 return make_error_code(llvm::errc::io_error);
1938 const StringRef SizeStr = Line.split('(').second.split(')').first;
1939 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1940 reportError("expected mmaped size");
1941 Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1942 return make_error_code(llvm::errc::io_error);
1945 const StringRef OffsetStr =
1946 Line.split('@').second.ltrim().split(FieldSeparator).first;
1947 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1948 reportError("expected mmaped page-aligned offset");
1949 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1950 return make_error_code(llvm::errc::io_error);
1953 consumeRestOfLine();
1955 return std::make_pair(FileName, ParsedInfo);
1958 std::error_code DataAggregator::parseMMapEvents() {
1959 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1960 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1961 TimerGroupDesc, opts::TimeAggregator);
1963 std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1964 while (hasData()) {
1965 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1966 if (std::error_code EC = FileMMapInfoRes.getError())
1967 return EC;
1969 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1970 if (FileMMapInfo.second.PID == -1)
1971 continue;
1972 if (FileMMapInfo.first.equals("(deleted)"))
1973 continue;
1975 // Consider only the first mapping of the file for any given PID
1976 auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
1977 bool PIDExists = llvm::any_of(make_range(Range), [&](const auto &MI) {
1978 return MI.second.PID == FileMMapInfo.second.PID;
1981 if (PIDExists)
1982 continue;
1984 GlobalMMapInfo.insert(FileMMapInfo);
1987 LLVM_DEBUG({
1988 dbgs() << "FileName -> mmap info:\n"
1989 << " Filename : PID [MMapAddr, Size, Offset]\n";
1990 for (const auto &[Name, MMap] : GlobalMMapInfo)
1991 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID,
1992 MMap.MMapAddress, MMap.Size, MMap.Offset);
1995 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
1996 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
1997 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
1998 << "\" for profile matching\n";
1999 NameToUse = BuildIDBinaryName;
2002 auto Range = GlobalMMapInfo.equal_range(NameToUse);
2003 for (MMapInfo &MMapInfo : llvm::make_second_range(make_range(Range))) {
2004 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2005 // Check that the binary mapping matches one of the segments.
2006 bool MatchFound = llvm::any_of(
2007 llvm::make_second_range(BC->SegmentMapInfo),
2008 [&](SegmentInfo &SegInfo) {
2009 // The mapping is page-aligned and hence the MMapAddress could be
2010 // different from the segment start address. We cannot know the page
2011 // size of the mapping, but we know it should not exceed the segment
2012 // alignment value. Hence we are performing an approximate check.
2013 return SegInfo.Address >= MMapInfo.MMapAddress &&
2014 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment;
2016 if (!MatchFound) {
2017 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2018 << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2019 continue;
2023 // Set base address for shared objects.
2024 if (!BC->HasFixedLoadAddress) {
2025 std::optional<uint64_t> BaseAddress =
2026 BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2027 if (!BaseAddress) {
2028 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2029 "binary when memory mapped at 0x"
2030 << Twine::utohexstr(MMapInfo.MMapAddress)
2031 << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2032 << ". Ignoring profile data for this mapping\n";
2033 continue;
2034 } else {
2035 MMapInfo.BaseAddress = *BaseAddress;
2039 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2042 if (BinaryMMapInfo.empty()) {
2043 if (errs().has_colors())
2044 errs().changeColor(raw_ostream::RED);
2045 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2046 << BC->getFilename() << "\".";
2047 if (!GlobalMMapInfo.empty()) {
2048 errs() << " Profile for the following binary name(s) is available:\n";
2049 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2050 I = GlobalMMapInfo.upper_bound(I->first))
2051 errs() << " " << I->first << '\n';
2052 errs() << "Please rename the input binary.\n";
2053 } else {
2054 errs() << " Failed to extract any binary name from a profile.\n";
2056 if (errs().has_colors())
2057 errs().resetColor();
2059 exit(1);
2062 return std::error_code();
2065 std::error_code DataAggregator::parseTaskEvents() {
2066 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2067 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2068 TimerGroupDesc, opts::TimeAggregator);
2070 while (hasData()) {
2071 if (std::optional<int32_t> CommInfo = parseCommExecEvent()) {
2072 // Remove forked child that ran execve
2073 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2074 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2075 BinaryMMapInfo.erase(MMapInfoIter);
2076 consumeRestOfLine();
2077 continue;
2080 std::optional<ForkInfo> ForkInfo = parseForkEvent();
2081 if (!ForkInfo)
2082 continue;
2084 if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2085 continue;
2087 if (ForkInfo->Time == 0) {
2088 // Process was forked and mmaped before perf ran. In this case the child
2089 // should have its own mmap entry unless it was execve'd.
2090 continue;
2093 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2094 if (MMapInfoIter == BinaryMMapInfo.end())
2095 continue;
2097 MMapInfo MMapInfo = MMapInfoIter->second;
2098 MMapInfo.PID = ForkInfo->ChildPID;
2099 MMapInfo.Forked = true;
2100 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2103 outs() << "PERF2BOLT: input binary is associated with "
2104 << BinaryMMapInfo.size() << " PID(s)\n";
2106 LLVM_DEBUG({
2107 for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo))
2108 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI.PID,
2109 (MMI.Forked ? " (forked)" : ""), MMI.MMapAddress,
2110 MMI.Size);
2113 return std::error_code();
2116 std::optional<std::pair<StringRef, StringRef>>
2117 DataAggregator::parseNameBuildIDPair() {
2118 while (checkAndConsumeFS()) {
2121 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2122 if (std::error_code EC = BuildIDStr.getError())
2123 return std::nullopt;
2125 // If one of the strings is missing, don't issue a parsing error, but still
2126 // do not return a value.
2127 consumeAllRemainingFS();
2128 if (checkNewLine())
2129 return std::nullopt;
2131 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2132 if (std::error_code EC = NameStr.getError())
2133 return std::nullopt;
2135 consumeRestOfLine();
2136 return std::make_pair(NameStr.get(), BuildIDStr.get());
2139 bool DataAggregator::hasAllBuildIDs() {
2140 const StringRef SavedParsingBuf = ParsingBuf;
2142 if (!hasData())
2143 return false;
2145 bool HasInvalidEntries = false;
2146 while (hasData()) {
2147 if (!parseNameBuildIDPair()) {
2148 HasInvalidEntries = true;
2149 break;
2153 ParsingBuf = SavedParsingBuf;
2155 return !HasInvalidEntries;
2158 std::optional<StringRef>
2159 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2160 const StringRef SavedParsingBuf = ParsingBuf;
2162 StringRef FileName;
2163 while (hasData()) {
2164 std::optional<std::pair<StringRef, StringRef>> IDPair =
2165 parseNameBuildIDPair();
2166 if (!IDPair) {
2167 consumeRestOfLine();
2168 continue;
2171 if (IDPair->second.starts_with(FileBuildID)) {
2172 FileName = sys::path::filename(IDPair->first);
2173 break;
2177 ParsingBuf = SavedParsingBuf;
2179 if (!FileName.empty())
2180 return FileName;
2182 return std::nullopt;
2185 std::error_code
2186 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2187 std::error_code EC;
2188 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2189 if (EC)
2190 return EC;
2192 bool WriteMemLocs = false;
2194 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2195 if (WriteMemLocs)
2196 OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2197 else
2198 OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2199 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2200 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2203 uint64_t BranchValues = 0;
2204 uint64_t MemValues = 0;
2206 if (BAT)
2207 OutFile << "boltedcollection\n";
2208 if (opts::BasicAggregation) {
2209 OutFile << "no_lbr";
2210 for (const StringMapEntry<std::nullopt_t> &Entry : EventNames)
2211 OutFile << " " << Entry.getKey();
2212 OutFile << "\n";
2214 for (const auto &KV : NamesToSamples) {
2215 const FuncSampleData &FSD = KV.second;
2216 for (const SampleInfo &SI : FSD.Data) {
2217 writeLocation(SI.Loc);
2218 OutFile << SI.Hits << "\n";
2219 ++BranchValues;
2222 } else {
2223 for (const auto &KV : NamesToBranches) {
2224 const FuncBranchData &FBD = KV.second;
2225 for (const llvm::bolt::BranchInfo &BI : FBD.Data) {
2226 writeLocation(BI.From);
2227 writeLocation(BI.To);
2228 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2229 ++BranchValues;
2231 for (const llvm::bolt::BranchInfo &BI : FBD.EntryData) {
2232 // Do not output if source is a known symbol, since this was already
2233 // accounted for in the source function
2234 if (BI.From.IsSymbol)
2235 continue;
2236 writeLocation(BI.From);
2237 writeLocation(BI.To);
2238 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2239 ++BranchValues;
2243 WriteMemLocs = true;
2244 for (const auto &KV : NamesToMemEvents) {
2245 const FuncMemData &FMD = KV.second;
2246 for (const MemInfo &MemEvent : FMD.Data) {
2247 writeLocation(MemEvent.Offset);
2248 writeLocation(MemEvent.Addr);
2249 OutFile << MemEvent.Count << "\n";
2250 ++MemValues;
2255 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2256 << " memory objects to " << OutputFilename << "\n";
2258 return std::error_code();
2261 void DataAggregator::dump() const { DataReader::dump(); }
2263 void DataAggregator::dump(const LBREntry &LBR) const {
2264 Diag << "From: " << Twine::utohexstr(LBR.From)
2265 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2266 << "\n";
2269 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2270 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2271 for (const LBREntry &LBR : Sample.LBR)
2272 dump(LBR);
2275 void DataAggregator::dump(const PerfMemSample &Sample) const {
2276 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";