Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Profile / DataAggregator.cpp
blobb72bd0edf1a2df22a16ebb3dd06ed33907533d37
1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by perf record,
10 // aggregates it, and then writes it back to an output file.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/ScopeExit.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Errc.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/Process.h"
28 #include "llvm/Support/Program.h"
29 #include "llvm/Support/Regex.h"
30 #include "llvm/Support/Timer.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include <map>
33 #include <optional>
34 #include <unordered_map>
35 #include <utility>
37 #define DEBUG_TYPE "aggregator"
39 using namespace llvm;
40 using namespace bolt;
42 namespace opts {
44 static cl::opt<bool>
45 BasicAggregation("nl",
46 cl::desc("aggregate basic samples (without LBR info)"),
47 cl::cat(AggregatorCategory));
49 static cl::opt<bool>
50 FilterMemProfile("filter-mem-profile",
51 cl::desc("if processing a memory profile, filter out stack or heap accesses "
52 "that won't be useful for BOLT to reduce profile file size"),
53 cl::init(true),
54 cl::cat(AggregatorCategory));
56 static cl::opt<unsigned long long>
57 FilterPID("pid",
58 cl::desc("only use samples from process with specified PID"),
59 cl::init(0),
60 cl::Optional,
61 cl::cat(AggregatorCategory));
63 static cl::opt<bool>
64 IgnoreBuildID("ignore-build-id",
65 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
66 cl::init(false),
67 cl::cat(AggregatorCategory));
69 static cl::opt<bool> IgnoreInterruptLBR(
70 "ignore-interrupt-lbr",
71 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
72 cl::init(true), cl::cat(AggregatorCategory));
74 static cl::opt<unsigned long long>
75 MaxSamples("max-samples",
76 cl::init(-1ULL),
77 cl::desc("maximum number of samples to read from LBR profile"),
78 cl::Optional,
79 cl::Hidden,
80 cl::cat(AggregatorCategory));
82 extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
84 cl::opt<bool> ReadPreAggregated(
85 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
86 cl::cat(AggregatorCategory));
88 static cl::opt<bool>
89 TimeAggregator("time-aggr",
90 cl::desc("time BOLT aggregator"),
91 cl::init(false),
92 cl::ZeroOrMore,
93 cl::cat(AggregatorCategory));
95 static cl::opt<bool>
96 UseEventPC("use-event-pc",
97 cl::desc("use event PC in combination with LBR sampling"),
98 cl::cat(AggregatorCategory));
100 static cl::opt<bool> WriteAutoFDOData(
101 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
102 cl::cat(AggregatorCategory));
104 } // namespace opts
106 namespace {
108 const char TimerGroupName[] = "aggregator";
109 const char TimerGroupDesc[] = "Aggregator";
111 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
112 std::vector<SectionNameAndRange> sections;
113 for (BinarySection &Section : BC->sections()) {
114 if (!Section.isText())
115 continue;
116 if (Section.getSize() == 0)
117 continue;
118 sections.push_back(
119 {Section.getName(), Section.getAddress(), Section.getEndAddress()});
121 llvm::sort(sections,
122 [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
123 return A.BeginAddress < B.BeginAddress;
125 return sections;
129 constexpr uint64_t DataAggregator::KernelBaseAddr;
131 DataAggregator::~DataAggregator() { deleteTempFiles(); }
133 namespace {
134 void deleteTempFile(const std::string &FileName) {
135 if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
136 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
137 << " with error " << Errc.message() << "\n";
141 void DataAggregator::deleteTempFiles() {
142 for (std::string &FileName : TempFiles)
143 deleteTempFile(FileName);
144 TempFiles.clear();
147 void DataAggregator::findPerfExecutable() {
148 std::optional<std::string> PerfExecutable =
149 sys::Process::FindInEnvPath("PATH", "perf");
150 if (!PerfExecutable) {
151 outs() << "PERF2BOLT: No perf executable found!\n";
152 exit(1);
154 PerfPath = *PerfExecutable;
157 void DataAggregator::start() {
158 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
160 // Don't launch perf for pre-aggregated files
161 if (opts::ReadPreAggregated)
162 return;
164 findPerfExecutable();
166 if (opts::BasicAggregation)
167 launchPerfProcess("events without LBR",
168 MainEventsPPI,
169 "script -F pid,event,ip",
170 /*Wait = */false);
171 else
172 launchPerfProcess("branch events",
173 MainEventsPPI,
174 "script -F pid,ip,brstack",
175 /*Wait = */false);
177 // Note: we launch script for mem events regardless of the option, as the
178 // command fails fairly fast if mem events were not collected.
179 launchPerfProcess("mem events",
180 MemEventsPPI,
181 "script -F pid,event,addr,ip",
182 /*Wait = */false);
184 launchPerfProcess("process events", MMapEventsPPI,
185 "script --show-mmap-events --no-itrace",
186 /*Wait = */ false);
188 launchPerfProcess("task events", TaskEventsPPI,
189 "script --show-task-events --no-itrace",
190 /*Wait = */ false);
193 void DataAggregator::abort() {
194 if (opts::ReadPreAggregated)
195 return;
197 std::string Error;
199 // Kill subprocesses in case they are not finished
200 sys::Wait(TaskEventsPPI.PI, 1, &Error);
201 sys::Wait(MMapEventsPPI.PI, 1, &Error);
202 sys::Wait(MainEventsPPI.PI, 1, &Error);
203 sys::Wait(MemEventsPPI.PI, 1, &Error);
205 deleteTempFiles();
207 exit(1);
210 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
211 const char *ArgsString, bool Wait) {
212 SmallVector<StringRef, 4> Argv;
214 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
215 Argv.push_back(PerfPath.data());
217 StringRef(ArgsString).split(Argv, ' ');
218 Argv.push_back("-f");
219 Argv.push_back("-i");
220 Argv.push_back(Filename.c_str());
222 if (std::error_code Errc =
223 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
224 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
225 << " with error " << Errc.message() << "\n";
226 exit(1);
228 TempFiles.push_back(PPI.StdoutPath.data());
230 if (std::error_code Errc =
231 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
232 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
233 << " with error " << Errc.message() << "\n";
234 exit(1);
236 TempFiles.push_back(PPI.StderrPath.data());
238 std::optional<StringRef> Redirects[] = {
239 std::nullopt, // Stdin
240 StringRef(PPI.StdoutPath.data()), // Stdout
241 StringRef(PPI.StderrPath.data())}; // Stderr
243 LLVM_DEBUG({
244 dbgs() << "Launching perf: ";
245 for (StringRef Arg : Argv)
246 dbgs() << Arg << " ";
247 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
248 << "\n";
251 if (Wait)
252 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
253 /*envp*/ std::nullopt, Redirects);
254 else
255 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ std::nullopt,
256 Redirects);
259 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
260 PerfProcessInfo BuildIDProcessInfo;
261 launchPerfProcess("buildid list",
262 BuildIDProcessInfo,
263 "buildid-list",
264 /*Wait = */true);
266 if (BuildIDProcessInfo.PI.ReturnCode != 0) {
267 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
268 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
269 StringRef ErrBuf = (*MB)->getBuffer();
271 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
272 << '\n';
273 errs() << ErrBuf;
274 return;
277 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
278 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
279 if (std::error_code EC = MB.getError()) {
280 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
281 << EC.message() << "\n";
282 return;
285 FileBuf = std::move(*MB);
286 ParsingBuf = FileBuf->getBuffer();
288 std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
289 if (!FileName) {
290 if (hasAllBuildIDs()) {
291 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
292 "This indicates the input binary supplied for data aggregation "
293 "is not the same recorded by perf when collecting profiling "
294 "data, or there were no samples recorded for the binary. "
295 "Use -ignore-build-id option to override.\n";
296 if (!opts::IgnoreBuildID)
297 abort();
298 } else {
299 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
300 "data was recorded without it\n";
301 return;
303 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
304 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
305 BuildIDBinaryName = std::string(*FileName);
306 } else {
307 outs() << "PERF2BOLT: matched build-id and file name\n";
311 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
312 if (opts::ReadPreAggregated)
313 return true;
315 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
316 if (!FD) {
317 consumeError(FD.takeError());
318 return false;
321 char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
323 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
324 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
325 *FD, MutableArrayRef(Buf, sizeof(Buf)), 0);
326 if (!BytesRead) {
327 consumeError(BytesRead.takeError());
328 return false;
331 if (*BytesRead != 7)
332 return false;
334 if (strncmp(Buf, "PERFILE", 7) == 0)
335 return true;
336 return false;
339 void DataAggregator::parsePreAggregated() {
340 std::string Error;
342 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
343 MemoryBuffer::getFileOrSTDIN(Filename);
344 if (std::error_code EC = MB.getError()) {
345 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
346 << EC.message() << "\n";
347 exit(1);
350 FileBuf = std::move(*MB);
351 ParsingBuf = FileBuf->getBuffer();
352 Col = 0;
353 Line = 1;
354 if (parsePreAggregatedLBRSamples()) {
355 errs() << "PERF2BOLT: failed to parse samples\n";
356 exit(1);
360 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
361 outs() << "PERF2BOLT: writing data for autofdo tools...\n";
362 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
363 TimerGroupDesc, opts::TimeAggregator);
365 std::error_code EC;
366 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
367 if (EC)
368 return EC;
370 // Format:
371 // number of unique traces
372 // from_1-to_1:count_1
373 // from_2-to_2:count_2
374 // ......
375 // from_n-to_n:count_n
376 // number of unique sample addresses
377 // addr_1:count_1
378 // addr_2:count_2
379 // ......
380 // addr_n:count_n
381 // number of unique LBR entries
382 // src_1->dst_1:count_1
383 // src_2->dst_2:count_2
384 // ......
385 // src_n->dst_n:count_n
387 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
389 // AutoFDO addresses are relative to the first allocated loadable program
390 // segment
391 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
392 if (Address < FirstAllocAddress)
393 return 0;
394 return Address - FirstAllocAddress;
397 OutFile << FallthroughLBRs.size() << "\n";
398 for (const auto &[Trace, Info] : FallthroughLBRs) {
399 OutFile << formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace.From),
400 filterAddress(Trace.To),
401 Info.InternCount + Info.ExternCount);
404 OutFile << BasicSamples.size() << "\n";
405 for (const auto [PC, HitCount] : BasicSamples)
406 OutFile << formatv("{0:x-}:{1}\n", filterAddress(PC), HitCount);
408 OutFile << BranchLBRs.size() << "\n";
409 for (const auto &[Trace, Info] : BranchLBRs) {
410 OutFile << formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace.From),
411 filterAddress(Trace.To), Info.TakenCount);
414 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
415 << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
416 << " unique branches to " << OutputFilename << "\n";
418 return std::error_code();
421 void DataAggregator::filterBinaryMMapInfo() {
422 if (opts::FilterPID) {
423 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
424 if (MMapInfoIter != BinaryMMapInfo.end()) {
425 MMapInfo MMap = MMapInfoIter->second;
426 BinaryMMapInfo.clear();
427 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
428 } else {
429 if (errs().has_colors())
430 errs().changeColor(raw_ostream::RED);
431 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
432 << opts::FilterPID << "\""
433 << " for binary \"" << BC->getFilename() << "\".";
434 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
435 errs() << " Profile for the following process is available:\n";
436 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
437 outs() << " " << MMI.second.PID
438 << (MMI.second.Forked ? " (forked)\n" : "\n");
440 if (errs().has_colors())
441 errs().resetColor();
443 exit(1);
448 int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
449 PerfProcessErrorCallbackTy Callback) {
450 std::string Error;
451 outs() << "PERF2BOLT: waiting for perf " << Name
452 << " collection to finish...\n";
453 sys::ProcessInfo PI = sys::Wait(Process.PI, std::nullopt, &Error);
455 if (!Error.empty()) {
456 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
457 deleteTempFiles();
458 exit(1);
461 if (PI.ReturnCode != 0) {
462 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
463 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
464 StringRef ErrBuf = (*ErrorMB)->getBuffer();
466 deleteTempFiles();
467 Callback(PI.ReturnCode, ErrBuf);
468 return PI.ReturnCode;
471 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
472 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
473 if (std::error_code EC = MB.getError()) {
474 errs() << "Cannot open " << Process.StdoutPath.data() << ": "
475 << EC.message() << "\n";
476 deleteTempFiles();
477 exit(1);
480 FileBuf = std::move(*MB);
481 ParsingBuf = FileBuf->getBuffer();
482 Col = 0;
483 Line = 1;
484 return PI.ReturnCode;
487 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
488 this->BC = &BC;
490 if (opts::ReadPreAggregated) {
491 parsePreAggregated();
492 return Error::success();
495 if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
496 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
497 processFileBuildID(*FileBuildID);
498 } else {
499 errs() << "BOLT-WARNING: build-id will not be checked because we could "
500 "not read one from input binary\n";
503 auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
504 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
505 exit(1);
508 auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
509 Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
510 "Cannot print 'addr' field.");
511 if (!NoData.match(ErrBuf))
512 ErrorCallback(ReturnCode, ErrBuf);
515 if (opts::LinuxKernelMode) {
516 // Current MMap parsing logic does not work with linux kernel.
517 // MMap entries for linux kernel uses PERF_RECORD_MMAP
518 // format instead of typical PERF_RECORD_MMAP2 format.
519 // Since linux kernel address mapping is absolute (same as
520 // in the ELF file), we avoid parsing MMap in linux kernel mode.
521 // While generating optimized linux kernel binary, we may need
522 // to parse MMap entries.
524 // In linux kernel mode, we analyze and optimize
525 // all linux kernel binary instructions, irrespective
526 // of whether they are due to system calls or due to
527 // interrupts. Therefore, we cannot ignore interrupt
528 // in Linux kernel mode.
529 opts::IgnoreInterruptLBR = false;
530 } else {
531 prepareToParse("mmap events", MMapEventsPPI, ErrorCallback);
532 if (parseMMapEvents())
533 errs() << "PERF2BOLT: failed to parse mmap events\n";
536 prepareToParse("task events", TaskEventsPPI, ErrorCallback);
537 if (parseTaskEvents())
538 errs() << "PERF2BOLT: failed to parse task events\n";
540 filterBinaryMMapInfo();
541 prepareToParse("events", MainEventsPPI, ErrorCallback);
543 if (opts::HeatmapMode) {
544 if (std::error_code EC = printLBRHeatMap()) {
545 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
546 exit(1);
548 exit(0);
551 if ((!opts::BasicAggregation && parseBranchEvents()) ||
552 (opts::BasicAggregation && parseBasicEvents()))
553 errs() << "PERF2BOLT: failed to parse samples\n";
555 // We can finish early if the goal is just to generate data for autofdo
556 if (opts::WriteAutoFDOData) {
557 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
558 errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
560 deleteTempFiles();
561 exit(0);
564 // Special handling for memory events
565 if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
566 return Error::success();
568 if (const std::error_code EC = parseMemEvents())
569 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
570 << '\n';
572 deleteTempFiles();
574 return Error::success();
577 Error DataAggregator::readProfile(BinaryContext &BC) {
578 processProfile(BC);
580 for (auto &BFI : BC.getBinaryFunctions()) {
581 BinaryFunction &Function = BFI.second;
582 convertBranchData(Function);
585 if (opts::AggregateOnly &&
586 opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata) {
587 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
588 report_error("cannot create output data file", EC);
591 return Error::success();
594 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
595 return Function.hasProfileAvailable();
598 void DataAggregator::processProfile(BinaryContext &BC) {
599 if (opts::ReadPreAggregated)
600 processPreAggregated();
601 else if (opts::BasicAggregation)
602 processBasicEvents();
603 else
604 processBranchEvents();
606 processMemEvents();
608 // Mark all functions with registered events as having a valid profile.
609 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
610 : BinaryFunction::PF_LBR;
611 for (auto &BFI : BC.getBinaryFunctions()) {
612 BinaryFunction &BF = BFI.second;
613 if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
614 BF.markProfiled(Flags);
617 for (auto &FuncBranches : NamesToBranches)
618 llvm::stable_sort(FuncBranches.second.Data);
620 for (auto &MemEvents : NamesToMemEvents)
621 llvm::stable_sort(MemEvents.second.Data);
623 // Release intermediate storage.
624 clear(BranchLBRs);
625 clear(FallthroughLBRs);
626 clear(AggregatedLBRs);
627 clear(BasicSamples);
628 clear(MemSamples);
631 BinaryFunction *
632 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
633 if (!BC->containsAddress(Address))
634 return nullptr;
636 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
637 /*UseMaxSize=*/true);
640 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
641 uint64_t Count) {
642 if (!BAT)
643 return Func.getOneName();
645 const BinaryFunction *OrigFunc = &Func;
646 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
647 NumColdSamples += Count;
648 BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
649 if (HotFunc)
650 OrigFunc = HotFunc;
652 // If it is a local function, prefer the name containing the file name where
653 // the local function was declared
654 for (StringRef AlternativeName : OrigFunc->getNames()) {
655 size_t FileNameIdx = AlternativeName.find('/');
656 // Confirm the alternative name has the pattern Symbol/FileName/1 before
657 // using it
658 if (FileNameIdx == StringRef::npos ||
659 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
660 continue;
661 return AlternativeName;
663 return OrigFunc->getOneName();
666 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
667 uint64_t Count) {
668 auto I = NamesToSamples.find(Func.getOneName());
669 if (I == NamesToSamples.end()) {
670 bool Success;
671 StringRef LocName = getLocationName(Func, Count);
672 std::tie(I, Success) = NamesToSamples.insert(
673 std::make_pair(Func.getOneName(),
674 FuncSampleData(LocName, FuncSampleData::ContainerTy())));
677 Address -= Func.getAddress();
678 if (BAT)
679 Address = BAT->translate(Func.getAddress(), Address, /*IsBranchSrc=*/false);
681 I->second.bumpCount(Address, Count);
682 return true;
685 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
686 uint64_t To, uint64_t Count,
687 uint64_t Mispreds) {
688 FuncBranchData *AggrData = getBranchData(Func);
689 if (!AggrData) {
690 AggrData = &NamesToBranches[Func.getOneName()];
691 AggrData->Name = getLocationName(Func, Count);
692 setBranchData(Func, AggrData);
695 From -= Func.getAddress();
696 To -= Func.getAddress();
697 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
698 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
699 if (BAT) {
700 From = BAT->translate(Func.getAddress(), From, /*IsBranchSrc=*/true);
701 To = BAT->translate(Func.getAddress(), To, /*IsBranchSrc=*/false);
702 LLVM_DEBUG(
703 dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
704 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
707 AggrData->bumpBranchCount(From, To, Count, Mispreds);
708 return true;
711 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
712 BinaryFunction *ToFunc, uint64_t From,
713 uint64_t To, uint64_t Count,
714 uint64_t Mispreds) {
715 FuncBranchData *FromAggrData = nullptr;
716 FuncBranchData *ToAggrData = nullptr;
717 StringRef SrcFunc;
718 StringRef DstFunc;
719 if (FromFunc) {
720 SrcFunc = getLocationName(*FromFunc, Count);
721 FromAggrData = getBranchData(*FromFunc);
722 if (!FromAggrData) {
723 FromAggrData = &NamesToBranches[FromFunc->getOneName()];
724 FromAggrData->Name = SrcFunc;
725 setBranchData(*FromFunc, FromAggrData);
727 From -= FromFunc->getAddress();
728 if (BAT)
729 From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
731 recordExit(*FromFunc, From, Mispreds, Count);
733 if (ToFunc) {
734 DstFunc = getLocationName(*ToFunc, 0);
735 ToAggrData = getBranchData(*ToFunc);
736 if (!ToAggrData) {
737 ToAggrData = &NamesToBranches[ToFunc->getOneName()];
738 ToAggrData->Name = DstFunc;
739 setBranchData(*ToFunc, ToAggrData);
741 To -= ToFunc->getAddress();
742 if (BAT)
743 To = BAT->translate(ToFunc->getAddress(), To, /*IsBranchSrc=*/false);
745 recordEntry(*ToFunc, To, Mispreds, Count);
748 if (FromAggrData)
749 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
750 Count, Mispreds);
751 if (ToAggrData)
752 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
753 Count, Mispreds);
754 return true;
757 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
758 uint64_t Mispreds) {
759 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
760 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
761 if (!FromFunc && !ToFunc)
762 return false;
764 // Treat recursive control transfers as inter-branches.
765 if (FromFunc == ToFunc && (To != ToFunc->getAddress())) {
766 recordBranch(*FromFunc, From - FromFunc->getAddress(),
767 To - FromFunc->getAddress(), Count, Mispreds);
768 return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
771 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
774 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
775 uint64_t Count) {
776 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
777 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
778 if (!FromFunc || !ToFunc) {
779 LLVM_DEBUG({
780 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
781 << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
782 << " and ending in " << ToFunc->getPrintName()
783 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
785 NumLongRangeTraces += Count;
786 return false;
788 if (FromFunc != ToFunc) {
789 NumInvalidTraces += Count;
790 LLVM_DEBUG({
791 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
792 << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
793 << " and ending in " << ToFunc->getPrintName()
794 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
796 return false;
799 std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
800 BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
801 Second.From)
802 : getFallthroughsInTrace(*FromFunc, First, Second, Count);
803 if (!FTs) {
804 LLVM_DEBUG(
805 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
806 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
807 << " and ending in " << ToFunc->getPrintName() << " @ "
808 << ToFunc->getPrintName() << " @ "
809 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
810 NumInvalidTraces += Count;
811 return false;
814 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
815 << FromFunc->getPrintName() << ":"
816 << Twine::utohexstr(First.To) << " to "
817 << Twine::utohexstr(Second.From) << ".\n");
818 for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
819 doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
820 Pair.second + FromFunc->getAddress(), Count, false);
822 return true;
825 bool DataAggregator::recordTrace(
826 BinaryFunction &BF, const LBREntry &FirstLBR, const LBREntry &SecondLBR,
827 uint64_t Count,
828 SmallVector<std::pair<uint64_t, uint64_t>, 16> &Branches) const {
829 BinaryContext &BC = BF.getBinaryContext();
831 if (!BF.isSimple())
832 return false;
834 assert(BF.hasCFG() && "can only record traces in CFG state");
836 // Offsets of the trace within this function.
837 const uint64_t From = FirstLBR.To - BF.getAddress();
838 const uint64_t To = SecondLBR.From - BF.getAddress();
840 if (From > To)
841 return false;
843 const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
844 const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
846 if (!FromBB || !ToBB)
847 return false;
849 // Adjust FromBB if the first LBR is a return from the last instruction in
850 // the previous block (that instruction should be a call).
851 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
852 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
853 const BinaryBasicBlock *PrevBB =
854 BF.getLayout().getBlock(FromBB->getIndex() - 1);
855 if (PrevBB->getSuccessor(FromBB->getLabel())) {
856 const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
857 if (Instr && BC.MIB->isCall(*Instr))
858 FromBB = PrevBB;
859 else
860 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
861 << '\n');
862 } else {
863 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
867 // Fill out information for fall-through edges. The From and To could be
868 // within the same basic block, e.g. when two call instructions are in the
869 // same block. In this case we skip the processing.
870 if (FromBB == ToBB)
871 return true;
873 // Process blocks in the original layout order.
874 BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex());
875 assert(BB == FromBB && "index mismatch");
876 while (BB != ToBB) {
877 BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1);
878 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
880 // Check for bad LBRs.
881 if (!BB->getSuccessor(NextBB->getLabel())) {
882 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
883 << " " << FirstLBR << '\n'
884 << " " << SecondLBR << '\n');
885 return false;
888 const MCInst *Instr = BB->getLastNonPseudoInstr();
889 uint64_t Offset = 0;
890 if (Instr)
891 Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
892 else
893 Offset = BB->getOffset();
895 Branches.emplace_back(Offset, NextBB->getOffset());
897 BB = NextBB;
900 // Record fall-through jumps
901 for (const auto &[FromOffset, ToOffset] : Branches) {
902 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(FromOffset);
903 BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(ToOffset);
904 assert(FromBB && ToBB);
905 BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
906 BI.Count += Count;
909 return true;
912 std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
913 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
914 const LBREntry &FirstLBR,
915 const LBREntry &SecondLBR,
916 uint64_t Count) const {
917 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
919 if (!recordTrace(BF, FirstLBR, SecondLBR, Count, Res))
920 return std::nullopt;
922 return Res;
925 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
926 uint64_t Count) const {
927 if (To > BF.getSize())
928 return false;
930 if (!BF.hasProfile())
931 BF.ExecutionCount = 0;
933 BinaryBasicBlock *EntryBB = nullptr;
934 if (To == 0) {
935 BF.ExecutionCount += Count;
936 if (!BF.empty())
937 EntryBB = &BF.front();
938 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
939 if (BB->isEntryPoint())
940 EntryBB = BB;
943 if (EntryBB)
944 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
946 return true;
949 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
950 uint64_t Count) const {
951 if (!BF.isSimple() || From > BF.getSize())
952 return false;
954 if (!BF.hasProfile())
955 BF.ExecutionCount = 0;
957 return true;
960 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
961 LBREntry Res;
962 ErrorOr<StringRef> FromStrRes = parseString('/');
963 if (std::error_code EC = FromStrRes.getError())
964 return EC;
965 StringRef OffsetStr = FromStrRes.get();
966 if (OffsetStr.getAsInteger(0, Res.From)) {
967 reportError("expected hexadecimal number with From address");
968 Diag << "Found: " << OffsetStr << "\n";
969 return make_error_code(llvm::errc::io_error);
972 ErrorOr<StringRef> ToStrRes = parseString('/');
973 if (std::error_code EC = ToStrRes.getError())
974 return EC;
975 OffsetStr = ToStrRes.get();
976 if (OffsetStr.getAsInteger(0, Res.To)) {
977 reportError("expected hexadecimal number with To address");
978 Diag << "Found: " << OffsetStr << "\n";
979 return make_error_code(llvm::errc::io_error);
982 ErrorOr<StringRef> MispredStrRes = parseString('/');
983 if (std::error_code EC = MispredStrRes.getError())
984 return EC;
985 StringRef MispredStr = MispredStrRes.get();
986 if (MispredStr.size() != 1 ||
987 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
988 reportError("expected single char for mispred bit");
989 Diag << "Found: " << MispredStr << "\n";
990 return make_error_code(llvm::errc::io_error);
992 Res.Mispred = MispredStr[0] == 'M';
994 static bool MispredWarning = true;
995 if (MispredStr[0] == '-' && MispredWarning) {
996 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
997 MispredWarning = false;
1000 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1001 if (std::error_code EC = Rest.getError())
1002 return EC;
1003 if (Rest.get().size() < 5) {
1004 reportError("expected rest of LBR entry");
1005 Diag << "Found: " << Rest.get() << "\n";
1006 return make_error_code(llvm::errc::io_error);
1008 return Res;
1011 bool DataAggregator::checkAndConsumeFS() {
1012 if (ParsingBuf[0] != FieldSeparator)
1013 return false;
1015 ParsingBuf = ParsingBuf.drop_front(1);
1016 Col += 1;
1017 return true;
1020 void DataAggregator::consumeRestOfLine() {
1021 size_t LineEnd = ParsingBuf.find_first_of('\n');
1022 if (LineEnd == StringRef::npos) {
1023 ParsingBuf = StringRef();
1024 Col = 0;
1025 Line += 1;
1026 return;
1028 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1029 Col = 0;
1030 Line += 1;
1033 bool DataAggregator::checkNewLine() {
1034 return ParsingBuf[0] == '\n';
1037 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1038 PerfBranchSample Res;
1040 while (checkAndConsumeFS()) {
1043 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1044 if (std::error_code EC = PIDRes.getError())
1045 return EC;
1046 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1047 if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1048 consumeRestOfLine();
1049 return make_error_code(errc::no_such_process);
1052 while (checkAndConsumeFS()) {
1055 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1056 if (std::error_code EC = PCRes.getError())
1057 return EC;
1058 Res.PC = PCRes.get();
1060 if (checkAndConsumeNewLine())
1061 return Res;
1063 while (!checkAndConsumeNewLine()) {
1064 checkAndConsumeFS();
1066 ErrorOr<LBREntry> LBRRes = parseLBREntry();
1067 if (std::error_code EC = LBRRes.getError())
1068 return EC;
1069 LBREntry LBR = LBRRes.get();
1070 if (ignoreKernelInterrupt(LBR))
1071 continue;
1072 if (!BC->HasFixedLoadAddress)
1073 adjustLBR(LBR, MMapInfoIter->second);
1074 Res.LBR.push_back(LBR);
1077 return Res;
1080 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1081 while (checkAndConsumeFS()) {
1084 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1085 if (std::error_code EC = PIDRes.getError())
1086 return EC;
1088 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1089 if (MMapInfoIter == BinaryMMapInfo.end()) {
1090 consumeRestOfLine();
1091 return PerfBasicSample{StringRef(), 0};
1094 while (checkAndConsumeFS()) {
1097 ErrorOr<StringRef> Event = parseString(FieldSeparator);
1098 if (std::error_code EC = Event.getError())
1099 return EC;
1101 while (checkAndConsumeFS()) {
1104 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1105 if (std::error_code EC = AddrRes.getError())
1106 return EC;
1108 if (!checkAndConsumeNewLine()) {
1109 reportError("expected end of line");
1110 return make_error_code(llvm::errc::io_error);
1113 uint64_t Address = *AddrRes;
1114 if (!BC->HasFixedLoadAddress)
1115 adjustAddress(Address, MMapInfoIter->second);
1117 return PerfBasicSample{Event.get(), Address};
1120 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1121 PerfMemSample Res{0, 0};
1123 while (checkAndConsumeFS()) {
1126 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1127 if (std::error_code EC = PIDRes.getError())
1128 return EC;
1130 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1131 if (MMapInfoIter == BinaryMMapInfo.end()) {
1132 consumeRestOfLine();
1133 return Res;
1136 while (checkAndConsumeFS()) {
1139 ErrorOr<StringRef> Event = parseString(FieldSeparator);
1140 if (std::error_code EC = Event.getError())
1141 return EC;
1142 if (!Event.get().contains("mem-loads")) {
1143 consumeRestOfLine();
1144 return Res;
1147 while (checkAndConsumeFS()) {
1150 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1151 if (std::error_code EC = AddrRes.getError())
1152 return EC;
1154 while (checkAndConsumeFS()) {
1157 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1158 if (std::error_code EC = PCRes.getError()) {
1159 consumeRestOfLine();
1160 return EC;
1163 if (!checkAndConsumeNewLine()) {
1164 reportError("expected end of line");
1165 return make_error_code(llvm::errc::io_error);
1168 uint64_t Address = *AddrRes;
1169 if (!BC->HasFixedLoadAddress)
1170 adjustAddress(Address, MMapInfoIter->second);
1172 return PerfMemSample{PCRes.get(), Address};
1175 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1176 auto parseOffset = [this]() -> ErrorOr<Location> {
1177 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1178 if (std::error_code EC = Res.getError())
1179 return EC;
1180 return Location(Res.get());
1183 size_t Sep = ParsingBuf.find_first_of(" \n");
1184 if (Sep == StringRef::npos)
1185 return parseOffset();
1186 StringRef LookAhead = ParsingBuf.substr(0, Sep);
1187 if (LookAhead.find_first_of(":") == StringRef::npos)
1188 return parseOffset();
1190 ErrorOr<StringRef> BuildID = parseString(':');
1191 if (std::error_code EC = BuildID.getError())
1192 return EC;
1193 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1194 if (std::error_code EC = Offset.getError())
1195 return EC;
1196 return Location(true, BuildID.get(), Offset.get());
1199 ErrorOr<DataAggregator::AggregatedLBREntry>
1200 DataAggregator::parseAggregatedLBREntry() {
1201 while (checkAndConsumeFS()) {
1204 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1205 if (std::error_code EC = TypeOrErr.getError())
1206 return EC;
1207 auto Type = AggregatedLBREntry::BRANCH;
1208 if (TypeOrErr.get() == "B") {
1209 Type = AggregatedLBREntry::BRANCH;
1210 } else if (TypeOrErr.get() == "F") {
1211 Type = AggregatedLBREntry::FT;
1212 } else if (TypeOrErr.get() == "f") {
1213 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1214 } else {
1215 reportError("expected B, F or f");
1216 return make_error_code(llvm::errc::io_error);
1219 while (checkAndConsumeFS()) {
1221 ErrorOr<Location> From = parseLocationOrOffset();
1222 if (std::error_code EC = From.getError())
1223 return EC;
1225 while (checkAndConsumeFS()) {
1227 ErrorOr<Location> To = parseLocationOrOffset();
1228 if (std::error_code EC = To.getError())
1229 return EC;
1231 while (checkAndConsumeFS()) {
1233 ErrorOr<int64_t> Frequency =
1234 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1235 if (std::error_code EC = Frequency.getError())
1236 return EC;
1238 uint64_t Mispreds = 0;
1239 if (Type == AggregatedLBREntry::BRANCH) {
1240 while (checkAndConsumeFS()) {
1242 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1243 if (std::error_code EC = MispredsOrErr.getError())
1244 return EC;
1245 Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1248 if (!checkAndConsumeNewLine()) {
1249 reportError("expected end of line");
1250 return make_error_code(llvm::errc::io_error);
1253 return AggregatedLBREntry{From.get(), To.get(),
1254 static_cast<uint64_t>(Frequency.get()), Mispreds,
1255 Type};
1258 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1259 return opts::IgnoreInterruptLBR &&
1260 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1263 std::error_code DataAggregator::printLBRHeatMap() {
1264 outs() << "PERF2BOLT: parse branch events...\n";
1265 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1266 TimerGroupDesc, opts::TimeAggregator);
1268 if (opts::LinuxKernelMode) {
1269 opts::HeatmapMaxAddress = 0xffffffffffffffff;
1270 opts::HeatmapMinAddress = KernelBaseAddr;
1272 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1273 opts::HeatmapMaxAddress, getTextSections(BC));
1274 uint64_t NumTotalSamples = 0;
1276 if (opts::BasicAggregation) {
1277 while (hasData()) {
1278 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1279 if (std::error_code EC = SampleRes.getError()) {
1280 if (EC == errc::no_such_process)
1281 continue;
1282 return EC;
1284 PerfBasicSample &Sample = SampleRes.get();
1285 HM.registerAddress(Sample.PC);
1286 NumTotalSamples++;
1288 outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1289 } else {
1290 while (hasData()) {
1291 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1292 if (std::error_code EC = SampleRes.getError()) {
1293 if (EC == errc::no_such_process)
1294 continue;
1295 return EC;
1298 PerfBranchSample &Sample = SampleRes.get();
1300 // LBRs are stored in reverse execution order. NextLBR refers to the next
1301 // executed branch record.
1302 const LBREntry *NextLBR = nullptr;
1303 for (const LBREntry &LBR : Sample.LBR) {
1304 if (NextLBR) {
1305 // Record fall-through trace.
1306 const uint64_t TraceFrom = LBR.To;
1307 const uint64_t TraceTo = NextLBR->From;
1308 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1310 NextLBR = &LBR;
1312 if (!Sample.LBR.empty()) {
1313 HM.registerAddress(Sample.LBR.front().To);
1314 HM.registerAddress(Sample.LBR.back().From);
1316 NumTotalSamples += Sample.LBR.size();
1318 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1319 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1322 if (!NumTotalSamples) {
1323 if (opts::BasicAggregation) {
1324 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1325 "Cannot build heatmap.";
1326 } else {
1327 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1328 "Cannot build heatmap. Use -nl for building heatmap from "
1329 "basic events.\n";
1331 exit(1);
1334 outs() << "HEATMAP: building heat map...\n";
1336 for (const auto &LBR : FallthroughLBRs) {
1337 const Trace &Trace = LBR.first;
1338 const FTInfo &Info = LBR.second;
1339 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1342 if (HM.getNumInvalidRanges())
1343 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1345 if (!HM.size()) {
1346 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1347 exit(1);
1350 HM.print(opts::OutputFilename);
1351 if (opts::OutputFilename == "-")
1352 HM.printCDF(opts::OutputFilename);
1353 else
1354 HM.printCDF(opts::OutputFilename + ".csv");
1355 if (opts::OutputFilename == "-")
1356 HM.printSectionHotness(opts::OutputFilename);
1357 else
1358 HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
1360 return std::error_code();
// Aggregate the LBR entries of a single branch sample.
//
// For every entry that has a known next PC, the fall-through trace from the
// entry's target to that PC is classified:
//   - both ends inside the same function: counted in FallthroughLBRs, as
//     internal or external depending on whether the branch source is inside
//     that function as well;
//   - start in one known function, end in another known function: counted in
//     NumInvalidTraces;
//   - anything else: counted in NumLongRangeTraces.
// Independently, every branch with at least one end inside a known function
// is accumulated in BranchLBRs; an end outside any known function is recorded
// as address 0.
//
// When NeedsSkylakeFix is set, the first two entries of the sample are
// skipped entirely. Returns the number of fall-through traces examined.
1363 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1364 bool NeedsSkylakeFix) {
1365 uint64_t NumTraces{0};
1366 // LBRs are stored in reverse execution order. NextPC refers to the next
1367 // recorded executed PC.
// With opts::UseEventPC the sample PC seeds the chain; otherwise the first
// entry has no fall-through trace.
1368 uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
1369 uint32_t NumEntry = 0;
1370 for (const LBREntry &LBR : Sample.LBR) {
1371 ++NumEntry;
1372 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1373 // sometimes record entry 32 as an exact copy of entry 31. This will cause
1374 // us to likely record an invalid trace and generate a stale function for
1375 // BAT mode (non BAT disassembles the function and is able to ignore this
1376 // trace at aggregation time). Drop first 2 entries (last two, in
1377 // chronological order)
1378 if (NeedsSkylakeFix && NumEntry <= 2)
1379 continue;
1380 if (NextPC) {
1381 // Record fall-through trace.
1382 const uint64_t TraceFrom = LBR.To;
1383 const uint64_t TraceTo = NextPC;
1384 const BinaryFunction *TraceBF =
1385 getBinaryFunctionContainingAddress(TraceFrom);
1386 if (TraceBF && TraceBF->containsAddress(TraceTo)) {
1387 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1388 if (TraceBF->containsAddress(LBR.From))
1389 ++Info.InternCount;
1390 else
1391 ++Info.ExternCount;
1392 } else {
1393 const BinaryFunction *ToFunc =
1394 getBinaryFunctionContainingAddress(TraceTo);
1395 if (TraceBF && ToFunc) {
1396 LLVM_DEBUG({
1397 dbgs() << "Invalid trace starting in " << TraceBF->getPrintName()
1398 << formatv(" @ {0:x}", TraceFrom - TraceBF->getAddress())
1399 << formatv(" and ending @ {0:x}\n", TraceTo);
1401 ++NumInvalidTraces;
1402 } else {
1403 LLVM_DEBUG({
1404 dbgs() << "Out of range trace starting in "
1405 << (TraceBF ? TraceBF->getPrintName() : "None")
1406 << formatv(" @ {0:x}",
1407 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
1408 << " and ending in "
1409 << (ToFunc ? ToFunc->getPrintName() : "None")
1410 << formatv(" @ {0:x}\n",
1411 TraceTo - (ToFunc ? ToFunc->getAddress() : 0));
1413 ++NumLongRangeTraces;
1416 ++NumTraces;
1418 NextPC = LBR.From;
// Replace branch ends outside any known function with 0 and drop branches
// that have no known end at all.
1420 uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
1421 uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
1422 if (!From && !To)
1423 continue;
1424 BranchInfo &Info = BranchLBRs[Trace(From, To)];
1425 ++Info.TakenCount;
1426 Info.MispredCount += LBR.Mispred;
1428 return NumTraces;
// Read branch samples from the parsing buffer and aggregate their LBRs.
//
// Parsing stops when the data is exhausted or opts::MaxSamples samples have
// been read. Samples rejected with errc::no_such_process are counted as read
// but otherwise ignored; any other parse error is returned to the caller.
// After parsing, every function containing an end of a recorded branch is
// marked as having profile available, and summary statistics are printed.
1431 std::error_code DataAggregator::parseBranchEvents() {
1432 outs() << "PERF2BOLT: parse branch events...\n";
1433 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1434 TimerGroupDesc, opts::TimeAggregator);
// NumTotalSamples counts every sample read, NumSamples only the ones that
// parsed successfully.
1436 uint64_t NumTotalSamples = 0;
1437 uint64_t NumEntries = 0;
1438 uint64_t NumSamples = 0;
1439 uint64_t NumSamplesNoLBR = 0;
1440 uint64_t NumTraces = 0;
1441 bool NeedsSkylakeFix = false;
1443 while (hasData() && NumTotalSamples < opts::MaxSamples) {
1444 ++NumTotalSamples;
1446 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1447 if (std::error_code EC = SampleRes.getError()) {
1448 if (EC == errc::no_such_process)
1449 continue;
1450 return EC;
1452 ++NumSamples;
1454 PerfBranchSample &Sample = SampleRes.get();
1455 if (opts::WriteAutoFDOData)
1456 ++BasicSamples[Sample.PC];
1458 if (Sample.LBR.empty()) {
1459 ++NumSamplesNoLBR;
1460 continue;
1463 NumEntries += Sample.LBR.size();
// Once a sample with exactly 32 entries is seen in BAT mode, the workaround
// stays enabled for this and all following samples.
1464 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1465 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1466 NeedsSkylakeFix = true;
1469 NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
1472 for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
1473 for (const uint64_t Addr : {Trace.From, Trace.To})
1474 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1475 BF->setHasProfileAvailable();
// Print a percentage in parentheses, colored green, yellow (above T1) or red
// (above T2) when the stream supports colors.
1477 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1478 OS << " (";
1479 if (OS.has_colors()) {
1480 if (Percent > T2)
1481 OS.changeColor(raw_ostream::RED);
1482 else if (Percent > T1)
1483 OS.changeColor(raw_ostream::YELLOW);
1484 else
1485 OS.changeColor(raw_ostream::GREEN);
1487 OS << format("%.1f%%", Percent);
1488 if (OS.has_colors())
1489 OS.resetColor();
1490 OS << ")";
1493 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1494 << " LBR entries\n";
1495 if (NumTotalSamples) {
1496 if (NumSamples && NumSamplesNoLBR == NumSamples) {
1497 // Note: we don't know if perf2bolt is being used to parse memory samples
1498 // at this point. In this case, it is OK to parse zero LBRs.
1499 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1500 "LBR. Record profile with perf record -j any or run perf2bolt "
1501 "in no-LBR mode with -nl (the performance improvement in -nl "
1502 "mode may be limited)\n";
1503 } else {
1504 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1505 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1506 outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1507 printColored(outs(), PercentIgnored, 20, 50);
1508 outs() << " were ignored\n";
1509 if (PercentIgnored > 50.0f)
1510 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1511 "were attributed to the input binary\n";
1514 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1515 << NumInvalidTraces;
1516 float Perc = 0.0f;
1517 if (NumTraces > 0) {
1518 Perc = NumInvalidTraces * 100.0f / NumTraces;
1519 printColored(outs(), Perc, 5, 10);
1521 outs() << "\n";
1522 if (Perc > 10.0f)
1523 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1524 "binary is probably not the same binary used during profiling "
1525 "collection. The generated data may be ineffective for improving "
1526 "performance.\n\n";
1528 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1529 << NumLongRangeTraces;
1530 if (NumTraces > 0)
1531 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1532 outs() << "\n";
1534 if (NumColdSamples > 0) {
1535 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1536 outs() << "PERF2BOLT: " << NumColdSamples
1537 << format(" (%.1f%%)", ColdSamples)
1538 << " samples recorded in cold regions of split functions.\n";
1539 if (ColdSamples > 5.0f)
1540 outs()
1541 << "WARNING: The BOLT-processed binary where samples were collected "
1542 "likely used bad data or your service observed a large shift in "
1543 "profile. You may want to audit this.\n";
1546 return std::error_code();
1549 void DataAggregator::processBranchEvents() {
1550 outs() << "PERF2BOLT: processing branch events...\n";
1551 NamedRegionTimer T("processBranch", "Processing branch events",
1552 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1554 for (const auto &AggrLBR : FallthroughLBRs) {
1555 const Trace &Loc = AggrLBR.first;
1556 const FTInfo &Info = AggrLBR.second;
1557 LBREntry First{Loc.From, Loc.From, false};
1558 LBREntry Second{Loc.To, Loc.To, false};
1559 if (Info.InternCount)
1560 doTrace(First, Second, Info.InternCount);
1561 if (Info.ExternCount) {
1562 First.From = 0;
1563 doTrace(First, Second, Info.ExternCount);
1567 for (const auto &AggrLBR : BranchLBRs) {
1568 const Trace &Loc = AggrLBR.first;
1569 const BranchInfo &Info = AggrLBR.second;
1570 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1574 std::error_code DataAggregator::parseBasicEvents() {
1575 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1576 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1577 TimerGroupDesc, opts::TimeAggregator);
1578 while (hasData()) {
1579 ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1580 if (std::error_code EC = Sample.getError())
1581 return EC;
1583 if (!Sample->PC)
1584 continue;
1586 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1587 BF->setHasProfileAvailable();
1589 ++BasicSamples[Sample->PC];
1590 EventNames.insert(Sample->EventName);
1593 return std::error_code();
1596 void DataAggregator::processBasicEvents() {
1597 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1598 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1599 TimerGroupDesc, opts::TimeAggregator);
1600 uint64_t OutOfRangeSamples = 0;
1601 uint64_t NumSamples = 0;
1602 for (auto &Sample : BasicSamples) {
1603 const uint64_t PC = Sample.first;
1604 const uint64_t HitCount = Sample.second;
1605 NumSamples += HitCount;
1606 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1607 if (!Func) {
1608 OutOfRangeSamples += HitCount;
1609 continue;
1612 doSample(*Func, PC, HitCount);
1614 outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1616 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1617 << OutOfRangeSamples;
1618 float Perc = 0.0f;
1619 if (NumSamples > 0) {
1620 outs() << " (";
1621 Perc = OutOfRangeSamples * 100.0f / NumSamples;
1622 if (outs().has_colors()) {
1623 if (Perc > 60.0f)
1624 outs().changeColor(raw_ostream::RED);
1625 else if (Perc > 40.0f)
1626 outs().changeColor(raw_ostream::YELLOW);
1627 else
1628 outs().changeColor(raw_ostream::GREEN);
1630 outs() << format("%.1f%%", Perc);
1631 if (outs().has_colors())
1632 outs().resetColor();
1633 outs() << ")";
1635 outs() << "\n";
1636 if (Perc > 80.0f)
1637 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1638 "binary is probably not the same binary used during profiling "
1639 "collection. The generated data may be ineffective for improving "
1640 "performance.\n\n";
1643 std::error_code DataAggregator::parseMemEvents() {
1644 outs() << "PERF2BOLT: parsing memory events...\n";
1645 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1646 TimerGroupDesc, opts::TimeAggregator);
1647 while (hasData()) {
1648 ErrorOr<PerfMemSample> Sample = parseMemSample();
1649 if (std::error_code EC = Sample.getError())
1650 return EC;
1652 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1653 BF->setHasProfileAvailable();
1655 MemSamples.emplace_back(std::move(Sample.get()));
1658 return std::error_code();
1661 void DataAggregator::processMemEvents() {
1662 NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1663 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1664 for (const PerfMemSample &Sample : MemSamples) {
1665 uint64_t PC = Sample.PC;
1666 uint64_t Addr = Sample.Addr;
1667 StringRef FuncName;
1668 StringRef MemName;
1670 // Try to resolve symbol for PC
1671 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1672 if (!Func) {
1673 LLVM_DEBUG(if (PC != 0) {
1674 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr);
1676 continue;
1679 FuncName = Func->getOneName();
1680 PC -= Func->getAddress();
1682 // Try to resolve symbol for memory load
1683 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1684 MemName = BD->getName();
1685 Addr -= BD->getAddress();
1686 } else if (opts::FilterMemProfile) {
1687 // Filter out heap/stack accesses
1688 continue;
1691 const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1692 const Location AddrLoc(!MemName.empty(), MemName, Addr);
1694 FuncMemData *MemData = &NamesToMemEvents[FuncName];
1695 MemData->Name = FuncName;
1696 setMemData(*Func, MemData);
1697 MemData->update(FuncLoc, AddrLoc);
1698 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1702 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1703 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1704 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1705 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1706 while (hasData()) {
1707 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1708 if (std::error_code EC = AggrEntry.getError())
1709 return EC;
1711 for (const uint64_t Addr : {AggrEntry->From.Offset, AggrEntry->To.Offset})
1712 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1713 BF->setHasProfileAvailable();
1715 AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1718 return std::error_code();
1721 void DataAggregator::processPreAggregated() {
1722 outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1723 NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1724 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1726 uint64_t NumTraces = 0;
1727 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1728 switch (AggrEntry.EntryType) {
1729 case AggregatedLBREntry::BRANCH:
1730 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1731 AggrEntry.Mispreds);
1732 break;
1733 case AggregatedLBREntry::FT:
1734 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1735 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1736 ? AggrEntry.From.Offset
1737 : 0,
1738 AggrEntry.From.Offset, false};
1739 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1740 doTrace(First, Second, AggrEntry.Count);
1741 NumTraces += AggrEntry.Count;
1742 break;
1747 outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1748 << " aggregated LBR entries\n";
1749 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1750 << NumInvalidTraces;
1751 float Perc = 0.0f;
1752 if (NumTraces > 0) {
1753 outs() << " (";
1754 Perc = NumInvalidTraces * 100.0f / NumTraces;
1755 if (outs().has_colors()) {
1756 if (Perc > 10.0f)
1757 outs().changeColor(raw_ostream::RED);
1758 else if (Perc > 5.0f)
1759 outs().changeColor(raw_ostream::YELLOW);
1760 else
1761 outs().changeColor(raw_ostream::GREEN);
1763 outs() << format("%.1f%%", Perc);
1764 if (outs().has_colors())
1765 outs().resetColor();
1766 outs() << ")";
1768 outs() << "\n";
1769 if (Perc > 10.0f)
1770 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1771 "binary is probably not the same binary used during profiling "
1772 "collection. The generated data may be ineffective for improving "
1773 "performance.\n\n";
1775 outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1776 << NumLongRangeTraces;
1777 if (NumTraces > 0)
1778 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1779 outs() << "\n";
1782 std::optional<int32_t> DataAggregator::parseCommExecEvent() {
1783 size_t LineEnd = ParsingBuf.find_first_of("\n");
1784 if (LineEnd == StringRef::npos) {
1785 reportError("expected rest of line");
1786 Diag << "Found: " << ParsingBuf << "\n";
1787 return std::nullopt;
1789 StringRef Line = ParsingBuf.substr(0, LineEnd);
1791 size_t Pos = Line.find("PERF_RECORD_COMM exec");
1792 if (Pos == StringRef::npos)
1793 return std::nullopt;
1794 Line = Line.drop_front(Pos);
1796 // Line:
1797 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1798 StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1799 int32_t PID;
1800 if (PIDStr.getAsInteger(10, PID)) {
1801 reportError("expected PID");
1802 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1803 return std::nullopt;
1806 return PID;
1809 namespace {
1810 std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1811 const StringRef SecTimeStr = TimeStr.split('.').first;
1812 const StringRef USecTimeStr = TimeStr.split('.').second;
1813 uint64_t SecTime;
1814 uint64_t USecTime;
1815 if (SecTimeStr.getAsInteger(10, SecTime) ||
1816 USecTimeStr.getAsInteger(10, USecTime))
1817 return std::nullopt;
1818 return SecTime * 1000000ULL + USecTime;
1822 std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1823 while (checkAndConsumeFS()) {
1826 size_t LineEnd = ParsingBuf.find_first_of("\n");
1827 if (LineEnd == StringRef::npos) {
1828 reportError("expected rest of line");
1829 Diag << "Found: " << ParsingBuf << "\n";
1830 return std::nullopt;
1832 StringRef Line = ParsingBuf.substr(0, LineEnd);
1834 size_t Pos = Line.find("PERF_RECORD_FORK");
1835 if (Pos == StringRef::npos) {
1836 consumeRestOfLine();
1837 return std::nullopt;
1840 ForkInfo FI;
1842 const StringRef TimeStr =
1843 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1844 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1845 FI.Time = *TimeRes;
1848 Line = Line.drop_front(Pos);
1850 // Line:
1851 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1852 const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1853 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1854 reportError("expected PID");
1855 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1856 return std::nullopt;
1859 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1860 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1861 reportError("expected PID");
1862 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1863 return std::nullopt;
1866 consumeRestOfLine();
1868 return FI;
1871 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1872 DataAggregator::parseMMapEvent() {
1873 while (checkAndConsumeFS()) {
1876 MMapInfo ParsedInfo;
1878 size_t LineEnd = ParsingBuf.find_first_of("\n");
1879 if (LineEnd == StringRef::npos) {
1880 reportError("expected rest of line");
1881 Diag << "Found: " << ParsingBuf << "\n";
1882 return make_error_code(llvm::errc::io_error);
1884 StringRef Line = ParsingBuf.substr(0, LineEnd);
1886 size_t Pos = Line.find("PERF_RECORD_MMAP2");
1887 if (Pos == StringRef::npos) {
1888 consumeRestOfLine();
1889 return std::make_pair(StringRef(), ParsedInfo);
1892 // Line:
1893 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1895 const StringRef TimeStr =
1896 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1897 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1898 ParsedInfo.Time = *TimeRes;
1900 Line = Line.drop_front(Pos);
1902 // Line:
1903 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1905 StringRef FileName = Line.rsplit(FieldSeparator).second;
1906 if (FileName.startswith("//") || FileName.startswith("[")) {
1907 consumeRestOfLine();
1908 return std::make_pair(StringRef(), ParsedInfo);
1910 FileName = sys::path::filename(FileName);
1912 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1913 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1914 reportError("expected PID");
1915 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1916 return make_error_code(llvm::errc::io_error);
1919 const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1920 if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
1921 reportError("expected base address");
1922 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1923 return make_error_code(llvm::errc::io_error);
1926 const StringRef SizeStr = Line.split('(').second.split(')').first;
1927 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1928 reportError("expected mmaped size");
1929 Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1930 return make_error_code(llvm::errc::io_error);
1933 const StringRef OffsetStr =
1934 Line.split('@').second.ltrim().split(FieldSeparator).first;
1935 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1936 reportError("expected mmaped page-aligned offset");
1937 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1938 return make_error_code(llvm::errc::io_error);
1941 consumeRestOfLine();
1943 return std::make_pair(FileName, ParsedInfo);
// Parse all mmap events and fill BinaryMMapInfo with the mappings that belong
// to the input binary.
//
// Every parsed mapping is first collected per file name; only the first
// mapping of a file for a given PID is kept. The mappings are then looked up
// by the input binary's file name, or by BuildIDBinaryName when the file name
// has no mapping and a build-ID name is set. Mappings that fail the segment
// check (fixed load address) or for which no base address can be computed
// (no fixed load address) are skipped with a warning.
//
// Returns the first parse error. Exits the process with status 1 when no
// mapping for the binary remains.
1946 std::error_code DataAggregator::parseMMapEvents() {
1947 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1948 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1949 TimerGroupDesc, opts::TimeAggregator);
1951 std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1952 while (hasData()) {
1953 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1954 if (std::error_code EC = FileMMapInfoRes.getError())
1955 return EC;
1957 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
// Skip results that carry no PID (PID == -1) and deleted files.
1958 if (FileMMapInfo.second.PID == -1)
1959 continue;
1960 if (FileMMapInfo.first.equals("(deleted)"))
1961 continue;
1963 // Consider only the first mapping of the file for any given PID
1964 auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
1965 bool PIDExists = llvm::any_of(make_range(Range), [&](const auto &MI) {
1966 return MI.second.PID == FileMMapInfo.second.PID;
1969 if (PIDExists)
1970 continue;
1972 GlobalMMapInfo.insert(FileMMapInfo);
1975 LLVM_DEBUG({
1976 dbgs() << "FileName -> mmap info:\n"
1977 << " Filename : PID [MMapAddr, Size, Offset]\n";
1978 for (const auto &[Name, MMap] : GlobalMMapInfo)
1979 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID,
1980 MMap.MMapAddress, MMap.Size, MMap.Offset);
// Fall back to the build-ID derived name only when the binary's own file
// name has no mapping in the profile.
1983 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
1984 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
1985 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
1986 << "\" for profile matching\n";
1987 NameToUse = BuildIDBinaryName;
1990 auto Range = GlobalMMapInfo.equal_range(NameToUse);
1991 for (MMapInfo &MMapInfo : llvm::make_second_range(make_range(Range))) {
1992 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
1993 // Check that the binary mapping matches one of the segments.
1994 bool MatchFound = llvm::any_of(
1995 llvm::make_second_range(BC->SegmentMapInfo),
1996 [&](SegmentInfo &SegInfo) {
1997 // The mapping is page-aligned and hence the MMapAddress could be
1998 // different from the segment start address. We cannot know the page
1999 // size of the mapping, but we know it should not exceed the segment
2000 // alignment value. Hence we are performing an approximate check.
2001 return SegInfo.Address >= MMapInfo.MMapAddress &&
2002 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment;
2004 if (!MatchFound) {
2005 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2006 << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2007 continue;
2011 // Set base address for shared objects.
2012 if (!BC->HasFixedLoadAddress) {
2013 std::optional<uint64_t> BaseAddress =
2014 BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2015 if (!BaseAddress) {
2016 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2017 "binary when memory mapped at 0x"
2018 << Twine::utohexstr(MMapInfo.MMapAddress)
2019 << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2020 << ". Ignoring profile data for this mapping\n";
2021 continue;
2022 } else {
2023 MMapInfo.BaseAddress = *BaseAddress;
2027 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
// Without any usable mapping the profile cannot be matched to the binary:
// list the binary names found in the profile (if any) and bail out.
2030 if (BinaryMMapInfo.empty()) {
2031 if (errs().has_colors())
2032 errs().changeColor(raw_ostream::RED);
2033 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2034 << BC->getFilename() << "\".";
2035 if (!GlobalMMapInfo.empty()) {
2036 errs() << " Profile for the following binary name(s) is available:\n";
// upper_bound() jumps past all entries of the current name, so each name is
// printed once.
2037 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2038 I = GlobalMMapInfo.upper_bound(I->first))
2039 errs() << " " << I->first << '\n';
2040 errs() << "Please rename the input binary.\n";
2041 } else {
2042 errs() << " Failed to extract any binary name from a profile.\n";
2044 if (errs().has_colors())
2045 errs().resetColor();
2047 exit(1);
2050 return std::error_code();
2053 std::error_code DataAggregator::parseTaskEvents() {
2054 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2055 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2056 TimerGroupDesc, opts::TimeAggregator);
2058 while (hasData()) {
2059 if (std::optional<int32_t> CommInfo = parseCommExecEvent()) {
2060 // Remove forked child that ran execve
2061 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2062 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2063 BinaryMMapInfo.erase(MMapInfoIter);
2064 consumeRestOfLine();
2065 continue;
2068 std::optional<ForkInfo> ForkInfo = parseForkEvent();
2069 if (!ForkInfo)
2070 continue;
2072 if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2073 continue;
2075 if (ForkInfo->Time == 0) {
2076 // Process was forked and mmaped before perf ran. In this case the child
2077 // should have its own mmap entry unless it was execve'd.
2078 continue;
2081 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2082 if (MMapInfoIter == BinaryMMapInfo.end())
2083 continue;
2085 MMapInfo MMapInfo = MMapInfoIter->second;
2086 MMapInfo.PID = ForkInfo->ChildPID;
2087 MMapInfo.Forked = true;
2088 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2091 outs() << "PERF2BOLT: input binary is associated with "
2092 << BinaryMMapInfo.size() << " PID(s)\n";
2094 LLVM_DEBUG({
2095 for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo))
2096 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI.PID,
2097 (MMI.Forked ? " (forked)" : ""), MMI.MMapAddress,
2098 MMI.Size);
2101 return std::error_code();
2104 std::optional<std::pair<StringRef, StringRef>>
2105 DataAggregator::parseNameBuildIDPair() {
2106 while (checkAndConsumeFS()) {
2109 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2110 if (std::error_code EC = BuildIDStr.getError())
2111 return std::nullopt;
2113 // If one of the strings is missing, don't issue a parsing error, but still
2114 // do not return a value.
2115 consumeAllRemainingFS();
2116 if (checkNewLine())
2117 return std::nullopt;
2119 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2120 if (std::error_code EC = NameStr.getError())
2121 return std::nullopt;
2123 consumeRestOfLine();
2124 return std::make_pair(NameStr.get(), BuildIDStr.get());
2127 bool DataAggregator::hasAllBuildIDs() {
2128 const StringRef SavedParsingBuf = ParsingBuf;
2130 if (!hasData())
2131 return false;
2133 bool HasInvalidEntries = false;
2134 while (hasData()) {
2135 if (!parseNameBuildIDPair()) {
2136 HasInvalidEntries = true;
2137 break;
2141 ParsingBuf = SavedParsingBuf;
2143 return !HasInvalidEntries;
2146 std::optional<StringRef>
2147 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2148 const StringRef SavedParsingBuf = ParsingBuf;
2150 StringRef FileName;
2151 while (hasData()) {
2152 std::optional<std::pair<StringRef, StringRef>> IDPair =
2153 parseNameBuildIDPair();
2154 if (!IDPair) {
2155 consumeRestOfLine();
2156 continue;
2159 if (IDPair->second.startswith(FileBuildID)) {
2160 FileName = sys::path::filename(IDPair->first);
2161 break;
2165 ParsingBuf = SavedParsingBuf;
2167 if (!FileName.empty())
2168 return FileName;
2170 return std::nullopt;
2173 std::error_code
2174 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2175 std::error_code EC;
2176 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2177 if (EC)
2178 return EC;
2180 bool WriteMemLocs = false;
2182 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2183 if (WriteMemLocs)
2184 OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2185 else
2186 OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2187 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2188 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2191 uint64_t BranchValues = 0;
2192 uint64_t MemValues = 0;
2194 if (BAT)
2195 OutFile << "boltedcollection\n";
2196 if (opts::BasicAggregation) {
2197 OutFile << "no_lbr";
2198 for (const StringMapEntry<std::nullopt_t> &Entry : EventNames)
2199 OutFile << " " << Entry.getKey();
2200 OutFile << "\n";
2202 for (const auto &KV : NamesToSamples) {
2203 const FuncSampleData &FSD = KV.second;
2204 for (const SampleInfo &SI : FSD.Data) {
2205 writeLocation(SI.Loc);
2206 OutFile << SI.Hits << "\n";
2207 ++BranchValues;
2210 } else {
2211 for (const auto &KV : NamesToBranches) {
2212 const FuncBranchData &FBD = KV.second;
2213 for (const llvm::bolt::BranchInfo &BI : FBD.Data) {
2214 writeLocation(BI.From);
2215 writeLocation(BI.To);
2216 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2217 ++BranchValues;
2219 for (const llvm::bolt::BranchInfo &BI : FBD.EntryData) {
2220 // Do not output if source is a known symbol, since this was already
2221 // accounted for in the source function
2222 if (BI.From.IsSymbol)
2223 continue;
2224 writeLocation(BI.From);
2225 writeLocation(BI.To);
2226 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2227 ++BranchValues;
2231 WriteMemLocs = true;
2232 for (const auto &KV : NamesToMemEvents) {
2233 const FuncMemData &FMD = KV.second;
2234 for (const MemInfo &MemEvent : FMD.Data) {
2235 writeLocation(MemEvent.Offset);
2236 writeLocation(MemEvent.Addr);
2237 OutFile << MemEvent.Count << "\n";
2238 ++MemValues;
2243 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2244 << " memory objects to " << OutputFilename << "\n";
2246 return std::error_code();
2249 void DataAggregator::dump() const { DataReader::dump(); }
2251 void DataAggregator::dump(const LBREntry &LBR) const {
2252 Diag << "From: " << Twine::utohexstr(LBR.From)
2253 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2254 << "\n";
2257 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2258 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2259 for (const LBREntry &LBR : Sample.LBR)
2260 dump(LBR);
2263 void DataAggregator::dump(const PerfMemSample &Sample) const {
2264 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";