[LLVM][IR] Use splat syntax when printing ConstantExpr based splats. (#116856)
[llvm-project.git] / bolt / lib / Profile / DataAggregator.cpp
blob697cac9fbcaa0886c3da6e20962aea7bc57ef989
1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by perf record,
10 // aggregates it, and then writes it back to an output file.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Passes/BinaryPasses.h"
18 #include "bolt/Profile/BoltAddressTranslation.h"
19 #include "bolt/Profile/Heatmap.h"
20 #include "bolt/Profile/YAMLProfileWriter.h"
21 #include "bolt/Utils/CommandLineOpts.h"
22 #include "bolt/Utils/Utils.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/ScopeExit.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Compiler.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/Errc.h"
29 #include "llvm/Support/FileSystem.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Program.h"
32 #include "llvm/Support/Regex.h"
33 #include "llvm/Support/Timer.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include <map>
36 #include <optional>
37 #include <unordered_map>
38 #include <utility>
40 #define DEBUG_TYPE "aggregator"
42 using namespace llvm;
43 using namespace bolt;
45 namespace opts {
47 static cl::opt<bool>
48 BasicAggregation("nl",
49 cl::desc("aggregate basic samples (without LBR info)"),
50 cl::cat(AggregatorCategory));
52 static cl::opt<std::string>
53 ITraceAggregation("itrace",
54 cl::desc("Generate LBR info with perf itrace argument"),
55 cl::cat(AggregatorCategory));
57 static cl::opt<bool>
58 FilterMemProfile("filter-mem-profile",
59 cl::desc("if processing a memory profile, filter out stack or heap accesses "
60 "that won't be useful for BOLT to reduce profile file size"),
61 cl::init(true),
62 cl::cat(AggregatorCategory));
64 static cl::opt<unsigned long long>
65 FilterPID("pid",
66 cl::desc("only use samples from process with specified PID"),
67 cl::init(0),
68 cl::Optional,
69 cl::cat(AggregatorCategory));
71 static cl::opt<bool>
72 IgnoreBuildID("ignore-build-id",
73 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
74 cl::init(false),
75 cl::cat(AggregatorCategory));
77 static cl::opt<bool> IgnoreInterruptLBR(
78 "ignore-interrupt-lbr",
79 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
80 cl::init(true), cl::cat(AggregatorCategory));
82 static cl::opt<unsigned long long>
83 MaxSamples("max-samples",
84 cl::init(-1ULL),
85 cl::desc("maximum number of samples to read from LBR profile"),
86 cl::Optional,
87 cl::Hidden,
88 cl::cat(AggregatorCategory));
90 extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
91 extern cl::opt<bool> ProfileWritePseudoProbes;
92 extern cl::opt<std::string> SaveProfile;
94 cl::opt<bool> ReadPreAggregated(
95 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
96 cl::cat(AggregatorCategory));
98 static cl::opt<bool>
99 TimeAggregator("time-aggr",
100 cl::desc("time BOLT aggregator"),
101 cl::init(false),
102 cl::ZeroOrMore,
103 cl::cat(AggregatorCategory));
105 static cl::opt<bool>
106 UseEventPC("use-event-pc",
107 cl::desc("use event PC in combination with LBR sampling"),
108 cl::cat(AggregatorCategory));
110 static cl::opt<bool> WriteAutoFDOData(
111 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
112 cl::cat(AggregatorCategory));
114 } // namespace opts
116 namespace {
118 const char TimerGroupName[] = "aggregator";
119 const char TimerGroupDesc[] = "Aggregator";
121 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
122 std::vector<SectionNameAndRange> sections;
123 for (BinarySection &Section : BC->sections()) {
124 if (!Section.isText())
125 continue;
126 if (Section.getSize() == 0)
127 continue;
128 sections.push_back(
129 {Section.getName(), Section.getAddress(), Section.getEndAddress()});
131 llvm::sort(sections,
132 [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
133 return A.BeginAddress < B.BeginAddress;
135 return sections;
139 constexpr uint64_t DataAggregator::KernelBaseAddr;
141 DataAggregator::~DataAggregator() { deleteTempFiles(); }
143 namespace {
144 void deleteTempFile(const std::string &FileName) {
145 if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
146 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
147 << " with error " << Errc.message() << "\n";
151 void DataAggregator::deleteTempFiles() {
152 for (std::string &FileName : TempFiles)
153 deleteTempFile(FileName);
154 TempFiles.clear();
157 void DataAggregator::findPerfExecutable() {
158 std::optional<std::string> PerfExecutable =
159 sys::Process::FindInEnvPath("PATH", "perf");
160 if (!PerfExecutable) {
161 outs() << "PERF2BOLT: No perf executable found!\n";
162 exit(1);
164 PerfPath = *PerfExecutable;
167 void DataAggregator::start() {
168 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
170 // Don't launch perf for pre-aggregated files
171 if (opts::ReadPreAggregated)
172 return;
174 findPerfExecutable();
176 if (opts::BasicAggregation) {
177 launchPerfProcess("events without LBR",
178 MainEventsPPI,
179 "script -F pid,event,ip",
180 /*Wait = */false);
181 } else if (!opts::ITraceAggregation.empty()) {
182 std::string ItracePerfScriptArgs = llvm::formatv(
183 "script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation);
184 launchPerfProcess("branch events with itrace", MainEventsPPI,
185 ItracePerfScriptArgs.c_str(),
186 /*Wait = */ false);
187 } else {
188 launchPerfProcess("branch events",
189 MainEventsPPI,
190 "script -F pid,ip,brstack",
191 /*Wait = */false);
194 // Note: we launch script for mem events regardless of the option, as the
195 // command fails fairly fast if mem events were not collected.
196 launchPerfProcess("mem events",
197 MemEventsPPI,
198 "script -F pid,event,addr,ip",
199 /*Wait = */false);
201 launchPerfProcess("process events", MMapEventsPPI,
202 "script --show-mmap-events --no-itrace",
203 /*Wait = */ false);
205 launchPerfProcess("task events", TaskEventsPPI,
206 "script --show-task-events --no-itrace",
207 /*Wait = */ false);
210 void DataAggregator::abort() {
211 if (opts::ReadPreAggregated)
212 return;
214 std::string Error;
216 // Kill subprocesses in case they are not finished
217 sys::Wait(TaskEventsPPI.PI, 1, &Error);
218 sys::Wait(MMapEventsPPI.PI, 1, &Error);
219 sys::Wait(MainEventsPPI.PI, 1, &Error);
220 sys::Wait(MemEventsPPI.PI, 1, &Error);
222 deleteTempFiles();
224 exit(1);
227 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
228 const char *ArgsString, bool Wait) {
229 SmallVector<StringRef, 4> Argv;
231 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
232 Argv.push_back(PerfPath.data());
234 StringRef(ArgsString).split(Argv, ' ');
235 Argv.push_back("-f");
236 Argv.push_back("-i");
237 Argv.push_back(Filename.c_str());
239 if (std::error_code Errc =
240 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
241 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
242 << " with error " << Errc.message() << "\n";
243 exit(1);
245 TempFiles.push_back(PPI.StdoutPath.data());
247 if (std::error_code Errc =
248 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
249 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
250 << " with error " << Errc.message() << "\n";
251 exit(1);
253 TempFiles.push_back(PPI.StderrPath.data());
255 std::optional<StringRef> Redirects[] = {
256 std::nullopt, // Stdin
257 StringRef(PPI.StdoutPath.data()), // Stdout
258 StringRef(PPI.StderrPath.data())}; // Stderr
260 LLVM_DEBUG({
261 dbgs() << "Launching perf: ";
262 for (StringRef Arg : Argv)
263 dbgs() << Arg << " ";
264 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
265 << "\n";
268 if (Wait)
269 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
270 /*envp*/ std::nullopt, Redirects);
271 else
272 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ std::nullopt,
273 Redirects);
276 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
277 PerfProcessInfo BuildIDProcessInfo;
278 launchPerfProcess("buildid list",
279 BuildIDProcessInfo,
280 "buildid-list",
281 /*Wait = */true);
283 if (BuildIDProcessInfo.PI.ReturnCode != 0) {
284 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
285 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
286 StringRef ErrBuf = (*MB)->getBuffer();
288 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
289 << '\n';
290 errs() << ErrBuf;
291 return;
294 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
295 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
296 if (std::error_code EC = MB.getError()) {
297 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
298 << EC.message() << "\n";
299 return;
302 FileBuf = std::move(*MB);
303 ParsingBuf = FileBuf->getBuffer();
305 std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
306 if (!FileName) {
307 if (hasAllBuildIDs()) {
308 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
309 "This indicates the input binary supplied for data aggregation "
310 "is not the same recorded by perf when collecting profiling "
311 "data, or there were no samples recorded for the binary. "
312 "Use -ignore-build-id option to override.\n";
313 if (!opts::IgnoreBuildID)
314 abort();
315 } else {
316 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
317 "data was recorded without it\n";
318 return;
320 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
321 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
322 BuildIDBinaryName = std::string(*FileName);
323 } else {
324 outs() << "PERF2BOLT: matched build-id and file name\n";
328 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
329 if (opts::ReadPreAggregated)
330 return true;
332 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
333 if (!FD) {
334 consumeError(FD.takeError());
335 return false;
338 char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
340 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
341 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
342 *FD, MutableArrayRef(Buf, sizeof(Buf)), 0);
343 if (!BytesRead) {
344 consumeError(BytesRead.takeError());
345 return false;
348 if (*BytesRead != 7)
349 return false;
351 if (strncmp(Buf, "PERFILE", 7) == 0)
352 return true;
353 return false;
356 void DataAggregator::parsePreAggregated() {
357 std::string Error;
359 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
360 MemoryBuffer::getFileOrSTDIN(Filename);
361 if (std::error_code EC = MB.getError()) {
362 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
363 << EC.message() << "\n";
364 exit(1);
367 FileBuf = std::move(*MB);
368 ParsingBuf = FileBuf->getBuffer();
369 Col = 0;
370 Line = 1;
371 if (parsePreAggregatedLBRSamples()) {
372 errs() << "PERF2BOLT: failed to parse samples\n";
373 exit(1);
377 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
378 outs() << "PERF2BOLT: writing data for autofdo tools...\n";
379 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
380 TimerGroupDesc, opts::TimeAggregator);
382 std::error_code EC;
383 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
384 if (EC)
385 return EC;
387 // Format:
388 // number of unique traces
389 // from_1-to_1:count_1
390 // from_2-to_2:count_2
391 // ......
392 // from_n-to_n:count_n
393 // number of unique sample addresses
394 // addr_1:count_1
395 // addr_2:count_2
396 // ......
397 // addr_n:count_n
398 // number of unique LBR entries
399 // src_1->dst_1:count_1
400 // src_2->dst_2:count_2
401 // ......
402 // src_n->dst_n:count_n
404 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
406 // AutoFDO addresses are relative to the first allocated loadable program
407 // segment
408 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
409 if (Address < FirstAllocAddress)
410 return 0;
411 return Address - FirstAllocAddress;
414 OutFile << FallthroughLBRs.size() << "\n";
415 for (const auto &[Trace, Info] : FallthroughLBRs) {
416 OutFile << formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace.From),
417 filterAddress(Trace.To),
418 Info.InternCount + Info.ExternCount);
421 OutFile << BasicSamples.size() << "\n";
422 for (const auto [PC, HitCount] : BasicSamples)
423 OutFile << formatv("{0:x-}:{1}\n", filterAddress(PC), HitCount);
425 OutFile << BranchLBRs.size() << "\n";
426 for (const auto &[Trace, Info] : BranchLBRs) {
427 OutFile << formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace.From),
428 filterAddress(Trace.To), Info.TakenCount);
431 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
432 << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
433 << " unique branches to " << OutputFilename << "\n";
435 return std::error_code();
438 void DataAggregator::filterBinaryMMapInfo() {
439 if (opts::FilterPID) {
440 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
441 if (MMapInfoIter != BinaryMMapInfo.end()) {
442 MMapInfo MMap = MMapInfoIter->second;
443 BinaryMMapInfo.clear();
444 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
445 } else {
446 if (errs().has_colors())
447 errs().changeColor(raw_ostream::RED);
448 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
449 << opts::FilterPID << "\""
450 << " for binary \"" << BC->getFilename() << "\".";
451 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
452 errs() << " Profile for the following process is available:\n";
453 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
454 outs() << " " << MMI.second.PID
455 << (MMI.second.Forked ? " (forked)\n" : "\n");
457 if (errs().has_colors())
458 errs().resetColor();
460 exit(1);
465 int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
466 PerfProcessErrorCallbackTy Callback) {
467 std::string Error;
468 outs() << "PERF2BOLT: waiting for perf " << Name
469 << " collection to finish...\n";
470 sys::ProcessInfo PI = sys::Wait(Process.PI, std::nullopt, &Error);
472 if (!Error.empty()) {
473 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
474 deleteTempFiles();
475 exit(1);
478 if (PI.ReturnCode != 0) {
479 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
480 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
481 StringRef ErrBuf = (*ErrorMB)->getBuffer();
483 deleteTempFiles();
484 Callback(PI.ReturnCode, ErrBuf);
485 return PI.ReturnCode;
488 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
489 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
490 if (std::error_code EC = MB.getError()) {
491 errs() << "Cannot open " << Process.StdoutPath.data() << ": "
492 << EC.message() << "\n";
493 deleteTempFiles();
494 exit(1);
497 FileBuf = std::move(*MB);
498 ParsingBuf = FileBuf->getBuffer();
499 Col = 0;
500 Line = 1;
501 return PI.ReturnCode;
504 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
505 this->BC = &BC;
507 if (opts::ReadPreAggregated) {
508 parsePreAggregated();
509 return Error::success();
512 if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
513 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
514 processFileBuildID(*FileBuildID);
515 } else {
516 errs() << "BOLT-WARNING: build-id will not be checked because we could "
517 "not read one from input binary\n";
520 auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
521 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
522 exit(1);
525 auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
526 Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
527 "Cannot print 'addr' field.");
528 if (!NoData.match(ErrBuf))
529 ErrorCallback(ReturnCode, ErrBuf);
532 if (BC.IsLinuxKernel) {
533 // Current MMap parsing logic does not work with linux kernel.
534 // MMap entries for linux kernel uses PERF_RECORD_MMAP
535 // format instead of typical PERF_RECORD_MMAP2 format.
536 // Since linux kernel address mapping is absolute (same as
537 // in the ELF file), we avoid parsing MMap in linux kernel mode.
538 // While generating optimized linux kernel binary, we may need
539 // to parse MMap entries.
541 // In linux kernel mode, we analyze and optimize
542 // all linux kernel binary instructions, irrespective
543 // of whether they are due to system calls or due to
544 // interrupts. Therefore, we cannot ignore interrupt
545 // in Linux kernel mode.
546 opts::IgnoreInterruptLBR = false;
547 } else {
548 prepareToParse("mmap events", MMapEventsPPI, ErrorCallback);
549 if (parseMMapEvents())
550 errs() << "PERF2BOLT: failed to parse mmap events\n";
553 prepareToParse("task events", TaskEventsPPI, ErrorCallback);
554 if (parseTaskEvents())
555 errs() << "PERF2BOLT: failed to parse task events\n";
557 filterBinaryMMapInfo();
558 prepareToParse("events", MainEventsPPI, ErrorCallback);
560 if (opts::HeatmapMode) {
561 if (std::error_code EC = printLBRHeatMap()) {
562 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
563 exit(1);
565 exit(0);
568 if ((!opts::BasicAggregation && parseBranchEvents()) ||
569 (opts::BasicAggregation && parseBasicEvents()))
570 errs() << "PERF2BOLT: failed to parse samples\n";
572 // We can finish early if the goal is just to generate data for autofdo
573 if (opts::WriteAutoFDOData) {
574 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
575 errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
577 deleteTempFiles();
578 exit(0);
581 // Special handling for memory events
582 if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
583 return Error::success();
585 if (const std::error_code EC = parseMemEvents())
586 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
587 << '\n';
589 deleteTempFiles();
591 return Error::success();
594 Error DataAggregator::readProfile(BinaryContext &BC) {
595 processProfile(BC);
597 for (auto &BFI : BC.getBinaryFunctions()) {
598 BinaryFunction &Function = BFI.second;
599 convertBranchData(Function);
602 if (opts::AggregateOnly) {
603 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata)
604 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
605 report_error("cannot create output data file", EC);
607 // BAT YAML is handled by DataAggregator since normal YAML output requires
608 // CFG which is not available in BAT mode.
609 if (usesBAT()) {
610 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
611 if (std::error_code EC = writeBATYAML(BC, opts::OutputFilename))
612 report_error("cannot create output data file", EC);
613 if (!opts::SaveProfile.empty())
614 if (std::error_code EC = writeBATYAML(BC, opts::SaveProfile))
615 report_error("cannot create output data file", EC);
619 return Error::success();
622 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
623 return Function.hasProfileAvailable();
626 void DataAggregator::processProfile(BinaryContext &BC) {
627 if (opts::ReadPreAggregated)
628 processPreAggregated();
629 else if (opts::BasicAggregation)
630 processBasicEvents();
631 else
632 processBranchEvents();
634 processMemEvents();
636 // Mark all functions with registered events as having a valid profile.
637 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
638 : BinaryFunction::PF_LBR;
639 for (auto &BFI : BC.getBinaryFunctions()) {
640 BinaryFunction &BF = BFI.second;
641 FuncBranchData *FBD = getBranchData(BF);
642 if (FBD || getFuncSampleData(BF.getNames())) {
643 BF.markProfiled(Flags);
644 if (FBD)
645 BF.RawBranchCount = FBD->getNumExecutedBranches();
649 for (auto &FuncBranches : NamesToBranches)
650 llvm::stable_sort(FuncBranches.second.Data);
652 for (auto &MemEvents : NamesToMemEvents)
653 llvm::stable_sort(MemEvents.second.Data);
655 // Release intermediate storage.
656 clear(BranchLBRs);
657 clear(FallthroughLBRs);
658 clear(AggregatedLBRs);
659 clear(BasicSamples);
660 clear(MemSamples);
663 BinaryFunction *
664 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
665 if (!BC->containsAddress(Address))
666 return nullptr;
668 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
669 /*UseMaxSize=*/true);
672 BinaryFunction *
673 DataAggregator::getBATParentFunction(const BinaryFunction &Func) const {
674 if (BAT)
675 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress()))
676 return getBinaryFunctionContainingAddress(HotAddr);
677 return nullptr;
680 StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
681 bool BAT) {
682 if (!BAT)
683 return Func.getOneName();
685 const BinaryFunction *OrigFunc = &Func;
686 // If it is a local function, prefer the name containing the file name where
687 // the local function was declared
688 for (StringRef AlternativeName : OrigFunc->getNames()) {
689 size_t FileNameIdx = AlternativeName.find('/');
690 // Confirm the alternative name has the pattern Symbol/FileName/1 before
691 // using it
692 if (FileNameIdx == StringRef::npos ||
693 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
694 continue;
695 return AlternativeName;
697 return OrigFunc->getOneName();
700 bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
701 uint64_t Count) {
702 BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
703 BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
704 if (ParentFunc)
705 NumColdSamples += Count;
707 auto I = NamesToSamples.find(Func.getOneName());
708 if (I == NamesToSamples.end()) {
709 bool Success;
710 StringRef LocName = getLocationName(Func, BAT);
711 std::tie(I, Success) = NamesToSamples.insert(
712 std::make_pair(Func.getOneName(),
713 FuncSampleData(LocName, FuncSampleData::ContainerTy())));
716 Address -= Func.getAddress();
717 if (BAT)
718 Address = BAT->translate(Func.getAddress(), Address, /*IsBranchSrc=*/false);
720 I->second.bumpCount(Address, Count);
721 return true;
724 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
725 uint64_t To, uint64_t Count,
726 uint64_t Mispreds) {
727 FuncBranchData *AggrData = getBranchData(Func);
728 if (!AggrData) {
729 AggrData = &NamesToBranches[Func.getOneName()];
730 AggrData->Name = getLocationName(Func, BAT);
731 setBranchData(Func, AggrData);
734 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
735 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
736 AggrData->bumpBranchCount(From, To, Count, Mispreds);
737 return true;
740 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
741 BinaryFunction *ToFunc, uint64_t From,
742 uint64_t To, uint64_t Count,
743 uint64_t Mispreds) {
744 FuncBranchData *FromAggrData = nullptr;
745 FuncBranchData *ToAggrData = nullptr;
746 StringRef SrcFunc;
747 StringRef DstFunc;
748 if (FromFunc) {
749 SrcFunc = getLocationName(*FromFunc, BAT);
750 FromAggrData = getBranchData(*FromFunc);
751 if (!FromAggrData) {
752 FromAggrData = &NamesToBranches[FromFunc->getOneName()];
753 FromAggrData->Name = SrcFunc;
754 setBranchData(*FromFunc, FromAggrData);
757 recordExit(*FromFunc, From, Mispreds, Count);
759 if (ToFunc) {
760 DstFunc = getLocationName(*ToFunc, BAT);
761 ToAggrData = getBranchData(*ToFunc);
762 if (!ToAggrData) {
763 ToAggrData = &NamesToBranches[ToFunc->getOneName()];
764 ToAggrData->Name = DstFunc;
765 setBranchData(*ToFunc, ToAggrData);
768 recordEntry(*ToFunc, To, Mispreds, Count);
771 if (FromAggrData)
772 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
773 Count, Mispreds);
774 if (ToAggrData)
775 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
776 Count, Mispreds);
777 return true;
780 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
781 uint64_t Mispreds, bool IsPreagg) {
782 // Returns whether \p Offset in \p Func contains a return instruction.
783 auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
784 auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
785 return Func.hasInstructions()
786 ? isReturn(Func.getInstructionAtOffset(Offset))
787 : isReturn(Func.disassembleInstructionAtOffset(Offset));
790 // Returns whether \p Offset in \p Func may be a call continuation excluding
791 // entry points and landing pads.
792 auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) {
793 // No call continuation at a function start.
794 if (!Offset)
795 return false;
797 // FIXME: support BAT case where the function might be in empty state
798 // (split fragments declared non-simple).
799 if (!Func.hasCFG())
800 return false;
802 // The offset should not be an entry point or a landing pad.
803 const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset);
804 return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad();
807 // Mutates \p Addr to an offset into the containing function, performing BAT
808 // offset translation and parent lookup.
810 // Returns the containing function (or BAT parent) and whether the address
811 // corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
812 auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
813 BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
814 if (!Func)
815 return std::pair{Func, false};
817 Addr -= Func->getAddress();
819 bool IsRetOrCallCont =
820 IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr);
822 if (BAT)
823 Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
825 BinaryFunction *ParentFunc = getBATParentFunction(*Func);
826 if (!ParentFunc)
827 return std::pair{Func, IsRetOrCallCont};
829 if (IsFrom)
830 NumColdSamples += Count;
832 return std::pair{ParentFunc, IsRetOrCallCont};
835 uint64_t ToOrig = To;
836 auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true);
837 auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false);
838 if (!FromFunc && !ToFunc)
839 return false;
841 // Record call to continuation trace.
842 if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) {
843 LBREntry First{ToOrig - 1, ToOrig - 1, false};
844 LBREntry Second{ToOrig, ToOrig, false};
845 return doTrace(First, Second, Count);
847 // Ignore returns.
848 if (IsReturn)
849 return true;
851 // Treat recursive control transfers as inter-branches.
852 if (FromFunc == ToFunc && To != 0) {
853 recordBranch(*FromFunc, From, To, Count, Mispreds);
854 return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
857 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
860 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
861 uint64_t Count) {
862 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
863 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
864 if (!FromFunc || !ToFunc) {
865 LLVM_DEBUG({
866 dbgs() << "Out of range trace starting in ";
867 if (FromFunc)
868 dbgs() << formatv("{0} @ {1:x}", *FromFunc,
869 First.To - FromFunc->getAddress());
870 else
871 dbgs() << Twine::utohexstr(First.To);
872 dbgs() << " and ending in ";
873 if (ToFunc)
874 dbgs() << formatv("{0} @ {1:x}", *ToFunc,
875 Second.From - ToFunc->getAddress());
876 else
877 dbgs() << Twine::utohexstr(Second.From);
878 dbgs() << '\n';
880 NumLongRangeTraces += Count;
881 return false;
883 if (FromFunc != ToFunc) {
884 NumInvalidTraces += Count;
885 LLVM_DEBUG({
886 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
887 << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
888 << " and ending in " << ToFunc->getPrintName()
889 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
891 return false;
894 // Set ParentFunc to BAT parent function or FromFunc itself.
895 BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
896 if (!ParentFunc)
897 ParentFunc = FromFunc;
898 ParentFunc->SampleCountInBytes += Count * (Second.From - First.To);
900 std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
901 BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
902 Second.From)
903 : getFallthroughsInTrace(*FromFunc, First, Second, Count);
904 if (!FTs) {
905 LLVM_DEBUG(
906 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
907 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
908 << " and ending in " << ToFunc->getPrintName() << " @ "
909 << ToFunc->getPrintName() << " @ "
910 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
911 NumInvalidTraces += Count;
912 return false;
915 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
916 << FromFunc->getPrintName() << ":"
917 << Twine::utohexstr(First.To) << " to "
918 << Twine::utohexstr(Second.From) << ".\n");
919 for (auto [From, To] : *FTs) {
920 if (BAT) {
921 From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
922 To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
924 doIntraBranch(*ParentFunc, From, To, Count, false);
927 return true;
930 std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
931 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
932 const LBREntry &FirstLBR,
933 const LBREntry &SecondLBR,
934 uint64_t Count) const {
935 SmallVector<std::pair<uint64_t, uint64_t>, 16> Branches;
937 BinaryContext &BC = BF.getBinaryContext();
939 if (!BF.isSimple())
940 return std::nullopt;
942 assert(BF.hasCFG() && "can only record traces in CFG state");
944 // Offsets of the trace within this function.
945 const uint64_t From = FirstLBR.To - BF.getAddress();
946 const uint64_t To = SecondLBR.From - BF.getAddress();
948 if (From > To)
949 return std::nullopt;
951 const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
952 const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
954 if (!FromBB || !ToBB)
955 return std::nullopt;
957 // Adjust FromBB if the first LBR is a return from the last instruction in
958 // the previous block (that instruction should be a call).
959 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
960 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
961 const BinaryBasicBlock *PrevBB =
962 BF.getLayout().getBlock(FromBB->getIndex() - 1);
963 if (PrevBB->getSuccessor(FromBB->getLabel())) {
964 const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
965 if (Instr && BC.MIB->isCall(*Instr))
966 FromBB = PrevBB;
967 else
968 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
969 << '\n');
970 } else {
971 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
975 // Fill out information for fall-through edges. The From and To could be
976 // within the same basic block, e.g. when two call instructions are in the
977 // same block. In this case we skip the processing.
978 if (FromBB == ToBB)
979 return Branches;
981 // Process blocks in the original layout order.
982 BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex());
983 assert(BB == FromBB && "index mismatch");
984 while (BB != ToBB) {
985 BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1);
986 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
988 // Check for bad LBRs.
989 if (!BB->getSuccessor(NextBB->getLabel())) {
990 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
991 << " " << FirstLBR << '\n'
992 << " " << SecondLBR << '\n');
993 return std::nullopt;
996 const MCInst *Instr = BB->getLastNonPseudoInstr();
997 uint64_t Offset = 0;
998 if (Instr)
999 Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
1000 else
1001 Offset = BB->getOffset();
1003 Branches.emplace_back(Offset, NextBB->getOffset());
1005 BB = NextBB;
1008 // Record fall-through jumps
1009 for (const auto &[FromOffset, ToOffset] : Branches) {
1010 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(FromOffset);
1011 BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(ToOffset);
1012 assert(FromBB && ToBB);
1013 BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
1014 BI.Count += Count;
1017 return Branches;
1020 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
1021 uint64_t Count) const {
1022 if (To > BF.getSize())
1023 return false;
1025 if (!BF.hasProfile())
1026 BF.ExecutionCount = 0;
1028 BinaryBasicBlock *EntryBB = nullptr;
1029 if (To == 0) {
1030 BF.ExecutionCount += Count;
1031 if (!BF.empty())
1032 EntryBB = &BF.front();
1033 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
1034 if (BB->isEntryPoint())
1035 EntryBB = BB;
1038 if (EntryBB)
1039 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
1041 return true;
1044 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
1045 uint64_t Count) const {
1046 if (!BF.isSimple() || From > BF.getSize())
1047 return false;
1049 if (!BF.hasProfile())
1050 BF.ExecutionCount = 0;
1052 return true;
1055 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
1056 LBREntry Res;
1057 ErrorOr<StringRef> FromStrRes = parseString('/');
1058 if (std::error_code EC = FromStrRes.getError())
1059 return EC;
1060 StringRef OffsetStr = FromStrRes.get();
1061 if (OffsetStr.getAsInteger(0, Res.From)) {
1062 reportError("expected hexadecimal number with From address");
1063 Diag << "Found: " << OffsetStr << "\n";
1064 return make_error_code(llvm::errc::io_error);
1067 ErrorOr<StringRef> ToStrRes = parseString('/');
1068 if (std::error_code EC = ToStrRes.getError())
1069 return EC;
1070 OffsetStr = ToStrRes.get();
1071 if (OffsetStr.getAsInteger(0, Res.To)) {
1072 reportError("expected hexadecimal number with To address");
1073 Diag << "Found: " << OffsetStr << "\n";
1074 return make_error_code(llvm::errc::io_error);
1077 ErrorOr<StringRef> MispredStrRes = parseString('/');
1078 if (std::error_code EC = MispredStrRes.getError())
1079 return EC;
1080 StringRef MispredStr = MispredStrRes.get();
1081 if (MispredStr.size() != 1 ||
1082 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1083 reportError("expected single char for mispred bit");
1084 Diag << "Found: " << MispredStr << "\n";
1085 return make_error_code(llvm::errc::io_error);
1087 Res.Mispred = MispredStr[0] == 'M';
1089 static bool MispredWarning = true;
1090 if (MispredStr[0] == '-' && MispredWarning) {
1091 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1092 MispredWarning = false;
1095 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1096 if (std::error_code EC = Rest.getError())
1097 return EC;
1098 if (Rest.get().size() < 5) {
1099 reportError("expected rest of LBR entry");
1100 Diag << "Found: " << Rest.get() << "\n";
1101 return make_error_code(llvm::errc::io_error);
1103 return Res;
1106 bool DataAggregator::checkAndConsumeFS() {
1107 if (ParsingBuf[0] != FieldSeparator)
1108 return false;
1110 ParsingBuf = ParsingBuf.drop_front(1);
1111 Col += 1;
1112 return true;
1115 void DataAggregator::consumeRestOfLine() {
1116 size_t LineEnd = ParsingBuf.find_first_of('\n');
1117 if (LineEnd == StringRef::npos) {
1118 ParsingBuf = StringRef();
1119 Col = 0;
1120 Line += 1;
1121 return;
1123 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1124 Col = 0;
1125 Line += 1;
1128 bool DataAggregator::checkNewLine() {
1129 return ParsingBuf[0] == '\n';
1132 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1133 PerfBranchSample Res;
1135 while (checkAndConsumeFS()) {
1138 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1139 if (std::error_code EC = PIDRes.getError())
1140 return EC;
1141 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1142 if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) {
1143 consumeRestOfLine();
1144 return make_error_code(errc::no_such_process);
1147 while (checkAndConsumeFS()) {
1150 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1151 if (std::error_code EC = PCRes.getError())
1152 return EC;
1153 Res.PC = PCRes.get();
1155 if (checkAndConsumeNewLine())
1156 return Res;
1158 while (!checkAndConsumeNewLine()) {
1159 checkAndConsumeFS();
1161 ErrorOr<LBREntry> LBRRes = parseLBREntry();
1162 if (std::error_code EC = LBRRes.getError())
1163 return EC;
1164 LBREntry LBR = LBRRes.get();
1165 if (ignoreKernelInterrupt(LBR))
1166 continue;
1167 if (!BC->HasFixedLoadAddress)
1168 adjustLBR(LBR, MMapInfoIter->second);
1169 Res.LBR.push_back(LBR);
1172 return Res;
1175 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1176 while (checkAndConsumeFS()) {
1179 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1180 if (std::error_code EC = PIDRes.getError())
1181 return EC;
1183 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1184 if (MMapInfoIter == BinaryMMapInfo.end()) {
1185 consumeRestOfLine();
1186 return PerfBasicSample{StringRef(), 0};
1189 while (checkAndConsumeFS()) {
1192 ErrorOr<StringRef> Event = parseString(FieldSeparator);
1193 if (std::error_code EC = Event.getError())
1194 return EC;
1196 while (checkAndConsumeFS()) {
1199 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1200 if (std::error_code EC = AddrRes.getError())
1201 return EC;
1203 if (!checkAndConsumeNewLine()) {
1204 reportError("expected end of line");
1205 return make_error_code(llvm::errc::io_error);
1208 uint64_t Address = *AddrRes;
1209 if (!BC->HasFixedLoadAddress)
1210 adjustAddress(Address, MMapInfoIter->second);
1212 return PerfBasicSample{Event.get(), Address};
1215 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1216 PerfMemSample Res{0, 0};
1218 while (checkAndConsumeFS()) {
1221 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1222 if (std::error_code EC = PIDRes.getError())
1223 return EC;
1225 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1226 if (MMapInfoIter == BinaryMMapInfo.end()) {
1227 consumeRestOfLine();
1228 return Res;
1231 while (checkAndConsumeFS()) {
1234 ErrorOr<StringRef> Event = parseString(FieldSeparator);
1235 if (std::error_code EC = Event.getError())
1236 return EC;
1237 if (!Event.get().contains("mem-loads")) {
1238 consumeRestOfLine();
1239 return Res;
1242 while (checkAndConsumeFS()) {
1245 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1246 if (std::error_code EC = AddrRes.getError())
1247 return EC;
1249 while (checkAndConsumeFS()) {
1252 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1253 if (std::error_code EC = PCRes.getError()) {
1254 consumeRestOfLine();
1255 return EC;
1258 if (!checkAndConsumeNewLine()) {
1259 reportError("expected end of line");
1260 return make_error_code(llvm::errc::io_error);
1263 uint64_t Address = *AddrRes;
1264 if (!BC->HasFixedLoadAddress)
1265 adjustAddress(Address, MMapInfoIter->second);
1267 return PerfMemSample{PCRes.get(), Address};
1270 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1271 auto parseOffset = [this]() -> ErrorOr<Location> {
1272 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1273 if (std::error_code EC = Res.getError())
1274 return EC;
1275 return Location(Res.get());
1278 size_t Sep = ParsingBuf.find_first_of(" \n");
1279 if (Sep == StringRef::npos)
1280 return parseOffset();
1281 StringRef LookAhead = ParsingBuf.substr(0, Sep);
1282 if (!LookAhead.contains(':'))
1283 return parseOffset();
1285 ErrorOr<StringRef> BuildID = parseString(':');
1286 if (std::error_code EC = BuildID.getError())
1287 return EC;
1288 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1289 if (std::error_code EC = Offset.getError())
1290 return EC;
1291 return Location(true, BuildID.get(), Offset.get());
1294 ErrorOr<DataAggregator::AggregatedLBREntry>
1295 DataAggregator::parseAggregatedLBREntry() {
1296 while (checkAndConsumeFS()) {
1299 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1300 if (std::error_code EC = TypeOrErr.getError())
1301 return EC;
1302 auto Type = AggregatedLBREntry::BRANCH;
1303 if (TypeOrErr.get() == "B") {
1304 Type = AggregatedLBREntry::BRANCH;
1305 } else if (TypeOrErr.get() == "F") {
1306 Type = AggregatedLBREntry::FT;
1307 } else if (TypeOrErr.get() == "f") {
1308 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1309 } else {
1310 reportError("expected B, F or f");
1311 return make_error_code(llvm::errc::io_error);
1314 while (checkAndConsumeFS()) {
1316 ErrorOr<Location> From = parseLocationOrOffset();
1317 if (std::error_code EC = From.getError())
1318 return EC;
1320 while (checkAndConsumeFS()) {
1322 ErrorOr<Location> To = parseLocationOrOffset();
1323 if (std::error_code EC = To.getError())
1324 return EC;
1326 while (checkAndConsumeFS()) {
1328 ErrorOr<int64_t> Frequency =
1329 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1330 if (std::error_code EC = Frequency.getError())
1331 return EC;
1333 uint64_t Mispreds = 0;
1334 if (Type == AggregatedLBREntry::BRANCH) {
1335 while (checkAndConsumeFS()) {
1337 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1338 if (std::error_code EC = MispredsOrErr.getError())
1339 return EC;
1340 Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1343 if (!checkAndConsumeNewLine()) {
1344 reportError("expected end of line");
1345 return make_error_code(llvm::errc::io_error);
1348 return AggregatedLBREntry{From.get(), To.get(),
1349 static_cast<uint64_t>(Frequency.get()), Mispreds,
1350 Type};
1353 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1354 return opts::IgnoreInterruptLBR &&
1355 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1358 std::error_code DataAggregator::printLBRHeatMap() {
1359 outs() << "PERF2BOLT: parse branch events...\n";
1360 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1361 TimerGroupDesc, opts::TimeAggregator);
1363 if (BC->IsLinuxKernel) {
1364 opts::HeatmapMaxAddress = 0xffffffffffffffff;
1365 opts::HeatmapMinAddress = KernelBaseAddr;
1367 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1368 opts::HeatmapMaxAddress, getTextSections(BC));
1369 uint64_t NumTotalSamples = 0;
1371 if (opts::BasicAggregation) {
1372 while (hasData()) {
1373 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1374 if (std::error_code EC = SampleRes.getError()) {
1375 if (EC == errc::no_such_process)
1376 continue;
1377 return EC;
1379 PerfBasicSample &Sample = SampleRes.get();
1380 HM.registerAddress(Sample.PC);
1381 NumTotalSamples++;
1383 outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1384 } else {
1385 while (hasData()) {
1386 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1387 if (std::error_code EC = SampleRes.getError()) {
1388 if (EC == errc::no_such_process)
1389 continue;
1390 return EC;
1393 PerfBranchSample &Sample = SampleRes.get();
1395 // LBRs are stored in reverse execution order. NextLBR refers to the next
1396 // executed branch record.
1397 const LBREntry *NextLBR = nullptr;
1398 for (const LBREntry &LBR : Sample.LBR) {
1399 if (NextLBR) {
1400 // Record fall-through trace.
1401 const uint64_t TraceFrom = LBR.To;
1402 const uint64_t TraceTo = NextLBR->From;
1403 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1405 NextLBR = &LBR;
1407 if (!Sample.LBR.empty()) {
1408 HM.registerAddress(Sample.LBR.front().To);
1409 HM.registerAddress(Sample.LBR.back().From);
1411 NumTotalSamples += Sample.LBR.size();
1413 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1414 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1417 if (!NumTotalSamples) {
1418 if (opts::BasicAggregation) {
1419 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1420 "Cannot build heatmap.";
1421 } else {
1422 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1423 "Cannot build heatmap. Use -nl for building heatmap from "
1424 "basic events.\n";
1426 exit(1);
1429 outs() << "HEATMAP: building heat map...\n";
1431 for (const auto &LBR : FallthroughLBRs) {
1432 const Trace &Trace = LBR.first;
1433 const FTInfo &Info = LBR.second;
1434 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1437 if (HM.getNumInvalidRanges())
1438 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1440 if (!HM.size()) {
1441 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1442 exit(1);
1445 HM.print(opts::OutputFilename);
1446 if (opts::OutputFilename == "-")
1447 HM.printCDF(opts::OutputFilename);
1448 else
1449 HM.printCDF(opts::OutputFilename + ".csv");
1450 if (opts::OutputFilename == "-")
1451 HM.printSectionHotness(opts::OutputFilename);
1452 else
1453 HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
1455 return std::error_code();
1458 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1459 bool NeedsSkylakeFix) {
1460 uint64_t NumTraces{0};
1461 // LBRs are stored in reverse execution order. NextPC refers to the next
1462 // recorded executed PC.
1463 uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
1464 uint32_t NumEntry = 0;
1465 for (const LBREntry &LBR : Sample.LBR) {
1466 ++NumEntry;
1467 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1468 // sometimes record entry 32 as an exact copy of entry 31. This will cause
1469 // us to likely record an invalid trace and generate a stale function for
1470 // BAT mode (non BAT disassembles the function and is able to ignore this
1471 // trace at aggregation time). Drop first 2 entries (last two, in
1472 // chronological order)
1473 if (NeedsSkylakeFix && NumEntry <= 2)
1474 continue;
1475 if (NextPC) {
1476 // Record fall-through trace.
1477 const uint64_t TraceFrom = LBR.To;
1478 const uint64_t TraceTo = NextPC;
1479 const BinaryFunction *TraceBF =
1480 getBinaryFunctionContainingAddress(TraceFrom);
1481 if (TraceBF && TraceBF->containsAddress(TraceTo)) {
1482 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1483 if (TraceBF->containsAddress(LBR.From))
1484 ++Info.InternCount;
1485 else
1486 ++Info.ExternCount;
1487 } else {
1488 const BinaryFunction *ToFunc =
1489 getBinaryFunctionContainingAddress(TraceTo);
1490 if (TraceBF && ToFunc) {
1491 LLVM_DEBUG({
1492 dbgs() << "Invalid trace starting in " << TraceBF->getPrintName()
1493 << formatv(" @ {0:x}", TraceFrom - TraceBF->getAddress())
1494 << formatv(" and ending @ {0:x}\n", TraceTo);
1496 ++NumInvalidTraces;
1497 } else {
1498 LLVM_DEBUG({
1499 dbgs() << "Out of range trace starting in "
1500 << (TraceBF ? TraceBF->getPrintName() : "None")
1501 << formatv(" @ {0:x}",
1502 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
1503 << " and ending in "
1504 << (ToFunc ? ToFunc->getPrintName() : "None")
1505 << formatv(" @ {0:x}\n",
1506 TraceTo - (ToFunc ? ToFunc->getAddress() : 0));
1508 ++NumLongRangeTraces;
1511 ++NumTraces;
1513 NextPC = LBR.From;
1515 uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
1516 uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
1517 if (!From && !To)
1518 continue;
1519 TakenBranchInfo &Info = BranchLBRs[Trace(From, To)];
1520 ++Info.TakenCount;
1521 Info.MispredCount += LBR.Mispred;
1523 return NumTraces;
1526 std::error_code DataAggregator::parseBranchEvents() {
1527 outs() << "PERF2BOLT: parse branch events...\n";
1528 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1529 TimerGroupDesc, opts::TimeAggregator);
1531 uint64_t NumTotalSamples = 0;
1532 uint64_t NumEntries = 0;
1533 uint64_t NumSamples = 0;
1534 uint64_t NumSamplesNoLBR = 0;
1535 uint64_t NumTraces = 0;
1536 bool NeedsSkylakeFix = false;
1538 while (hasData() && NumTotalSamples < opts::MaxSamples) {
1539 ++NumTotalSamples;
1541 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1542 if (std::error_code EC = SampleRes.getError()) {
1543 if (EC == errc::no_such_process)
1544 continue;
1545 return EC;
1547 ++NumSamples;
1549 PerfBranchSample &Sample = SampleRes.get();
1550 if (opts::WriteAutoFDOData)
1551 ++BasicSamples[Sample.PC];
1553 if (Sample.LBR.empty()) {
1554 ++NumSamplesNoLBR;
1555 continue;
1558 NumEntries += Sample.LBR.size();
1559 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1560 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1561 NeedsSkylakeFix = true;
1564 NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
1567 for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
1568 for (const uint64_t Addr : {Trace.From, Trace.To})
1569 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1570 BF->setHasProfileAvailable();
1572 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1573 OS << " (";
1574 if (OS.has_colors()) {
1575 if (Percent > T2)
1576 OS.changeColor(raw_ostream::RED);
1577 else if (Percent > T1)
1578 OS.changeColor(raw_ostream::YELLOW);
1579 else
1580 OS.changeColor(raw_ostream::GREEN);
1582 OS << format("%.1f%%", Percent);
1583 if (OS.has_colors())
1584 OS.resetColor();
1585 OS << ")";
1588 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1589 << " LBR entries\n";
1590 if (NumTotalSamples) {
1591 if (NumSamples && NumSamplesNoLBR == NumSamples) {
1592 // Note: we don't know if perf2bolt is being used to parse memory samples
1593 // at this point. In this case, it is OK to parse zero LBRs.
1594 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1595 "LBR. Record profile with perf record -j any or run perf2bolt "
1596 "in no-LBR mode with -nl (the performance improvement in -nl "
1597 "mode may be limited)\n";
1598 } else {
1599 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1600 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1601 outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1602 printColored(outs(), PercentIgnored, 20, 50);
1603 outs() << " were ignored\n";
1604 if (PercentIgnored > 50.0f)
1605 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1606 "were attributed to the input binary\n";
1609 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1610 << NumInvalidTraces;
1611 float Perc = 0.0f;
1612 if (NumTraces > 0) {
1613 Perc = NumInvalidTraces * 100.0f / NumTraces;
1614 printColored(outs(), Perc, 5, 10);
1616 outs() << "\n";
1617 if (Perc > 10.0f)
1618 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1619 "binary is probably not the same binary used during profiling "
1620 "collection. The generated data may be ineffective for improving "
1621 "performance.\n\n";
1623 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1624 << NumLongRangeTraces;
1625 if (NumTraces > 0)
1626 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1627 outs() << "\n";
1629 if (NumColdSamples > 0) {
1630 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1631 outs() << "PERF2BOLT: " << NumColdSamples
1632 << format(" (%.1f%%)", ColdSamples)
1633 << " samples recorded in cold regions of split functions.\n";
1634 if (ColdSamples > 5.0f)
1635 outs()
1636 << "WARNING: The BOLT-processed binary where samples were collected "
1637 "likely used bad data or your service observed a large shift in "
1638 "profile. You may want to audit this.\n";
1641 return std::error_code();
1644 void DataAggregator::processBranchEvents() {
1645 outs() << "PERF2BOLT: processing branch events...\n";
1646 NamedRegionTimer T("processBranch", "Processing branch events",
1647 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1649 for (const auto &AggrLBR : FallthroughLBRs) {
1650 const Trace &Loc = AggrLBR.first;
1651 const FTInfo &Info = AggrLBR.second;
1652 LBREntry First{Loc.From, Loc.From, false};
1653 LBREntry Second{Loc.To, Loc.To, false};
1654 if (Info.InternCount)
1655 doTrace(First, Second, Info.InternCount);
1656 if (Info.ExternCount) {
1657 First.From = 0;
1658 doTrace(First, Second, Info.ExternCount);
1662 for (const auto &AggrLBR : BranchLBRs) {
1663 const Trace &Loc = AggrLBR.first;
1664 const TakenBranchInfo &Info = AggrLBR.second;
1665 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount,
1666 /*IsPreagg*/ false);
1670 std::error_code DataAggregator::parseBasicEvents() {
1671 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1672 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1673 TimerGroupDesc, opts::TimeAggregator);
1674 while (hasData()) {
1675 ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1676 if (std::error_code EC = Sample.getError())
1677 return EC;
1679 if (!Sample->PC)
1680 continue;
1682 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1683 BF->setHasProfileAvailable();
1685 ++BasicSamples[Sample->PC];
1686 EventNames.insert(Sample->EventName);
1689 return std::error_code();
1692 void DataAggregator::processBasicEvents() {
1693 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1694 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1695 TimerGroupDesc, opts::TimeAggregator);
1696 uint64_t OutOfRangeSamples = 0;
1697 uint64_t NumSamples = 0;
1698 for (auto &Sample : BasicSamples) {
1699 const uint64_t PC = Sample.first;
1700 const uint64_t HitCount = Sample.second;
1701 NumSamples += HitCount;
1702 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1703 if (!Func) {
1704 OutOfRangeSamples += HitCount;
1705 continue;
1708 doSample(*Func, PC, HitCount);
1710 outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1712 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1713 << OutOfRangeSamples;
1714 float Perc = 0.0f;
1715 if (NumSamples > 0) {
1716 outs() << " (";
1717 Perc = OutOfRangeSamples * 100.0f / NumSamples;
1718 if (outs().has_colors()) {
1719 if (Perc > 60.0f)
1720 outs().changeColor(raw_ostream::RED);
1721 else if (Perc > 40.0f)
1722 outs().changeColor(raw_ostream::YELLOW);
1723 else
1724 outs().changeColor(raw_ostream::GREEN);
1726 outs() << format("%.1f%%", Perc);
1727 if (outs().has_colors())
1728 outs().resetColor();
1729 outs() << ")";
1731 outs() << "\n";
1732 if (Perc > 80.0f)
1733 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1734 "binary is probably not the same binary used during profiling "
1735 "collection. The generated data may be ineffective for improving "
1736 "performance.\n\n";
1739 std::error_code DataAggregator::parseMemEvents() {
1740 outs() << "PERF2BOLT: parsing memory events...\n";
1741 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1742 TimerGroupDesc, opts::TimeAggregator);
1743 while (hasData()) {
1744 ErrorOr<PerfMemSample> Sample = parseMemSample();
1745 if (std::error_code EC = Sample.getError())
1746 return EC;
1748 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1749 BF->setHasProfileAvailable();
1751 MemSamples.emplace_back(std::move(Sample.get()));
1754 return std::error_code();
1757 void DataAggregator::processMemEvents() {
1758 NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1759 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1760 for (const PerfMemSample &Sample : MemSamples) {
1761 uint64_t PC = Sample.PC;
1762 uint64_t Addr = Sample.Addr;
1763 StringRef FuncName;
1764 StringRef MemName;
1766 // Try to resolve symbol for PC
1767 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1768 if (!Func) {
1769 LLVM_DEBUG(if (PC != 0) {
1770 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr);
1772 continue;
1775 FuncName = Func->getOneName();
1776 PC -= Func->getAddress();
1778 // Try to resolve symbol for memory load
1779 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1780 MemName = BD->getName();
1781 Addr -= BD->getAddress();
1782 } else if (opts::FilterMemProfile) {
1783 // Filter out heap/stack accesses
1784 continue;
1787 const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1788 const Location AddrLoc(!MemName.empty(), MemName, Addr);
1790 FuncMemData *MemData = &NamesToMemEvents[FuncName];
1791 MemData->Name = FuncName;
1792 setMemData(*Func, MemData);
1793 MemData->update(FuncLoc, AddrLoc);
1794 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1798 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1799 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1800 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1801 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1802 while (hasData()) {
1803 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1804 if (std::error_code EC = AggrEntry.getError())
1805 return EC;
1807 for (const uint64_t Addr : {AggrEntry->From.Offset, AggrEntry->To.Offset})
1808 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1809 BF->setHasProfileAvailable();
1811 AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1814 return std::error_code();
1817 void DataAggregator::processPreAggregated() {
1818 outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1819 NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1820 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1822 uint64_t NumTraces = 0;
1823 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1824 switch (AggrEntry.EntryType) {
1825 case AggregatedLBREntry::BRANCH:
1826 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1827 AggrEntry.Mispreds, /*IsPreagg*/ true);
1828 break;
1829 case AggregatedLBREntry::FT:
1830 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1831 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1832 ? AggrEntry.From.Offset
1833 : 0,
1834 AggrEntry.From.Offset, false};
1835 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1836 doTrace(First, Second, AggrEntry.Count);
1837 NumTraces += AggrEntry.Count;
1838 break;
1843 outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1844 << " aggregated LBR entries\n";
1845 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1846 << NumInvalidTraces;
1847 float Perc = 0.0f;
1848 if (NumTraces > 0) {
1849 outs() << " (";
1850 Perc = NumInvalidTraces * 100.0f / NumTraces;
1851 if (outs().has_colors()) {
1852 if (Perc > 10.0f)
1853 outs().changeColor(raw_ostream::RED);
1854 else if (Perc > 5.0f)
1855 outs().changeColor(raw_ostream::YELLOW);
1856 else
1857 outs().changeColor(raw_ostream::GREEN);
1859 outs() << format("%.1f%%", Perc);
1860 if (outs().has_colors())
1861 outs().resetColor();
1862 outs() << ")";
1864 outs() << "\n";
1865 if (Perc > 10.0f)
1866 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1867 "binary is probably not the same binary used during profiling "
1868 "collection. The generated data may be ineffective for improving "
1869 "performance.\n\n";
1871 outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1872 << NumLongRangeTraces;
1873 if (NumTraces > 0)
1874 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1875 outs() << "\n";
1878 std::optional<int32_t> DataAggregator::parseCommExecEvent() {
1879 size_t LineEnd = ParsingBuf.find_first_of("\n");
1880 if (LineEnd == StringRef::npos) {
1881 reportError("expected rest of line");
1882 Diag << "Found: " << ParsingBuf << "\n";
1883 return std::nullopt;
1885 StringRef Line = ParsingBuf.substr(0, LineEnd);
1887 size_t Pos = Line.find("PERF_RECORD_COMM exec");
1888 if (Pos == StringRef::npos)
1889 return std::nullopt;
1890 Line = Line.drop_front(Pos);
1892 // Line:
1893 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1894 StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1895 int32_t PID;
1896 if (PIDStr.getAsInteger(10, PID)) {
1897 reportError("expected PID");
1898 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1899 return std::nullopt;
1902 return PID;
1905 namespace {
1906 std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1907 const StringRef SecTimeStr = TimeStr.split('.').first;
1908 const StringRef USecTimeStr = TimeStr.split('.').second;
1909 uint64_t SecTime;
1910 uint64_t USecTime;
1911 if (SecTimeStr.getAsInteger(10, SecTime) ||
1912 USecTimeStr.getAsInteger(10, USecTime))
1913 return std::nullopt;
1914 return SecTime * 1000000ULL + USecTime;
1918 std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1919 while (checkAndConsumeFS()) {
1922 size_t LineEnd = ParsingBuf.find_first_of("\n");
1923 if (LineEnd == StringRef::npos) {
1924 reportError("expected rest of line");
1925 Diag << "Found: " << ParsingBuf << "\n";
1926 return std::nullopt;
1928 StringRef Line = ParsingBuf.substr(0, LineEnd);
1930 size_t Pos = Line.find("PERF_RECORD_FORK");
1931 if (Pos == StringRef::npos) {
1932 consumeRestOfLine();
1933 return std::nullopt;
1936 ForkInfo FI;
1938 const StringRef TimeStr =
1939 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1940 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1941 FI.Time = *TimeRes;
1944 Line = Line.drop_front(Pos);
1946 // Line:
1947 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1948 const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1949 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1950 reportError("expected PID");
1951 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1952 return std::nullopt;
1955 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1956 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1957 reportError("expected PID");
1958 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1959 return std::nullopt;
1962 consumeRestOfLine();
1964 return FI;
1967 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1968 DataAggregator::parseMMapEvent() {
1969 while (checkAndConsumeFS()) {
1972 MMapInfo ParsedInfo;
1974 size_t LineEnd = ParsingBuf.find_first_of("\n");
1975 if (LineEnd == StringRef::npos) {
1976 reportError("expected rest of line");
1977 Diag << "Found: " << ParsingBuf << "\n";
1978 return make_error_code(llvm::errc::io_error);
1980 StringRef Line = ParsingBuf.substr(0, LineEnd);
1982 size_t Pos = Line.find("PERF_RECORD_MMAP2");
1983 if (Pos == StringRef::npos) {
1984 consumeRestOfLine();
1985 return std::make_pair(StringRef(), ParsedInfo);
1988 // Line:
1989 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1991 const StringRef TimeStr =
1992 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1993 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1994 ParsedInfo.Time = *TimeRes;
1996 Line = Line.drop_front(Pos);
1998 // Line:
1999 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
2001 StringRef FileName = Line.rsplit(FieldSeparator).second;
2002 if (FileName.starts_with("//") || FileName.starts_with("[")) {
2003 consumeRestOfLine();
2004 return std::make_pair(StringRef(), ParsedInfo);
2006 FileName = sys::path::filename(FileName);
2008 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
2009 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
2010 reportError("expected PID");
2011 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
2012 return make_error_code(llvm::errc::io_error);
2015 const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
2016 if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
2017 reportError("expected base address");
2018 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
2019 return make_error_code(llvm::errc::io_error);
2022 const StringRef SizeStr = Line.split('(').second.split(')').first;
2023 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
2024 reportError("expected mmaped size");
2025 Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
2026 return make_error_code(llvm::errc::io_error);
2029 const StringRef OffsetStr =
2030 Line.split('@').second.ltrim().split(FieldSeparator).first;
2031 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
2032 reportError("expected mmaped page-aligned offset");
2033 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
2034 return make_error_code(llvm::errc::io_error);
2037 consumeRestOfLine();
2039 return std::make_pair(FileName, ParsedInfo);
2042 std::error_code DataAggregator::parseMMapEvents() {
2043 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
2044 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
2045 TimerGroupDesc, opts::TimeAggregator);
2047 std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
2048 while (hasData()) {
2049 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
2050 if (std::error_code EC = FileMMapInfoRes.getError())
2051 return EC;
2053 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
2054 if (FileMMapInfo.second.PID == -1)
2055 continue;
2056 if (FileMMapInfo.first == "(deleted)")
2057 continue;
2059 // Consider only the first mapping of the file for any given PID
2060 auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
2061 bool PIDExists = llvm::any_of(make_range(Range), [&](const auto &MI) {
2062 return MI.second.PID == FileMMapInfo.second.PID;
2065 if (PIDExists)
2066 continue;
2068 GlobalMMapInfo.insert(FileMMapInfo);
2071 LLVM_DEBUG({
2072 dbgs() << "FileName -> mmap info:\n"
2073 << " Filename : PID [MMapAddr, Size, Offset]\n";
2074 for (const auto &[Name, MMap] : GlobalMMapInfo)
2075 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID,
2076 MMap.MMapAddress, MMap.Size, MMap.Offset);
2079 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
2080 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
2081 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2082 << "\" for profile matching\n";
2083 NameToUse = BuildIDBinaryName;
2086 auto Range = GlobalMMapInfo.equal_range(NameToUse);
2087 for (MMapInfo &MMapInfo : llvm::make_second_range(make_range(Range))) {
2088 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2089 // Check that the binary mapping matches one of the segments.
2090 bool MatchFound = llvm::any_of(
2091 llvm::make_second_range(BC->SegmentMapInfo),
2092 [&](SegmentInfo &SegInfo) {
2093 // The mapping is page-aligned and hence the MMapAddress could be
2094 // different from the segment start address. We cannot know the page
2095 // size of the mapping, but we know it should not exceed the segment
2096 // alignment value. Hence we are performing an approximate check.
2097 return SegInfo.Address >= MMapInfo.MMapAddress &&
2098 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment &&
2099 SegInfo.IsExecutable;
2101 if (!MatchFound) {
2102 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2103 << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2104 continue;
2108 // Set base address for shared objects.
2109 if (!BC->HasFixedLoadAddress) {
2110 std::optional<uint64_t> BaseAddress =
2111 BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2112 if (!BaseAddress) {
2113 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2114 "binary when memory mapped at 0x"
2115 << Twine::utohexstr(MMapInfo.MMapAddress)
2116 << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2117 << ". Ignoring profile data for this mapping\n";
2118 continue;
2119 } else {
2120 MMapInfo.BaseAddress = *BaseAddress;
2124 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2127 if (BinaryMMapInfo.empty()) {
2128 if (errs().has_colors())
2129 errs().changeColor(raw_ostream::RED);
2130 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2131 << BC->getFilename() << "\".";
2132 if (!GlobalMMapInfo.empty()) {
2133 errs() << " Profile for the following binary name(s) is available:\n";
2134 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2135 I = GlobalMMapInfo.upper_bound(I->first))
2136 errs() << " " << I->first << '\n';
2137 errs() << "Please rename the input binary.\n";
2138 } else {
2139 errs() << " Failed to extract any binary name from a profile.\n";
2141 if (errs().has_colors())
2142 errs().resetColor();
2144 exit(1);
2147 return std::error_code();
2150 std::error_code DataAggregator::parseTaskEvents() {
2151 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2152 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2153 TimerGroupDesc, opts::TimeAggregator);
2155 while (hasData()) {
2156 if (std::optional<int32_t> CommInfo = parseCommExecEvent()) {
2157 // Remove forked child that ran execve
2158 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2159 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2160 BinaryMMapInfo.erase(MMapInfoIter);
2161 consumeRestOfLine();
2162 continue;
2165 std::optional<ForkInfo> ForkInfo = parseForkEvent();
2166 if (!ForkInfo)
2167 continue;
2169 if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2170 continue;
2172 if (ForkInfo->Time == 0) {
2173 // Process was forked and mmaped before perf ran. In this case the child
2174 // should have its own mmap entry unless it was execve'd.
2175 continue;
2178 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2179 if (MMapInfoIter == BinaryMMapInfo.end())
2180 continue;
2182 MMapInfo MMapInfo = MMapInfoIter->second;
2183 MMapInfo.PID = ForkInfo->ChildPID;
2184 MMapInfo.Forked = true;
2185 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2188 outs() << "PERF2BOLT: input binary is associated with "
2189 << BinaryMMapInfo.size() << " PID(s)\n";
2191 LLVM_DEBUG({
2192 for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo))
2193 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI.PID,
2194 (MMI.Forked ? " (forked)" : ""), MMI.MMapAddress,
2195 MMI.Size);
2198 return std::error_code();
2201 std::optional<std::pair<StringRef, StringRef>>
2202 DataAggregator::parseNameBuildIDPair() {
2203 while (checkAndConsumeFS()) {
2206 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2207 if (std::error_code EC = BuildIDStr.getError())
2208 return std::nullopt;
2210 // If one of the strings is missing, don't issue a parsing error, but still
2211 // do not return a value.
2212 consumeAllRemainingFS();
2213 if (checkNewLine())
2214 return std::nullopt;
2216 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2217 if (std::error_code EC = NameStr.getError())
2218 return std::nullopt;
2220 consumeRestOfLine();
2221 return std::make_pair(NameStr.get(), BuildIDStr.get());
2224 bool DataAggregator::hasAllBuildIDs() {
2225 const StringRef SavedParsingBuf = ParsingBuf;
2227 if (!hasData())
2228 return false;
2230 bool HasInvalidEntries = false;
2231 while (hasData()) {
2232 if (!parseNameBuildIDPair()) {
2233 HasInvalidEntries = true;
2234 break;
2238 ParsingBuf = SavedParsingBuf;
2240 return !HasInvalidEntries;
2243 std::optional<StringRef>
2244 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2245 const StringRef SavedParsingBuf = ParsingBuf;
2247 StringRef FileName;
2248 while (hasData()) {
2249 std::optional<std::pair<StringRef, StringRef>> IDPair =
2250 parseNameBuildIDPair();
2251 if (!IDPair) {
2252 consumeRestOfLine();
2253 continue;
2256 if (IDPair->second.starts_with(FileBuildID)) {
2257 FileName = sys::path::filename(IDPair->first);
2258 break;
2262 ParsingBuf = SavedParsingBuf;
2264 if (!FileName.empty())
2265 return FileName;
2267 return std::nullopt;
2270 std::error_code
2271 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2272 std::error_code EC;
2273 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2274 if (EC)
2275 return EC;
2277 bool WriteMemLocs = false;
2279 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2280 if (WriteMemLocs)
2281 OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2282 else
2283 OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2284 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2285 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2288 uint64_t BranchValues = 0;
2289 uint64_t MemValues = 0;
2291 if (BAT)
2292 OutFile << "boltedcollection\n";
2293 if (opts::BasicAggregation) {
2294 OutFile << "no_lbr";
2295 for (const StringMapEntry<std::nullopt_t> &Entry : EventNames)
2296 OutFile << " " << Entry.getKey();
2297 OutFile << "\n";
2299 for (const auto &KV : NamesToSamples) {
2300 const FuncSampleData &FSD = KV.second;
2301 for (const SampleInfo &SI : FSD.Data) {
2302 writeLocation(SI.Loc);
2303 OutFile << SI.Hits << "\n";
2304 ++BranchValues;
2307 } else {
2308 for (const auto &KV : NamesToBranches) {
2309 const FuncBranchData &FBD = KV.second;
2310 for (const BranchInfo &BI : FBD.Data) {
2311 writeLocation(BI.From);
2312 writeLocation(BI.To);
2313 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2314 ++BranchValues;
2316 for (const BranchInfo &BI : FBD.EntryData) {
2317 // Do not output if source is a known symbol, since this was already
2318 // accounted for in the source function
2319 if (BI.From.IsSymbol)
2320 continue;
2321 writeLocation(BI.From);
2322 writeLocation(BI.To);
2323 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2324 ++BranchValues;
2328 WriteMemLocs = true;
2329 for (const auto &KV : NamesToMemEvents) {
2330 const FuncMemData &FMD = KV.second;
2331 for (const MemInfo &MemEvent : FMD.Data) {
2332 writeLocation(MemEvent.Offset);
2333 writeLocation(MemEvent.Addr);
2334 OutFile << MemEvent.Count << "\n";
2335 ++MemValues;
2340 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2341 << " memory objects to " << OutputFilename << "\n";
2343 return std::error_code();
2346 std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
2347 StringRef OutputFilename) const {
2348 std::error_code EC;
2349 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2350 if (EC)
2351 return EC;
2353 yaml::bolt::BinaryProfile BP;
2355 const MCPseudoProbeDecoder *PseudoProbeDecoder =
2356 opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
2358 // Fill out the header info.
2359 BP.Header.Version = 1;
2360 BP.Header.FileName = std::string(BC.getFilename());
2361 std::optional<StringRef> BuildID = BC.getFileBuildID();
2362 BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
2363 BP.Header.Origin = std::string(getReaderName());
2364 // Only the input binary layout order is supported.
2365 BP.Header.IsDFSOrder = false;
2366 // FIXME: Need to match hash function used to produce BAT hashes.
2367 BP.Header.HashFunction = HashFunction::Default;
2369 ListSeparator LS(",");
2370 raw_string_ostream EventNamesOS(BP.Header.EventNames);
2371 for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames)
2372 EventNamesOS << LS << EventEntry.first().str();
2374 BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
2375 : BinaryFunction::PF_LBR;
2377 // Add probe inline tree nodes.
2378 YAMLProfileWriter::InlineTreeDesc InlineTree;
2379 if (PseudoProbeDecoder)
2380 std::tie(BP.PseudoProbeDesc, InlineTree) =
2381 YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder);
2383 if (!opts::BasicAggregation) {
2384 // Convert profile for functions not covered by BAT
2385 for (auto &BFI : BC.getBinaryFunctions()) {
2386 BinaryFunction &Function = BFI.second;
2387 if (!Function.hasProfile())
2388 continue;
2389 if (BAT->isBATFunction(Function.getAddress()))
2390 continue;
2391 BP.Functions.emplace_back(YAMLProfileWriter::convert(
2392 Function, /*UseDFS=*/false, InlineTree, BAT));
2395 for (const auto &KV : NamesToBranches) {
2396 const StringRef FuncName = KV.first;
2397 const FuncBranchData &Branches = KV.second;
2398 yaml::bolt::BinaryFunctionProfile YamlBF;
2399 BinaryData *BD = BC.getBinaryDataByName(FuncName);
2400 assert(BD);
2401 uint64_t FuncAddress = BD->getAddress();
2402 if (!BAT->isBATFunction(FuncAddress))
2403 continue;
2404 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress);
2405 assert(BF);
2406 YamlBF.Name = getLocationName(*BF, BAT);
2407 YamlBF.Id = BF->getFunctionNumber();
2408 YamlBF.Hash = BAT->getBFHash(FuncAddress);
2409 YamlBF.ExecCount = BF->getKnownExecutionCount();
2410 YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
2411 const BoltAddressTranslation::BBHashMapTy &BlockMap =
2412 BAT->getBBHashMap(FuncAddress);
2413 YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);
2415 for (auto &&[Entry, YamlBB] : llvm::zip(BlockMap, YamlBF.Blocks)) {
2416 const auto &Block = Entry.second;
2417 YamlBB.Hash = Block.Hash;
2418 YamlBB.Index = Block.Index;
2421 // Lookup containing basic block offset and index
2422 auto getBlock = [&BlockMap](uint32_t Offset) {
2423 auto BlockIt = BlockMap.upper_bound(Offset);
2424 if (LLVM_UNLIKELY(BlockIt == BlockMap.begin())) {
2425 errs() << "BOLT-ERROR: invalid BAT section\n";
2426 exit(1);
2428 --BlockIt;
2429 return std::pair(BlockIt->first, BlockIt->second.Index);
2432 for (const BranchInfo &BI : Branches.Data) {
2433 using namespace yaml::bolt;
2434 const auto &[BlockOffset, BlockIndex] = getBlock(BI.From.Offset);
2435 BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
2436 if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) {
2437 // Internal branch
2438 const unsigned SuccIndex = getBlock(BI.To.Offset).second;
2439 auto &SI = YamlBB.Successors.emplace_back(SuccessorInfo{SuccIndex});
2440 SI.Count = BI.Branches;
2441 SI.Mispreds = BI.Mispreds;
2442 } else {
2443 // Call
2444 const uint32_t Offset = BI.From.Offset - BlockOffset;
2445 auto &CSI = YamlBB.CallSites.emplace_back(CallSiteInfo{Offset});
2446 CSI.Count = BI.Branches;
2447 CSI.Mispreds = BI.Mispreds;
2448 if (const BinaryData *BD = BC.getBinaryDataByName(BI.To.Name))
2449 YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT,
2450 BI.To.Offset);
2453 // Set entry counts, similar to DataReader::readProfile.
2454 for (const BranchInfo &BI : Branches.EntryData) {
2455 if (!BlockMap.isInputBlock(BI.To.Offset)) {
2456 if (opts::Verbosity >= 1)
2457 errs() << "BOLT-WARNING: Unexpected EntryData in " << FuncName
2458 << " at 0x" << Twine::utohexstr(BI.To.Offset) << '\n';
2459 continue;
2461 const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset);
2462 YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
2464 if (PseudoProbeDecoder) {
2465 DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
2466 InlineTreeNodeId;
2467 if (BF->getGUID()) {
2468 std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
2469 YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
2470 InlineTree, BF->getGUID());
2472 // Fetch probes belonging to all fragments
2473 const AddressProbesMap &ProbeMap =
2474 PseudoProbeDecoder->getAddress2ProbesMap();
2475 BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
2476 Fragments.insert(BF);
2477 DenseMap<
2478 uint32_t,
2479 std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
2480 BlockProbes;
2481 for (const BinaryFunction *F : Fragments) {
2482 const uint64_t FuncAddr = F->getAddress();
2483 for (const MCDecodedPseudoProbe &Probe :
2484 ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
2485 const uint32_t OutputAddress = Probe.getAddress();
2486 const uint32_t InputOffset = BAT->translate(
2487 FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
2488 const unsigned BlockIndex = getBlock(InputOffset).second;
2489 BlockProbes[BlockIndex].emplace_back(Probe);
2493 for (auto &[Block, Probes] : BlockProbes) {
2494 YamlBF.Blocks[Block].PseudoProbes =
2495 YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
2498 // Skip printing if there's no profile data
2499 llvm::erase_if(
2500 YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
2501 auto HasCount = [](const auto &SI) { return SI.Count; };
2502 bool HasAnyCount = YamlBB.ExecCount ||
2503 llvm::any_of(YamlBB.Successors, HasCount) ||
2504 llvm::any_of(YamlBB.CallSites, HasCount);
2505 return !HasAnyCount;
2507 BP.Functions.emplace_back(YamlBF);
2511 // Write the profile.
2512 yaml::Output Out(OutFile, nullptr, 0);
2513 Out << BP;
2514 return std::error_code();
2517 void DataAggregator::dump() const { DataReader::dump(); }
2519 void DataAggregator::dump(const LBREntry &LBR) const {
2520 Diag << "From: " << Twine::utohexstr(LBR.From)
2521 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2522 << "\n";
2525 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2526 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2527 for (const LBREntry &LBR : Sample.LBR)
2528 dump(LBR);
2531 void DataAggregator::dump(const PerfMemSample &Sample) const {
2532 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";