Bump version to 19.1.0 (final)
[llvm-project.git] / bolt / lib / Profile / YAMLProfileReader.cpp
blob604a9fb4813be4a08e560edb07557773ea6c2e2a
1 //===- bolt/Profile/YAMLProfileReader.cpp - YAML profile de-serializer ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Profile/YAMLProfileReader.h"
10 #include "bolt/Core/BinaryBasicBlock.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Passes/MCF.h"
13 #include "bolt/Profile/ProfileYAMLMapping.h"
14 #include "bolt/Utils/NameResolver.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/edit_distance.h"
18 #include "llvm/Demangle/Demangle.h"
19 #include "llvm/Support/CommandLine.h"
21 using namespace llvm;
23 namespace opts {
25 extern cl::opt<unsigned> Verbosity;
26 extern cl::OptionCategory BoltOptCategory;
27 extern cl::opt<bool> InferStaleProfile;
28 extern cl::opt<bool> Lite;
30 cl::opt<unsigned> NameSimilarityFunctionMatchingThreshold(
31 "name-similarity-function-matching-threshold",
32 cl::desc("Match functions using namespace and edit distance"), cl::init(0),
33 cl::Hidden, cl::cat(BoltOptCategory));
35 static llvm::cl::opt<bool>
36 IgnoreHash("profile-ignore-hash",
37 cl::desc("ignore hash while reading function profile"),
38 cl::Hidden, cl::cat(BoltOptCategory));
40 llvm::cl::opt<bool>
41 MatchProfileWithFunctionHash("match-profile-with-function-hash",
42 cl::desc("Match profile with function hash"),
43 cl::Hidden, cl::cat(BoltOptCategory));
44 llvm::cl::opt<bool>
45 MatchWithCallGraph("match-with-call-graph",
46 cl::desc("Match functions with call graph"), cl::Hidden,
47 cl::cat(BoltOptCategory));
49 llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
50 cl::desc("use DFS order for YAML profile"),
51 cl::Hidden, cl::cat(BoltOptCategory));
52 } // namespace opts
54 namespace llvm {
55 namespace bolt {
57 YAMLProfileReader::CallGraphMatcher::CallGraphMatcher(
58 BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP,
59 ProfileLookupMap &IdToYAMLBF) {
60 constructBFCG(BC, YamlBP);
61 constructYAMLFCG(YamlBP, IdToYAMLBF);
62 computeBFNeighborHashes(BC);
65 void YAMLProfileReader::CallGraphMatcher::constructBFCG(
66 BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP) {
67 for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
68 for (const BinaryBasicBlock &BB : BF->blocks()) {
69 for (const MCInst &Instr : BB) {
70 if (!BC.MIB->isCall(Instr))
71 continue;
72 const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
73 if (!CallSymbol)
74 continue;
75 BinaryData *BD = BC.getBinaryDataByName(CallSymbol->getName());
76 if (!BD)
77 continue;
78 BinaryFunction *CalleeBF = BC.getFunctionForSymbol(BD->getSymbol());
79 if (!CalleeBF)
80 continue;
82 BFAdjacencyMap[CalleeBF].insert(BF);
83 BFAdjacencyMap[BF].insert(CalleeBF);
89 void YAMLProfileReader::CallGraphMatcher::computeBFNeighborHashes(
90 BinaryContext &BC) {
91 for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
92 auto It = BFAdjacencyMap.find(BF);
93 if (It == BFAdjacencyMap.end())
94 continue;
95 auto &AdjacentBFs = It->second;
96 std::string HashStr;
97 for (BinaryFunction *BF : AdjacentBFs)
98 HashStr += BF->getOneName();
99 uint64_t Hash = std::hash<std::string>{}(HashStr);
100 NeighborHashToBFs[Hash].push_back(BF);
104 void YAMLProfileReader::CallGraphMatcher::constructYAMLFCG(
105 yaml::bolt::BinaryProfile &YamlBP, ProfileLookupMap &IdToYAMLBF) {
107 for (auto &CallerYamlBF : YamlBP.Functions) {
108 for (auto &YamlBB : CallerYamlBF.Blocks) {
109 for (auto &CallSite : YamlBB.CallSites) {
110 auto IdToYAMLBFIt = IdToYAMLBF.find(CallSite.DestId);
111 if (IdToYAMLBFIt == IdToYAMLBF.end())
112 continue;
113 YamlBFAdjacencyMap[&CallerYamlBF].insert(IdToYAMLBFIt->second);
114 YamlBFAdjacencyMap[IdToYAMLBFIt->second].insert(&CallerYamlBF);
120 bool YAMLProfileReader::isYAML(const StringRef Filename) {
121 if (auto MB = MemoryBuffer::getFileOrSTDIN(Filename)) {
122 StringRef Buffer = (*MB)->getBuffer();
123 return Buffer.starts_with("---\n");
124 } else {
125 report_error(Filename, MB.getError());
127 return false;
130 void YAMLProfileReader::buildNameMaps(BinaryContext &BC) {
131 auto lookupFunction = [&](StringRef Name) -> BinaryFunction * {
132 if (BinaryData *BD = BC.getBinaryDataByName(Name))
133 return BC.getFunctionForSymbol(BD->getSymbol());
134 return nullptr;
137 ProfileBFs.reserve(YamlBP.Functions.size());
139 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
140 StringRef Name = YamlBF.Name;
141 const size_t Pos = Name.find("(*");
142 if (Pos != StringRef::npos)
143 Name = Name.substr(0, Pos);
144 ProfileFunctionNames.insert(Name);
145 ProfileBFs.push_back(lookupFunction(Name));
146 if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
147 LTOCommonNameMap[*CommonName].push_back(&YamlBF);
149 for (auto &[Symbol, BF] : BC.SymbolToFunctionMap) {
150 StringRef Name = Symbol->getName();
151 if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
152 LTOCommonNameFunctionMap[*CommonName].insert(BF);
156 bool YAMLProfileReader::hasLocalsWithFileName() const {
157 return llvm::any_of(ProfileFunctionNames.keys(), [](StringRef FuncName) {
158 return FuncName.count('/') == 2 && FuncName[0] != '/';
162 bool YAMLProfileReader::parseFunctionProfile(
163 BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
164 BinaryContext &BC = BF.getBinaryContext();
166 const bool IsDFSOrder = YamlBP.Header.IsDFSOrder;
167 const HashFunction HashFunction = YamlBP.Header.HashFunction;
168 bool ProfileMatched = true;
169 uint64_t MismatchedBlocks = 0;
170 uint64_t MismatchedCalls = 0;
171 uint64_t MismatchedEdges = 0;
173 uint64_t FunctionExecutionCount = 0;
175 BF.setExecutionCount(YamlBF.ExecCount);
177 uint64_t FuncRawBranchCount = 0;
178 for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
179 for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors)
180 FuncRawBranchCount += YamlSI.Count;
181 BF.setRawBranchCount(FuncRawBranchCount);
183 if (BF.empty())
184 return true;
186 if (!opts::IgnoreHash) {
187 if (!BF.getHash())
188 BF.computeHash(IsDFSOrder, HashFunction);
189 if (YamlBF.Hash != BF.getHash()) {
190 if (opts::Verbosity >= 1)
191 errs() << "BOLT-WARNING: function hash mismatch\n";
192 ProfileMatched = false;
196 if (YamlBF.NumBasicBlocks != BF.size()) {
197 if (opts::Verbosity >= 1)
198 errs() << "BOLT-WARNING: number of basic blocks mismatch\n";
199 ProfileMatched = false;
202 BinaryFunction::BasicBlockOrderType Order;
203 if (IsDFSOrder)
204 llvm::copy(BF.dfs(), std::back_inserter(Order));
205 else
206 llvm::copy(BF.getLayout().blocks(), std::back_inserter(Order));
208 for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
209 if (YamlBB.Index >= Order.size()) {
210 if (opts::Verbosity >= 2)
211 errs() << "BOLT-WARNING: index " << YamlBB.Index
212 << " is out of bounds\n";
213 ++MismatchedBlocks;
214 continue;
217 BinaryBasicBlock &BB = *Order[YamlBB.Index];
219 // Basic samples profile (without LBR) does not have branches information
220 // and needs a special processing.
221 if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
222 if (!YamlBB.EventCount) {
223 BB.setExecutionCount(0);
224 continue;
226 uint64_t NumSamples = YamlBB.EventCount * 1000;
227 if (NormalizeByInsnCount && BB.getNumNonPseudos())
228 NumSamples /= BB.getNumNonPseudos();
229 else if (NormalizeByCalls)
230 NumSamples /= BB.getNumCalls() + 1;
232 BB.setExecutionCount(NumSamples);
233 if (BB.isEntryPoint())
234 FunctionExecutionCount += NumSamples;
235 continue;
238 BB.setExecutionCount(YamlBB.ExecCount);
240 for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) {
241 BinaryFunction *Callee = YamlCSI.DestId < YamlProfileToFunction.size()
242 ? YamlProfileToFunction[YamlCSI.DestId]
243 : nullptr;
244 bool IsFunction = Callee ? true : false;
245 MCSymbol *CalleeSymbol = nullptr;
246 if (IsFunction)
247 CalleeSymbol = Callee->getSymbolForEntryID(YamlCSI.EntryDiscriminator);
249 BF.getAllCallSites().emplace_back(CalleeSymbol, YamlCSI.Count,
250 YamlCSI.Mispreds, YamlCSI.Offset);
252 if (YamlCSI.Offset >= BB.getOriginalSize()) {
253 if (opts::Verbosity >= 2)
254 errs() << "BOLT-WARNING: offset " << YamlCSI.Offset
255 << " out of bounds in block " << BB.getName() << '\n';
256 ++MismatchedCalls;
257 continue;
260 MCInst *Instr =
261 BF.getInstructionAtOffset(BB.getInputOffset() + YamlCSI.Offset);
262 if (!Instr) {
263 if (opts::Verbosity >= 2)
264 errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset
265 << " in block " << BB.getName() << '\n';
266 ++MismatchedCalls;
267 continue;
269 if (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)) {
270 if (opts::Verbosity >= 2)
271 errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset
272 << " in block " << BB.getName() << '\n';
273 ++MismatchedCalls;
274 continue;
277 auto setAnnotation = [&](StringRef Name, uint64_t Count) {
278 if (BC.MIB->hasAnnotation(*Instr, Name)) {
279 if (opts::Verbosity >= 1)
280 errs() << "BOLT-WARNING: ignoring duplicate " << Name
281 << " info for offset 0x" << Twine::utohexstr(YamlCSI.Offset)
282 << " in function " << BF << '\n';
283 return;
285 BC.MIB->addAnnotation(*Instr, Name, Count);
288 if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
289 auto &CSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
290 *Instr, "CallProfile");
291 CSP.emplace_back(CalleeSymbol, YamlCSI.Count, YamlCSI.Mispreds);
292 } else if (BC.MIB->getConditionalTailCall(*Instr)) {
293 setAnnotation("CTCTakenCount", YamlCSI.Count);
294 setAnnotation("CTCMispredCount", YamlCSI.Mispreds);
295 } else {
296 setAnnotation("Count", YamlCSI.Count);
300 for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
301 if (YamlSI.Index >= Order.size()) {
302 if (opts::Verbosity >= 1)
303 errs() << "BOLT-WARNING: index out of bounds for profiled block\n";
304 ++MismatchedEdges;
305 continue;
308 BinaryBasicBlock *ToBB = Order[YamlSI.Index];
309 if (!BB.getSuccessor(ToBB->getLabel())) {
310 // Allow passthrough blocks.
311 BinaryBasicBlock *FTSuccessor = BB.getConditionalSuccessor(false);
312 if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
313 FTSuccessor->getSuccessor(ToBB->getLabel())) {
314 BinaryBasicBlock::BinaryBranchInfo &FTBI =
315 FTSuccessor->getBranchInfo(*ToBB);
316 FTBI.Count += YamlSI.Count;
317 FTBI.MispredictedCount += YamlSI.Mispreds;
318 ToBB = FTSuccessor;
319 } else {
320 if (opts::Verbosity >= 1)
321 errs() << "BOLT-WARNING: no successor for block " << BB.getName()
322 << " that matches index " << YamlSI.Index << " or block "
323 << ToBB->getName() << '\n';
324 ++MismatchedEdges;
325 continue;
329 BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(*ToBB);
330 BI.Count += YamlSI.Count;
331 BI.MispredictedCount += YamlSI.Mispreds;
335 // If basic block profile wasn't read it should be 0.
336 for (BinaryBasicBlock &BB : BF)
337 if (BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE)
338 BB.setExecutionCount(0);
340 if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE)
341 BF.setExecutionCount(FunctionExecutionCount);
343 ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges;
345 if (!ProfileMatched) {
346 if (opts::Verbosity >= 1)
347 errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, "
348 << MismatchedCalls << " calls, and " << MismatchedEdges
349 << " edges in profile did not match function " << BF << '\n';
351 if (YamlBF.NumBasicBlocks != BF.size())
352 ++BC.Stats.NumStaleFuncsWithEqualBlockCount;
354 if (opts::InferStaleProfile && inferStaleProfile(BF, YamlBF))
355 ProfileMatched = true;
357 if (ProfileMatched)
358 BF.markProfiled(YamlBP.Header.Flags);
360 return ProfileMatched;
363 Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
364 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
365 MemoryBuffer::getFileOrSTDIN(Filename);
366 if (std::error_code EC = MB.getError()) {
367 errs() << "ERROR: cannot open " << Filename << ": " << EC.message() << "\n";
368 return errorCodeToError(EC);
370 yaml::Input YamlInput(MB.get()->getBuffer());
372 // Consume YAML file.
373 YamlInput >> YamlBP;
374 if (YamlInput.error()) {
375 errs() << "BOLT-ERROR: syntax error parsing profile in " << Filename
376 << " : " << YamlInput.error().message() << '\n';
377 return errorCodeToError(YamlInput.error());
380 // Sanity check.
381 if (YamlBP.Header.Version != 1)
382 return make_error<StringError>(
383 Twine("cannot read profile : unsupported version"),
384 inconvertibleErrorCode());
386 if (YamlBP.Header.EventNames.find(',') != StringRef::npos)
387 return make_error<StringError>(
388 Twine("multiple events in profile are not supported"),
389 inconvertibleErrorCode());
391 // Match profile to function based on a function name.
392 buildNameMaps(BC);
394 // Preliminary assign function execution count.
395 for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
396 if (!BF)
397 continue;
398 if (!BF->hasProfile()) {
399 BF->setExecutionCount(YamlBF.ExecCount);
400 } else {
401 if (opts::Verbosity >= 1) {
402 errs() << "BOLT-WARNING: dropping duplicate profile for " << YamlBF.Name
403 << '\n';
405 BF = nullptr;
409 return Error::success();
412 bool YAMLProfileReader::profileMatches(
413 const yaml::bolt::BinaryFunctionProfile &Profile, const BinaryFunction &BF) {
414 if (opts::IgnoreHash)
415 return Profile.NumBasicBlocks == BF.size();
416 return Profile.Hash == static_cast<uint64_t>(BF.getHash());
419 bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
420 if (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)
421 return true;
422 for (StringRef Name : BF.getNames())
423 if (ProfileFunctionNames.contains(Name))
424 return true;
425 for (StringRef Name : BF.getNames()) {
426 if (const std::optional<StringRef> CommonName = getLTOCommonName(Name)) {
427 if (LTOCommonNameMap.contains(*CommonName))
428 return true;
432 return false;
435 size_t YAMLProfileReader::matchWithExactName() {
436 size_t MatchedWithExactName = 0;
437 // This first pass assigns profiles that match 100% by name and by hash.
438 for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
439 if (!BF)
440 continue;
441 BinaryFunction &Function = *BF;
442 // Clear function call count that may have been set while pre-processing
443 // the profile.
444 Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
446 if (profileMatches(YamlBF, Function)) {
447 matchProfileToFunction(YamlBF, Function);
448 ++MatchedWithExactName;
451 return MatchedWithExactName;
454 size_t YAMLProfileReader::matchWithHash(BinaryContext &BC) {
455 // Iterates through profiled functions to match the first binary function with
456 // the same exact hash. Serves to match identical, renamed functions.
457 // Collisions are possible where multiple functions share the same exact hash.
458 size_t MatchedWithHash = 0;
459 if (opts::MatchProfileWithFunctionHash) {
460 DenseMap<size_t, BinaryFunction *> StrictHashToBF;
461 StrictHashToBF.reserve(BC.getBinaryFunctions().size());
463 for (auto &[_, BF] : BC.getBinaryFunctions())
464 StrictHashToBF[BF.getHash()] = &BF;
466 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
467 if (YamlBF.Used)
468 continue;
469 auto It = StrictHashToBF.find(YamlBF.Hash);
470 if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) {
471 BinaryFunction *BF = It->second;
472 matchProfileToFunction(YamlBF, *BF);
473 ++MatchedWithHash;
477 return MatchedWithHash;
480 size_t YAMLProfileReader::matchWithLTOCommonName() {
481 // This second pass allows name ambiguity for LTO private functions.
482 size_t MatchedWithLTOCommonName = 0;
483 for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
484 if (!LTOCommonNameFunctionMap.contains(CommonName))
485 continue;
486 std::unordered_set<BinaryFunction *> &Functions =
487 LTOCommonNameFunctionMap[CommonName];
488 // Return true if a given profile is matched to one of BinaryFunctions with
489 // matching LTO common name.
490 auto matchProfile = [&](yaml::bolt::BinaryFunctionProfile *YamlBF) {
491 if (YamlBF->Used)
492 return false;
493 for (BinaryFunction *BF : Functions) {
494 if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) {
495 matchProfileToFunction(*YamlBF, *BF);
496 ++MatchedWithLTOCommonName;
497 return true;
500 return false;
502 bool ProfileMatched = llvm::any_of(LTOProfiles, matchProfile);
504 // If there's only one function with a given name, try to match it
505 // partially.
506 if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
507 !LTOProfiles.front()->Used &&
508 !ProfiledFunctions.count(*Functions.begin())) {
509 matchProfileToFunction(*LTOProfiles.front(), **Functions.begin());
510 ++MatchedWithLTOCommonName;
513 return MatchedWithLTOCommonName;
516 size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
517 if (!opts::MatchWithCallGraph)
518 return 0;
520 size_t MatchedWithCallGraph = 0;
521 CallGraphMatcher CGMatcher(BC, YamlBP, IdToYamLBF);
523 ItaniumPartialDemangler Demangler;
524 auto GetBaseName = [&](std::string &FunctionName) {
525 if (Demangler.partialDemangle(FunctionName.c_str()))
526 return std::string("");
527 size_t BufferSize = 1;
528 char *Buffer = static_cast<char *>(std::malloc(BufferSize));
529 char *BaseName = Demangler.getFunctionBaseName(Buffer, &BufferSize);
530 if (!BaseName) {
531 std::free(Buffer);
532 return std::string("");
534 if (Buffer != BaseName)
535 Buffer = BaseName;
536 std::string BaseNameStr(Buffer, BufferSize);
537 std::free(Buffer);
538 return BaseNameStr;
541 // Matches YAMLBF to BFs with neighbor hashes.
542 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
543 if (YamlBF.Used)
544 continue;
545 auto AdjacentYamlBFsOpt = CGMatcher.getAdjacentYamlBFs(YamlBF);
546 if (!AdjacentYamlBFsOpt)
547 continue;
548 std::set<yaml::bolt::BinaryFunctionProfile *> AdjacentYamlBFs =
549 AdjacentYamlBFsOpt.value();
550 std::string AdjacentYamlBFsHashStr;
551 for (auto *AdjacentYamlBF : AdjacentYamlBFs)
552 AdjacentYamlBFsHashStr += AdjacentYamlBF->Name;
553 uint64_t Hash = std::hash<std::string>{}(AdjacentYamlBFsHashStr);
554 auto BFsWithSameHashOpt = CGMatcher.getBFsWithNeighborHash(Hash);
555 if (!BFsWithSameHashOpt)
556 continue;
557 std::vector<BinaryFunction *> BFsWithSameHash = BFsWithSameHashOpt.value();
558 // Finds the binary function with the longest common prefix to the profiled
559 // function and matches.
560 BinaryFunction *ClosestBF = nullptr;
561 size_t LCP = 0;
562 std::string YamlBFBaseName = GetBaseName(YamlBF.Name);
563 for (BinaryFunction *BF : BFsWithSameHash) {
564 if (ProfiledFunctions.count(BF))
565 continue;
566 std::string BFName = std::string(BF->getOneName());
567 std::string BFBaseName = GetBaseName(BFName);
568 size_t PrefixLength = 0;
569 size_t N = std::min(YamlBFBaseName.size(), BFBaseName.size());
570 for (size_t I = 0; I < N; ++I) {
571 if (YamlBFBaseName[I] != BFBaseName[I])
572 break;
573 ++PrefixLength;
575 if (PrefixLength >= LCP) {
576 LCP = PrefixLength;
577 ClosestBF = BF;
580 if (ClosestBF) {
581 matchProfileToFunction(YamlBF, *ClosestBF);
582 ++MatchedWithCallGraph;
586 return MatchedWithCallGraph;
589 size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
590 if (opts::NameSimilarityFunctionMatchingThreshold == 0)
591 return 0;
593 size_t MatchedWithNameSimilarity = 0;
594 ItaniumPartialDemangler Demangler;
596 // Demangle and derive namespace from function name.
597 auto DemangleName = [&](std::string &FunctionName) {
598 StringRef RestoredName = NameResolver::restore(FunctionName);
599 return demangle(RestoredName);
601 auto DeriveNameSpace = [&](std::string &DemangledName) {
602 if (Demangler.partialDemangle(DemangledName.c_str()))
603 return std::string("");
604 std::vector<char> Buffer(DemangledName.begin(), DemangledName.end());
605 size_t BufferSize;
606 char *NameSpace =
607 Demangler.getFunctionDeclContextName(&Buffer[0], &BufferSize);
608 return std::string(NameSpace, BufferSize);
611 // Maps namespaces to associated function block counts and gets profile
612 // function names and namespaces to minimize the number of BFs to process and
613 // avoid repeated name demangling/namespace derivation.
614 StringMap<std::set<uint32_t>> NamespaceToProfiledBFSizes;
615 std::vector<std::string> ProfileBFDemangledNames;
616 ProfileBFDemangledNames.reserve(YamlBP.Functions.size());
617 std::vector<std::string> ProfiledBFNamespaces;
618 ProfiledBFNamespaces.reserve(YamlBP.Functions.size());
620 for (auto &YamlBF : YamlBP.Functions) {
621 std::string YamlBFDemangledName = DemangleName(YamlBF.Name);
622 ProfileBFDemangledNames.push_back(YamlBFDemangledName);
623 std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName);
624 ProfiledBFNamespaces.push_back(YamlBFNamespace);
625 NamespaceToProfiledBFSizes[YamlBFNamespace].insert(YamlBF.NumBasicBlocks);
628 StringMap<std::vector<BinaryFunction *>> NamespaceToBFs;
630 // Maps namespaces to BFs excluding binary functions with no equal sized
631 // profiled functions belonging to the same namespace.
632 for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
633 std::string DemangledName = BF->getDemangledName();
634 std::string Namespace = DeriveNameSpace(DemangledName);
636 auto NamespaceToProfiledBFSizesIt =
637 NamespaceToProfiledBFSizes.find(Namespace);
638 // Skip if there are no ProfileBFs with a given \p Namespace.
639 if (NamespaceToProfiledBFSizesIt == NamespaceToProfiledBFSizes.end())
640 continue;
641 // Skip if there are no ProfileBFs in a given \p Namespace with
642 // equal number of blocks.
643 if (NamespaceToProfiledBFSizesIt->second.count(BF->size()) == 0)
644 continue;
645 auto NamespaceToBFsIt = NamespaceToBFs.find(Namespace);
646 if (NamespaceToBFsIt == NamespaceToBFs.end())
647 NamespaceToBFs[Namespace] = {BF};
648 else
649 NamespaceToBFsIt->second.push_back(BF);
652 // Iterates through all profiled functions and binary functions belonging to
653 // the same namespace and matches based on edit distance threshold.
654 assert(YamlBP.Functions.size() == ProfiledBFNamespaces.size() &&
655 ProfiledBFNamespaces.size() == ProfileBFDemangledNames.size());
656 for (size_t I = 0; I < YamlBP.Functions.size(); ++I) {
657 yaml::bolt::BinaryFunctionProfile &YamlBF = YamlBP.Functions[I];
658 std::string &YamlBFNamespace = ProfiledBFNamespaces[I];
659 if (YamlBF.Used)
660 continue;
661 // Skip if there are no BFs in a given \p Namespace.
662 auto It = NamespaceToBFs.find(YamlBFNamespace);
663 if (It == NamespaceToBFs.end())
664 continue;
666 std::string &YamlBFDemangledName = ProfileBFDemangledNames[I];
667 std::vector<BinaryFunction *> BFs = It->second;
668 unsigned MinEditDistance = UINT_MAX;
669 BinaryFunction *ClosestNameBF = nullptr;
671 // Determines BF the closest to the profiled function, in the
672 // same namespace.
673 for (BinaryFunction *BF : BFs) {
674 if (ProfiledFunctions.count(BF))
675 continue;
676 if (BF->size() != YamlBF.NumBasicBlocks)
677 continue;
678 std::string BFDemangledName = BF->getDemangledName();
679 unsigned BFEditDistance =
680 StringRef(BFDemangledName).edit_distance(YamlBFDemangledName);
681 if (BFEditDistance < MinEditDistance) {
682 MinEditDistance = BFEditDistance;
683 ClosestNameBF = BF;
687 if (ClosestNameBF &&
688 MinEditDistance <= opts::NameSimilarityFunctionMatchingThreshold) {
689 matchProfileToFunction(YamlBF, *ClosestNameBF);
690 ++MatchedWithNameSimilarity;
694 return MatchedWithNameSimilarity;
697 Error YAMLProfileReader::readProfile(BinaryContext &BC) {
698 if (opts::Verbosity >= 1) {
699 outs() << "BOLT-INFO: YAML profile with hash: ";
700 switch (YamlBP.Header.HashFunction) {
701 case HashFunction::StdHash:
702 outs() << "std::hash\n";
703 break;
704 case HashFunction::XXH3:
705 outs() << "xxh3\n";
706 break;
709 YamlProfileToFunction.resize(YamlBP.Functions.size() + 1);
711 // Computes hash for binary functions.
712 if (opts::MatchProfileWithFunctionHash) {
713 for (auto &[_, BF] : BC.getBinaryFunctions()) {
714 BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
716 } else if (!opts::IgnoreHash) {
717 for (BinaryFunction *BF : ProfileBFs) {
718 if (!BF)
719 continue;
720 BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
724 // Map profiled function ids to names.
725 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
726 IdToYamLBF[YamlBF.Id] = &YamlBF;
728 const size_t MatchedWithExactName = matchWithExactName();
729 const size_t MatchedWithHash = matchWithHash(BC);
730 const size_t MatchedWithLTOCommonName = matchWithLTOCommonName();
731 const size_t MatchedWithCallGraph = matchWithCallGraph(BC);
732 const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC);
734 for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
735 if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
736 matchProfileToFunction(YamlBF, *BF);
739 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
740 if (!YamlBF.Used && opts::Verbosity >= 1)
741 errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
742 << '\n';
744 if (opts::Verbosity >= 1) {
745 outs() << "BOLT-INFO: matched " << MatchedWithExactName
746 << " functions with identical names\n";
747 outs() << "BOLT-INFO: matched " << MatchedWithHash
748 << " functions with hash\n";
749 outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
750 << " functions with matching LTO common names\n";
751 outs() << "BOLT-INFO: matched " << MatchedWithCallGraph
752 << " functions with call graph\n";
753 outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
754 << " functions with similar names\n";
757 // Set for parseFunctionProfile().
758 NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
759 NormalizeByCalls = usesEvent("branches");
760 uint64_t NumUnused = 0;
761 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
762 if (YamlBF.Id >= YamlProfileToFunction.size()) {
763 // Such profile was ignored.
764 ++NumUnused;
765 continue;
767 if (BinaryFunction *BF = YamlProfileToFunction[YamlBF.Id])
768 parseFunctionProfile(*BF, YamlBF);
769 else
770 ++NumUnused;
773 BC.setNumUnusedProfiledObjects(NumUnused);
775 if (opts::Lite &&
776 (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)) {
777 for (BinaryFunction *BF : BC.getAllBinaryFunctions())
778 if (!BF->hasProfile())
779 BF->setIgnored();
782 return Error::success();
785 bool YAMLProfileReader::usesEvent(StringRef Name) const {
786 return YamlBP.Header.EventNames.find(std::string(Name)) != StringRef::npos;
789 } // end namespace bolt
790 } // end namespace llvm