//===- bolt/Profile/YAMLProfileReader.cpp - YAML profile de-serializer ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 #include "bolt/Profile/YAMLProfileReader.h"
10 #include "bolt/Core/BinaryBasicBlock.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Passes/MCF.h"
13 #include "bolt/Profile/ProfileYAMLMapping.h"
14 #include "bolt/Utils/Utils.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/Support/CommandLine.h"
22 extern cl::opt
<unsigned> Verbosity
;
23 extern cl::OptionCategory BoltOptCategory
;
24 extern cl::opt
<bool> InferStaleProfile
;
26 static llvm::cl::opt
<bool>
27 IgnoreHash("profile-ignore-hash",
28 cl::desc("ignore hash while reading function profile"),
29 cl::Hidden
, cl::cat(BoltOptCategory
));
31 llvm::cl::opt
<bool> ProfileUseDFS("profile-use-dfs",
32 cl::desc("use DFS order for YAML profile"),
33 cl::Hidden
, cl::cat(BoltOptCategory
));
39 bool YAMLProfileReader::isYAML(const StringRef Filename
) {
40 if (auto MB
= MemoryBuffer::getFileOrSTDIN(Filename
)) {
41 StringRef Buffer
= (*MB
)->getBuffer();
42 return Buffer
.starts_with("---\n");
44 report_error(Filename
, MB
.getError());
49 void YAMLProfileReader::buildNameMaps(BinaryContext
&BC
) {
50 auto lookupFunction
= [&](StringRef Name
) -> BinaryFunction
* {
51 if (BinaryData
*BD
= BC
.getBinaryDataByName(Name
))
52 return BC
.getFunctionForSymbol(BD
->getSymbol());
56 ProfileBFs
.reserve(YamlBP
.Functions
.size());
58 for (yaml::bolt::BinaryFunctionProfile
&YamlBF
: YamlBP
.Functions
) {
59 StringRef Name
= YamlBF
.Name
;
60 const size_t Pos
= Name
.find("(*");
61 if (Pos
!= StringRef::npos
)
62 Name
= Name
.substr(0, Pos
);
63 ProfileFunctionNames
.insert(Name
);
64 ProfileBFs
.push_back(lookupFunction(Name
));
65 if (const std::optional
<StringRef
> CommonName
= getLTOCommonName(Name
))
66 LTOCommonNameMap
[*CommonName
].push_back(&YamlBF
);
68 for (auto &[Symbol
, BF
] : BC
.SymbolToFunctionMap
) {
69 StringRef Name
= Symbol
->getName();
70 if (const std::optional
<StringRef
> CommonName
= getLTOCommonName(Name
))
71 LTOCommonNameFunctionMap
[*CommonName
].insert(BF
);
75 bool YAMLProfileReader::hasLocalsWithFileName() const {
76 return llvm::any_of(ProfileFunctionNames
.keys(), [](StringRef FuncName
) {
77 return FuncName
.count('/') == 2 && FuncName
[0] != '/';
81 bool YAMLProfileReader::parseFunctionProfile(
82 BinaryFunction
&BF
, const yaml::bolt::BinaryFunctionProfile
&YamlBF
) {
83 BinaryContext
&BC
= BF
.getBinaryContext();
85 const bool IsDFSOrder
= YamlBP
.Header
.IsDFSOrder
;
86 const HashFunction HashFunction
= YamlBP
.Header
.HashFunction
;
87 bool ProfileMatched
= true;
88 uint64_t MismatchedBlocks
= 0;
89 uint64_t MismatchedCalls
= 0;
90 uint64_t MismatchedEdges
= 0;
92 uint64_t FunctionExecutionCount
= 0;
94 BF
.setExecutionCount(YamlBF
.ExecCount
);
96 uint64_t FuncRawBranchCount
= 0;
97 for (const yaml::bolt::BinaryBasicBlockProfile
&YamlBB
: YamlBF
.Blocks
)
98 for (const yaml::bolt::SuccessorInfo
&YamlSI
: YamlBB
.Successors
)
99 FuncRawBranchCount
+= YamlSI
.Count
;
100 BF
.setRawBranchCount(FuncRawBranchCount
);
102 if (!opts::IgnoreHash
&&
103 YamlBF
.Hash
!= BF
.computeHash(IsDFSOrder
, HashFunction
)) {
104 if (opts::Verbosity
>= 1)
105 errs() << "BOLT-WARNING: function hash mismatch\n";
106 ProfileMatched
= false;
109 if (YamlBF
.NumBasicBlocks
!= BF
.size()) {
110 if (opts::Verbosity
>= 1)
111 errs() << "BOLT-WARNING: number of basic blocks mismatch\n";
112 ProfileMatched
= false;
115 BinaryFunction::BasicBlockOrderType Order
;
117 llvm::copy(BF
.dfs(), std::back_inserter(Order
));
119 llvm::copy(BF
.getLayout().blocks(), std::back_inserter(Order
));
121 for (const yaml::bolt::BinaryBasicBlockProfile
&YamlBB
: YamlBF
.Blocks
) {
122 if (YamlBB
.Index
>= Order
.size()) {
123 if (opts::Verbosity
>= 2)
124 errs() << "BOLT-WARNING: index " << YamlBB
.Index
125 << " is out of bounds\n";
130 BinaryBasicBlock
&BB
= *Order
[YamlBB
.Index
];
132 // Basic samples profile (without LBR) does not have branches information
133 // and needs a special processing.
134 if (YamlBP
.Header
.Flags
& BinaryFunction::PF_SAMPLE
) {
135 if (!YamlBB
.EventCount
) {
136 BB
.setExecutionCount(0);
139 uint64_t NumSamples
= YamlBB
.EventCount
* 1000;
140 if (NormalizeByInsnCount
&& BB
.getNumNonPseudos())
141 NumSamples
/= BB
.getNumNonPseudos();
142 else if (NormalizeByCalls
)
143 NumSamples
/= BB
.getNumCalls() + 1;
145 BB
.setExecutionCount(NumSamples
);
146 if (BB
.isEntryPoint())
147 FunctionExecutionCount
+= NumSamples
;
151 BB
.setExecutionCount(YamlBB
.ExecCount
);
153 for (const yaml::bolt::CallSiteInfo
&YamlCSI
: YamlBB
.CallSites
) {
154 BinaryFunction
*Callee
= YamlCSI
.DestId
< YamlProfileToFunction
.size()
155 ? YamlProfileToFunction
[YamlCSI
.DestId
]
157 bool IsFunction
= Callee
? true : false;
158 MCSymbol
*CalleeSymbol
= nullptr;
160 CalleeSymbol
= Callee
->getSymbolForEntryID(YamlCSI
.EntryDiscriminator
);
162 BF
.getAllCallSites().emplace_back(CalleeSymbol
, YamlCSI
.Count
,
163 YamlCSI
.Mispreds
, YamlCSI
.Offset
);
165 if (YamlCSI
.Offset
>= BB
.getOriginalSize()) {
166 if (opts::Verbosity
>= 2)
167 errs() << "BOLT-WARNING: offset " << YamlCSI
.Offset
168 << " out of bounds in block " << BB
.getName() << '\n';
174 BF
.getInstructionAtOffset(BB
.getInputOffset() + YamlCSI
.Offset
);
176 if (opts::Verbosity
>= 2)
177 errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI
.Offset
178 << " in block " << BB
.getName() << '\n';
182 if (!BC
.MIB
->isCall(*Instr
) && !BC
.MIB
->isIndirectBranch(*Instr
)) {
183 if (opts::Verbosity
>= 2)
184 errs() << "BOLT-WARNING: expected call at offset " << YamlCSI
.Offset
185 << " in block " << BB
.getName() << '\n';
190 auto setAnnotation
= [&](StringRef Name
, uint64_t Count
) {
191 if (BC
.MIB
->hasAnnotation(*Instr
, Name
)) {
192 if (opts::Verbosity
>= 1)
193 errs() << "BOLT-WARNING: ignoring duplicate " << Name
194 << " info for offset 0x" << Twine::utohexstr(YamlCSI
.Offset
)
195 << " in function " << BF
<< '\n';
198 BC
.MIB
->addAnnotation(*Instr
, Name
, Count
);
201 if (BC
.MIB
->isIndirectCall(*Instr
) || BC
.MIB
->isIndirectBranch(*Instr
)) {
202 auto &CSP
= BC
.MIB
->getOrCreateAnnotationAs
<IndirectCallSiteProfile
>(
203 *Instr
, "CallProfile");
204 CSP
.emplace_back(CalleeSymbol
, YamlCSI
.Count
, YamlCSI
.Mispreds
);
205 } else if (BC
.MIB
->getConditionalTailCall(*Instr
)) {
206 setAnnotation("CTCTakenCount", YamlCSI
.Count
);
207 setAnnotation("CTCMispredCount", YamlCSI
.Mispreds
);
209 setAnnotation("Count", YamlCSI
.Count
);
213 for (const yaml::bolt::SuccessorInfo
&YamlSI
: YamlBB
.Successors
) {
214 if (YamlSI
.Index
>= Order
.size()) {
215 if (opts::Verbosity
>= 1)
216 errs() << "BOLT-WARNING: index out of bounds for profiled block\n";
221 BinaryBasicBlock
&SuccessorBB
= *Order
[YamlSI
.Index
];
222 if (!BB
.getSuccessor(SuccessorBB
.getLabel())) {
223 if (opts::Verbosity
>= 1)
224 errs() << "BOLT-WARNING: no successor for block " << BB
.getName()
225 << " that matches index " << YamlSI
.Index
<< " or block "
226 << SuccessorBB
.getName() << '\n';
231 BinaryBasicBlock::BinaryBranchInfo
&BI
= BB
.getBranchInfo(SuccessorBB
);
232 BI
.Count
+= YamlSI
.Count
;
233 BI
.MispredictedCount
+= YamlSI
.Mispreds
;
237 // If basic block profile wasn't read it should be 0.
238 for (BinaryBasicBlock
&BB
: BF
)
239 if (BB
.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE
)
240 BB
.setExecutionCount(0);
242 if (YamlBP
.Header
.Flags
& BinaryFunction::PF_SAMPLE
) {
243 BF
.setExecutionCount(FunctionExecutionCount
);
244 estimateEdgeCounts(BF
);
247 ProfileMatched
&= !MismatchedBlocks
&& !MismatchedCalls
&& !MismatchedEdges
;
250 BF
.markProfiled(YamlBP
.Header
.Flags
);
252 if (!ProfileMatched
&& opts::Verbosity
>= 1)
253 errs() << "BOLT-WARNING: " << MismatchedBlocks
<< " blocks, "
254 << MismatchedCalls
<< " calls, and " << MismatchedEdges
255 << " edges in profile did not match function " << BF
<< '\n';
257 if (!ProfileMatched
&& opts::InferStaleProfile
) {
258 if (inferStaleProfile(BF
, YamlBF
)) {
259 ProfileMatched
= true;
260 BF
.markProfiled(YamlBP
.Header
.Flags
);
264 return ProfileMatched
;
267 Error
YAMLProfileReader::preprocessProfile(BinaryContext
&BC
) {
268 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
269 MemoryBuffer::getFileOrSTDIN(Filename
);
270 if (std::error_code EC
= MB
.getError()) {
271 errs() << "ERROR: cannot open " << Filename
<< ": " << EC
.message() << "\n";
272 return errorCodeToError(EC
);
274 yaml::Input
YamlInput(MB
.get()->getBuffer());
276 // Consume YAML file.
278 if (YamlInput
.error()) {
279 errs() << "BOLT-ERROR: syntax error parsing profile in " << Filename
280 << " : " << YamlInput
.error().message() << '\n';
281 return errorCodeToError(YamlInput
.error());
285 if (YamlBP
.Header
.Version
!= 1)
286 return make_error
<StringError
>(
287 Twine("cannot read profile : unsupported version"),
288 inconvertibleErrorCode());
290 if (YamlBP
.Header
.EventNames
.find(',') != StringRef::npos
)
291 return make_error
<StringError
>(
292 Twine("multiple events in profile are not supported"),
293 inconvertibleErrorCode());
295 // Match profile to function based on a function name.
298 // Preliminary assign function execution count.
299 for (auto [YamlBF
, BF
] : llvm::zip_equal(YamlBP
.Functions
, ProfileBFs
)) {
302 if (!BF
->hasProfile()) {
303 BF
->setExecutionCount(YamlBF
.ExecCount
);
305 if (opts::Verbosity
>= 1) {
306 errs() << "BOLT-WARNING: dropping duplicate profile for " << YamlBF
.Name
313 return Error::success();
316 bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction
&BF
) {
317 for (StringRef Name
: BF
.getNames())
318 if (ProfileFunctionNames
.contains(Name
))
320 for (StringRef Name
: BF
.getNames()) {
321 if (const std::optional
<StringRef
> CommonName
= getLTOCommonName(Name
)) {
322 if (LTOCommonNameMap
.contains(*CommonName
))
330 Error
YAMLProfileReader::readProfile(BinaryContext
&BC
) {
331 if (opts::Verbosity
>= 1) {
332 outs() << "BOLT-INFO: YAML profile with hash: ";
333 switch (YamlBP
.Header
.HashFunction
) {
334 case HashFunction::StdHash
:
335 outs() << "std::hash\n";
337 case HashFunction::XXH3
:
342 YamlProfileToFunction
.resize(YamlBP
.Functions
.size() + 1);
344 auto profileMatches
= [](const yaml::bolt::BinaryFunctionProfile
&Profile
,
345 BinaryFunction
&BF
) {
346 if (opts::IgnoreHash
)
347 return Profile
.NumBasicBlocks
== BF
.size();
348 return Profile
.Hash
== static_cast<uint64_t>(BF
.getHash());
351 // We have to do 2 passes since LTO introduces an ambiguity in function
352 // names. The first pass assigns profiles that match 100% by name and
353 // by hash. The second pass allows name ambiguity for LTO private functions.
354 for (auto [YamlBF
, BF
] : llvm::zip_equal(YamlBP
.Functions
, ProfileBFs
)) {
357 BinaryFunction
&Function
= *BF
;
358 // Clear function call count that may have been set while pre-processing
360 Function
.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE
);
362 // Recompute hash once per function.
363 if (!opts::IgnoreHash
)
364 Function
.computeHash(YamlBP
.Header
.IsDFSOrder
,
365 YamlBP
.Header
.HashFunction
);
367 if (profileMatches(YamlBF
, Function
))
368 matchProfileToFunction(YamlBF
, Function
);
371 for (const auto &[CommonName
, LTOProfiles
] : LTOCommonNameMap
) {
372 if (!LTOCommonNameFunctionMap
.contains(CommonName
))
374 std::unordered_set
<BinaryFunction
*> &Functions
=
375 LTOCommonNameFunctionMap
[CommonName
];
376 // Return true if a given profile is matched to one of BinaryFunctions with
377 // matching LTO common name.
378 auto matchProfile
= [&](yaml::bolt::BinaryFunctionProfile
*YamlBF
) {
381 for (BinaryFunction
*BF
: Functions
) {
382 if (!ProfiledFunctions
.count(BF
) && profileMatches(*YamlBF
, *BF
)) {
383 matchProfileToFunction(*YamlBF
, *BF
);
389 bool ProfileMatched
= llvm::any_of(LTOProfiles
, matchProfile
);
391 // If there's only one function with a given name, try to match it
393 if (!ProfileMatched
&& LTOProfiles
.size() == 1 && Functions
.size() == 1 &&
394 !LTOProfiles
.front()->Used
&&
395 !ProfiledFunctions
.count(*Functions
.begin()))
396 matchProfileToFunction(*LTOProfiles
.front(), **Functions
.begin());
399 for (auto [YamlBF
, BF
] : llvm::zip_equal(YamlBP
.Functions
, ProfileBFs
))
400 if (!YamlBF
.Used
&& BF
&& !ProfiledFunctions
.count(BF
))
401 matchProfileToFunction(YamlBF
, *BF
);
403 for (yaml::bolt::BinaryFunctionProfile
&YamlBF
: YamlBP
.Functions
)
404 if (!YamlBF
.Used
&& opts::Verbosity
>= 1)
405 errs() << "BOLT-WARNING: profile ignored for function " << YamlBF
.Name
408 // Set for parseFunctionProfile().
409 NormalizeByInsnCount
= usesEvent("cycles") || usesEvent("instructions");
410 NormalizeByCalls
= usesEvent("branches");
412 uint64_t NumUnused
= 0;
413 for (yaml::bolt::BinaryFunctionProfile
&YamlBF
: YamlBP
.Functions
) {
414 if (YamlBF
.Id
>= YamlProfileToFunction
.size()) {
415 // Such profile was ignored.
419 if (BinaryFunction
*BF
= YamlProfileToFunction
[YamlBF
.Id
])
420 parseFunctionProfile(*BF
, YamlBF
);
425 BC
.setNumUnusedProfiledObjects(NumUnused
);
427 return Error::success();
430 bool YAMLProfileReader::usesEvent(StringRef Name
) const {
431 return YamlBP
.Header
.EventNames
.find(std::string(Name
)) != StringRef::npos
;
434 } // end namespace bolt
435 } // end namespace llvm