//===- bolt/Profile/StaleProfileMatching.cpp - Profile data matching ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// BOLT often has to deal with profiles collected on binaries built from several
// revisions behind release. As a result, a certain percentage of functions are
// considered stale and are not optimized. This file implements the ability to
// match a stale profile to functions that are not 100% binary identical, thus
// increasing optimization coverage and boosting application performance.
//
// The algorithm consists of two phases: matching and inference:
// - In the matching phase, we try to "guess" as many block and jump counts from
//   the stale profile as possible. To this end, the content of each basic block
//   is hashed and stored in the (yaml) profile. When BOLT optimizes a binary,
//   it computes block hashes and identifies the corresponding entries in the
//   stale profile. This yields a partial profile for every CFG in the binary.
// - In the inference phase, we employ a network flow-based algorithm (profi) to
//   reconstruct "realistic" block and jump counts from the partial profile
//   generated at the first stage. In practice, the reconstruction is not always
//   exact, but the majority (e.g., >90%) of CFGs get correct counts.
//
//===----------------------------------------------------------------------===//
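
// A simplified sketch of the pipeline implemented in this file; the actual
// driver is YAMLProfileReader::inferStaleProfile at the bottom, and stale
// matching is enabled with the -infer-stale-profile flag defined below:
//
//   BF.computeBlockHashes();                             // hash basic blocks
//   FlowFunction Func = createFlowFunction(BlockOrder);
//   matchWeightsByHashes(BC, BlockOrder, YamlBF, Func);  // matching phase
//   preprocessUnreachableBlocks(Func);
//   if (canApplyInference(Func))
//     applyInference(Func);                              // inference (profi)
//   assignProfile(BF, BlockOrder, Func);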
28 #include "bolt/Core/HashUtilities.h"
29 #include "bolt/Profile/YAMLProfileReader.h"
30 #include "llvm/ADT/Bitfields.h"
31 #include "llvm/ADT/Hashing.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Transforms/Utils/SampleProfileInference.h"
35 #include <queue>
37 using namespace llvm;
39 #undef DEBUG_TYPE
40 #define DEBUG_TYPE "bolt-prof"
42 namespace opts {
44 extern cl::OptionCategory BoltOptCategory;
46 cl::opt<bool>
47 InferStaleProfile("infer-stale-profile",
48 cl::desc("Infer counts from stale profile data."),
49 cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));
51 cl::opt<unsigned> StaleMatchingMaxFuncSize(
52 "stale-matching-max-func-size",
53 cl::desc("The maximum size of a function to consider for inference."),
54 cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory));
56 // Parameters of the profile inference algorithm. The default values are tuned
57 // on several benchmarks.
58 cl::opt<bool> StaleMatchingEvenFlowDistribution(
59 "stale-matching-even-flow-distribution",
60 cl::desc("Try to evenly distribute flow when there are multiple equally "
61 "likely options."),
62 cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));
64 cl::opt<bool> StaleMatchingRebalanceUnknown(
65 "stale-matching-rebalance-unknown",
66 cl::desc("Evenly re-distribute flow among unknown subgraphs."),
67 cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));
69 cl::opt<bool> StaleMatchingJoinIslands(
70 "stale-matching-join-islands",
71 cl::desc("Join isolated components having positive flow."), cl::init(true),
72 cl::ReallyHidden, cl::cat(BoltOptCategory));
74 cl::opt<unsigned> StaleMatchingCostBlockInc(
75 "stale-matching-cost-block-inc",
76 cl::desc("The cost of increasing a block count by one."), cl::init(150),
77 cl::ReallyHidden, cl::cat(BoltOptCategory));
79 cl::opt<unsigned> StaleMatchingCostBlockDec(
80 "stale-matching-cost-block-dec",
81 cl::desc("The cost of decreasing a block count by one."), cl::init(150),
82 cl::ReallyHidden, cl::cat(BoltOptCategory));
84 cl::opt<unsigned> StaleMatchingCostJumpInc(
85 "stale-matching-cost-jump-inc",
86 cl::desc("The cost of increasing a jump count by one."), cl::init(150),
87 cl::ReallyHidden, cl::cat(BoltOptCategory));
89 cl::opt<unsigned> StaleMatchingCostJumpDec(
90 "stale-matching-cost-jump-dec",
91 cl::desc("The cost of decreasing a jump count by one."), cl::init(150),
92 cl::ReallyHidden, cl::cat(BoltOptCategory));
94 cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
95 "stale-matching-cost-block-unknown-inc",
96 cl::desc("The cost of increasing an unknown block count by one."),
97 cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));
99 cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
100 "stale-matching-cost-jump-unknown-inc",
101 cl::desc("The cost of increasing an unknown jump count by one."),
102 cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory));
104 cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
105 "stale-matching-cost-jump-unknown-ft-inc",
106 cl::desc(
107 "The cost of increasing an unknown fall-through jump count by one."),
108 cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory));
110 } // namespace opts

namespace llvm {
namespace bolt {

/// An object wrapping several components of a basic block hash. The combined
/// (blended) hash is represented and stored as one uint64_t, while individual
/// components are of smaller size (e.g., uint16_t or uint8_t).
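///
/// A minimal usage sketch (the values below are illustrative only; the exact
/// field layout is defined by the Bitfield elements inside the struct):
/// \code
///   BlendedBlockHash Hash;
///   Hash.Offset = 0x10;
///   Hash.OpcodeHash = 0xabcd;
///   uint64_t Packed = Hash.combine();
///   BlendedBlockHash Unpacked(Packed); // Unpacked.OpcodeHash == 0xabcd
/// \endcode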
struct BlendedBlockHash {
private:
  using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
  using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
  using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
  using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
  using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;

public:
  explicit BlendedBlockHash() {}

  explicit BlendedBlockHash(uint64_t Hash) {
    Offset = Bitfield::get<ValueOffset>(Hash);
    OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
    InstrHash = Bitfield::get<ValueInstr>(Hash);
    PredHash = Bitfield::get<ValuePred>(Hash);
    SuccHash = Bitfield::get<ValueSucc>(Hash);
  }

  /// Combine the blended hash into uint64_t.
  uint64_t combine() const {
    uint64_t Hash = 0;
    Bitfield::set<ValueOffset>(Hash, Offset);
    Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
    Bitfield::set<ValueInstr>(Hash, InstrHash);
    Bitfield::set<ValuePred>(Hash, PredHash);
    Bitfield::set<ValueSucc>(Hash, SuccHash);
    return Hash;
  }

  /// Compute a distance between two given blended hashes. The smaller the
  /// distance, the more similar two blocks are. For identical basic blocks,
  /// the distance is zero.
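  /// For example (illustrative values): two hashes that differ only in Offset
  /// by 2 bytes have distance 2, a mismatch in InstrHash alone contributes
  /// 1 << 16, and a mismatch in SuccHash or PredHash alone contributes 1 << 32.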
  uint64_t distance(const BlendedBlockHash &BBH) const {
    assert(OpcodeHash == BBH.OpcodeHash &&
           "incorrect blended hash distance computation");
    uint64_t Dist = 0;
    // Account for NeighborHash
    Dist += SuccHash == BBH.SuccHash ? 0 : 1;
    Dist += PredHash == BBH.PredHash ? 0 : 1;
    Dist <<= 16;
    // Account for InstrHash
    Dist += InstrHash == BBH.InstrHash ? 0 : 1;
    Dist <<= 16;
    // Account for Offset
    Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
    return Dist;
  }

  /// The offset of the basic block from the function start.
  uint16_t Offset{0};
  /// (Loose) Hash of the basic block instructions, excluding operands.
  uint16_t OpcodeHash{0};
  /// (Strong) Hash of the basic block instructions, including opcodes and
  /// operands.
  uint16_t InstrHash{0};
  /// (Loose) Hashes of the predecessors of the basic block.
  uint8_t PredHash{0};
  /// (Loose) Hashes of the successors of the basic block.
  uint8_t SuccHash{0};
};

/// The object is used to identify and match basic blocks in a BinaryFunction
/// given their hashes computed on a binary built from several revisions behind
/// release.
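///
/// A minimal sketch of the intended use (mirroring matchWeightsByHashes below;
/// Blocks and Hashes are parallel vectors):
/// \code
///   StaleMatcher Matcher;
///   Matcher.init(Blocks, Hashes);
///   if (const FlowBlock *Block = Matcher.matchBlock(SomeBlendedHash)) {
///     // Block is the most similar binary block for the profiled hash.
///   }
/// \endcode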
class StaleMatcher {
public:
  /// Initialize stale matcher.
  void init(const std::vector<FlowBlock *> &Blocks,
            const std::vector<BlendedBlockHash> &Hashes) {
    assert(Blocks.size() == Hashes.size() &&
           "incorrect matcher initialization");
    for (size_t I = 0; I < Blocks.size(); I++) {
      FlowBlock *Block = Blocks[I];
      uint16_t OpHash = Hashes[I].OpcodeHash;
      OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block));
    }
  }

  /// Find the most similar block for a given hash.
  const FlowBlock *matchBlock(BlendedBlockHash BlendedHash) const {
    auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
    if (BlockIt == OpHashToBlocks.end())
      return nullptr;
    FlowBlock *BestBlock = nullptr;
    uint64_t BestDist = std::numeric_limits<uint64_t>::max();
    for (const auto &[Hash, Block] : BlockIt->second) {
      uint64_t Dist = Hash.distance(BlendedHash);
      if (BestBlock == nullptr || Dist < BestDist) {
        BestDist = Dist;
        BestBlock = Block;
      }
    }
    return BestBlock;
  }

  /// Returns true if the two basic blocks (in the binary and in the profile)
  /// corresponding to the given hashes are matched to each other with a high
  /// confidence.
  static bool isHighConfidenceMatch(BlendedBlockHash Hash1,
                                    BlendedBlockHash Hash2) {
    return Hash1.InstrHash == Hash2.InstrHash;
  }

private:
  using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
  std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
};

void BinaryFunction::computeBlockHashes() const {
  if (size() == 0)
    return;

  assert(hasCFG() && "the function is expected to have CFG");

  std::vector<BlendedBlockHash> BlendedHashes(BasicBlocks.size());
  std::vector<uint64_t> OpcodeHashes(BasicBlocks.size());
  // Initialize hash components.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    assert(BB->getIndex() == I && "incorrect block index");
    BlendedHashes[I].Offset = BB->getOffset();
    // Hashing complete instructions.
    std::string InstrHashStr = hashBlock(
        BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); });
    uint64_t InstrHash = std::hash<std::string>{}(InstrHashStr);
    BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash);
    // Hashing opcodes.
    std::string OpcodeHashStr = hashBlockLoose(BC, *BB);
    OpcodeHashes[I] = std::hash<std::string>{}(OpcodeHashStr);
    BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]);
  }

  // Initialize neighbor hash.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    // Append hashes of successors.
    uint64_t Hash = 0;
    for (BinaryBasicBlock *SuccBB : BB->successors()) {
      uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()];
      Hash = hashing::detail::hash_16_bytes(Hash, SuccHash);
    }
    BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash);

    // Append hashes of predecessors.
    Hash = 0;
    for (BinaryBasicBlock *PredBB : BB->predecessors()) {
      uint64_t PredHash = OpcodeHashes[PredBB->getIndex()];
      Hash = hashing::detail::hash_16_bytes(Hash, PredHash);
    }
    BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash);
  }

  // Assign hashes.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    BB->setHash(BlendedHashes[I].combine());
  }
}

/// Create a wrapper flow function to use with the profile inference algorithm,
/// and initialize its jumps and metadata.
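///
/// For example, a function with basic blocks B0, B1, B2 yields Func.Blocks =
/// {Dummy, B0, B1, B2} with indices 0, 1, 2, 3, i.e., each non-dummy block
/// satisfies Block.Index == BB->getIndex() + 1.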
FlowFunction
createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
  FlowFunction Func;

  // Add a special "dummy" source so that there is always a unique entry point.
  // Because of the extra source, for all other blocks in FlowFunction it holds
  // that Block.Index == BB->getIndex() + 1
  FlowBlock EntryBlock;
  EntryBlock.Index = 0;
  Func.Blocks.push_back(EntryBlock);

  // Create FlowBlock for every basic block in the binary function
  for (const BinaryBasicBlock *BB : BlockOrder) {
    Func.Blocks.emplace_back();
    FlowBlock &Block = Func.Blocks.back();
    Block.Index = Func.Blocks.size() - 1;
    (void)BB;
    assert(Block.Index == BB->getIndex() + 1 &&
           "incorrectly assigned basic block index");
  }

  // Create FlowJump for each jump between basic blocks in the binary function
  std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
  for (const BinaryBasicBlock *SrcBB : BlockOrder) {
    std::unordered_set<const BinaryBasicBlock *> UniqueSuccs;
    // Collect regular jumps
    for (const BinaryBasicBlock *DstBB : SrcBB->successors()) {
      // Ignoring parallel edges
      if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
        continue;

      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = DstBB->getIndex() + 1;
      InDegree[Jump.Target]++;
      UniqueSuccs.insert(DstBB);
    }
    // Collect jumps to landing pads
    for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) {
      // Ignoring parallel edges
      if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
        continue;

      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = DstBB->getIndex() + 1;
      InDegree[Jump.Target]++;
      UniqueSuccs.insert(DstBB);
    }
  }

  // Add dummy edges to the extra sources. If there are multiple entry blocks,
  // add an unlikely edge from 0 to the subsequent ones
  assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
  for (uint64_t I = 1; I < Func.Blocks.size(); I++) {
    const BinaryBasicBlock *BB = BlockOrder[I - 1];
    if (BB->isEntryPoint() || InDegree[I] == 0) {
      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = 0;
      Jump.Target = I;
      if (!BB->isEntryPoint())
        Jump.IsUnlikely = true;
    }
  }

  // Create necessary metadata for the flow function
  for (FlowJump &Jump : Func.Jumps) {
    Func.Blocks.at(Jump.Source).SuccJumps.push_back(&Jump);
    Func.Blocks.at(Jump.Target).PredJumps.push_back(&Jump);
  }
  return Func;
}

/// Assign initial block/jump weights based on the stale profile data. The goal
/// is to extract as much information from the stale profile as possible. Here
/// we assume that each basic block is specified via a hash value computed from
/// its content and the hashes of the unchanged basic blocks stay the same
/// across different revisions of the binary.
/// Whenever there is a count in the profile with the hash corresponding to one
/// of the basic blocks in the binary, the count is "matched" to the block.
/// Similarly, if both the source and the target of a count in the profile are
/// matched to a jump in the binary, the count is recorded in CFG.
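///
/// For example (illustrative scenario): if a profile block with hash H is
/// matched to FlowBlock B, the successor counts recorded for that profile
/// block seed the weights of B's outgoing jumps when both endpoints are
/// matched, and B.Weight is set to the maximum of its total matched outgoing
/// and incoming counts (see the loops below).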
void matchWeightsByHashes(BinaryContext &BC,
                          const BinaryFunction::BasicBlockOrderType &BlockOrder,
                          const yaml::bolt::BinaryFunctionProfile &YamlBF,
                          FlowFunction &Func) {
  assert(Func.Blocks.size() == BlockOrder.size() + 1);

  std::vector<FlowBlock *> Blocks;
  std::vector<BlendedBlockHash> BlendedHashes;
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    const BinaryBasicBlock *BB = BlockOrder[I];
    assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
    Blocks.push_back(&Func.Blocks[I + 1]);
    BlendedBlockHash BlendedHash(BB->getHash());
    BlendedHashes.push_back(BlendedHash);
    LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                      << Twine::utohexstr(BB->getHash()) << "\n");
  }
  StaleMatcher Matcher;
  Matcher.init(Blocks, BlendedHashes);

  // Index in yaml profile => corresponding (matched) block
  DenseMap<uint64_t, const FlowBlock *> MatchedBlocks;
  // Match blocks from the profile to the blocks in CFG
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
    BlendedBlockHash YamlHash(YamlBB.Hash);
    const FlowBlock *MatchedBlock = Matcher.matchBlock(YamlHash);
    // Always match the entry block.
    if (MatchedBlock == nullptr && YamlBB.Index == 0)
      MatchedBlock = Blocks[0];
    if (MatchedBlock != nullptr) {
      MatchedBlocks[YamlBB.Index] = MatchedBlock;
      BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
      LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBB.Index << ")"
                        << " with hash " << Twine::utohexstr(YamlBB.Hash)
                        << " to BB (index = " << MatchedBlock->Index - 1 << ")"
                        << " with hash " << Twine::utohexstr(BinHash.combine())
                        << "\n");
      // Update matching stats accounting for the matched block.
      if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
        ++BC.Stats.NumMatchedBlocks;
        BC.Stats.MatchedSampleCount += YamlBB.ExecCount;
        LLVM_DEBUG(dbgs() << "  exact match\n");
      }
    } else {
      LLVM_DEBUG(
          dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index << ")"
                 << " with hash " << Twine::utohexstr(YamlBB.Hash) << "\n");
    }

    // Update matching stats.
    ++BC.Stats.NumStaleBlocks;
    BC.Stats.StaleSampleCount += YamlBB.ExecCount;
  }

  // Match jumps from the profile to the jumps from CFG
  std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
  std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
      if (YamlSI.Count == 0)
        continue;

      // Try to find the jump for a given (src, dst) pair from the profile and
      // assign the jump weight based on the profile count
      const uint64_t SrcIndex = YamlBB.Index;
      const uint64_t DstIndex = YamlSI.Index;

      const FlowBlock *MatchedSrcBlock = MatchedBlocks.lookup(SrcIndex);
      const FlowBlock *MatchedDstBlock = MatchedBlocks.lookup(DstIndex);

      if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) {
        // Find a jump between the two blocks
        FlowJump *Jump = nullptr;
        for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) {
          if (SuccJump->Target == MatchedDstBlock->Index) {
            Jump = SuccJump;
            break;
          }
        }
        // Assign the weight, if the corresponding jump is found
        if (Jump != nullptr) {
          Jump->Weight = YamlSI.Count;
          Jump->HasUnknownWeight = false;
        }
      }
      // Assign the weight for the src block, if it is found
      if (MatchedSrcBlock != nullptr)
        OutWeight[MatchedSrcBlock->Index] += YamlSI.Count;
      // Assign the weight for the dst block, if it is found
      if (MatchedDstBlock != nullptr)
        InWeight[MatchedDstBlock->Index] += YamlSI.Count;
    }
  }

  // Assign block counts based on in-/out- jumps
  for (FlowBlock &Block : Func.Blocks) {
    if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) {
      assert(Block.HasUnknownWeight && "unmatched block with a positive count");
      continue;
    }
    Block.HasUnknownWeight = false;
    Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
  }
}

/// The function finds all blocks that are (i) reachable from the Entry block
/// and (ii) do not have a path to an exit, and marks all such blocks 'cold'
/// so that profi does not send any flow to such blocks.
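///
/// For example, in the CFG Entry -> A -> Exit with an extra block B reachable
/// only via Entry -> B -> B (a self-loop with no outgoing edge to an exit),
/// B cannot reach an exit and is therefore zeroed out and marked unlikely.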
void preprocessUnreachableBlocks(FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();

  // Start bfs from the source
  std::queue<uint64_t> Queue;
  std::vector<bool> VisitedEntry(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      Queue.push(I);
      VisitedEntry[I] = true;
      break;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].SuccJumps) {
      const uint64_t Dst = Jump->Target;
      if (!VisitedEntry[Dst]) {
        Queue.push(Dst);
        VisitedEntry[Dst] = true;
      }
    }
  }

  // Start bfs from all sinks
  std::vector<bool> VisitedExit(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isExit() && VisitedEntry[I]) {
      Queue.push(I);
      VisitedExit[I] = true;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].PredJumps) {
      const uint64_t Dst = Jump->Source;
      if (!VisitedExit[Dst]) {
        Queue.push(Dst);
        VisitedExit[Dst] = true;
      }
    }
  }

  // Zero the weight of blocks that are unreachable from the entry or cannot
  // reach an exit, so that no flow is sent to them
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.Weight == 0)
      continue;
    if (!VisitedEntry[I] || !VisitedExit[I]) {
      Block.Weight = 0;
      Block.HasUnknownWeight = true;
      Block.IsUnlikely = true;
      for (FlowJump *Jump : Block.SuccJumps) {
        if (Jump->Source == Block.Index && Jump->Target == Block.Index) {
          Jump->Weight = 0;
          Jump->HasUnknownWeight = true;
          Jump->IsUnlikely = true;
        }
      }
    }
  }
}

/// Decide if stale profile matching can be applied for a given function.
/// Currently we skip inference for (very) large instances and for instances
/// having "unexpected" control flow (e.g., having no sink basic blocks).
bool canApplyInference(const FlowFunction &Func) {
  if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
    return false;

  bool HasExitBlocks = llvm::any_of(
      Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
  if (!HasExitBlocks)
    return false;

  return true;
}

/// Apply the profile inference algorithm for a given flow function.
void applyInference(FlowFunction &Func) {
  ProfiParams Params;
  // Set the params from the command-line flags.
  Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution;
  Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown;
  Params.JoinIslands = opts::StaleMatchingJoinIslands;

  Params.CostBlockInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc;

  Params.CostJumpInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc;
  Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc;

  applyFlowInference(Params, Func);
}

/// Collect inferred counts from the flow function and update annotations in
/// the binary function.
void assignProfile(BinaryFunction &BF,
                   const BinaryFunction::BasicBlockOrderType &BlockOrder,
                   FlowFunction &Func) {
  BinaryContext &BC = BF.getBinaryContext();

  assert(Func.Blocks.size() == BlockOrder.size() + 1);
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    FlowBlock &Block = Func.Blocks[I + 1];
    BinaryBasicBlock *BB = BlockOrder[I];

    // Update block's count
    BB->setExecutionCount(Block.Flow);

    // Update jump counts: (i) clean existing counts and then (ii) set new ones
    auto BI = BB->branch_info_begin();
    for (const BinaryBasicBlock *DstBB : BB->successors()) {
      (void)DstBB;
      BI->Count = 0;
      BI->MispredictedCount = 0;
      ++BI;
    }
    for (FlowJump *Jump : Block.SuccJumps) {
      if (Jump->IsUnlikely)
        continue;
      if (Jump->Flow == 0)
        continue;

      BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
      // Check if the edge corresponds to a regular jump or a landing pad
      if (BB->getSuccessor(SuccBB.getLabel())) {
        BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(SuccBB);
        BI.Count += Jump->Flow;
      } else {
        BinaryBasicBlock *LP = BB->getLandingPad(SuccBB.getLabel());
        if (LP && LP->getKnownExecutionCount() < Jump->Flow)
          LP->setExecutionCount(Jump->Flow);
      }
    }

    // Update call-site annotations
    auto setOrUpdateAnnotation = [&](MCInst &Instr, StringRef Name,
                                     uint64_t Count) {
      if (BC.MIB->hasAnnotation(Instr, Name))
        BC.MIB->removeAnnotation(Instr, Name);
      // Do not add zero-count annotations
      if (Count == 0)
        return;
      BC.MIB->addAnnotation(Instr, Name, Count);
    };

    for (MCInst &Instr : *BB) {
      // Ignore pseudo instructions
      if (BC.MIB->isPseudo(Instr))
        continue;
      // Ignore jump tables
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (BC.MIB->getJumpTable(*LastInstr) && LastInstr == &Instr)
        continue;

      if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
        auto &ICSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
            Instr, "CallProfile");
        if (!ICSP.empty()) {
          // Try to evenly distribute the counts among the call sites
          const uint64_t TotalCount = Block.Flow;
          const uint64_t NumSites = ICSP.size();
          for (uint64_t Idx = 0; Idx < ICSP.size(); Idx++) {
            IndirectCallProfile &CSP = ICSP[Idx];
            uint64_t CountPerSite = TotalCount / NumSites;
            // When counts cannot be exactly distributed, increase by 1 the
            // counts of the first (TotalCount % NumSites) call sites
            if (Idx < TotalCount % NumSites)
              CountPerSite++;
            CSP.Count = CountPerSite;
          }
        } else {
          ICSP.emplace_back(nullptr, Block.Flow, 0);
        }
      } else if (BC.MIB->getConditionalTailCall(Instr)) {
        // We don't know exactly the number of times the conditional tail call
        // is executed; conservatively, set it to the count of the block.
        setOrUpdateAnnotation(Instr, "CTCTakenCount", Block.Flow);
        BC.MIB->removeAnnotation(Instr, "CTCMispredCount");
      } else if (BC.MIB->isCall(Instr)) {
        setOrUpdateAnnotation(Instr, "Count", Block.Flow);
      }
    }
  }

  // Update function's execution count and mark the function inferred.
  BF.setExecutionCount(Func.Blocks[0].Flow);
  BF.setHasInferredProfile(true);
}

bool YAMLProfileReader::inferStaleProfile(
    BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
  LLVM_DEBUG(dbgs() << "BOLT-INFO: applying profile inference for "
                    << "\"" << BF.getPrintName() << "\"\n");

  // Make sure that block hashes are up to date.
  BF.computeBlockHashes();

  const BinaryFunction::BasicBlockOrderType BlockOrder(
      BF.getLayout().block_begin(), BF.getLayout().block_end());

  // Create a wrapper flow function to use with the profile inference algorithm.
  FlowFunction Func = createFlowFunction(BlockOrder);

  // Match as many block/jump counts from the stale profile as possible.
  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);

  // Adjust the flow function by marking unreachable blocks Unlikely so that
  // they don't get any counts assigned.
  preprocessUnreachableBlocks(Func);

  // Check if profile inference can be applied for the instance.
  if (!canApplyInference(Func))
    return false;

  // Apply the profile inference algorithm.
  applyInference(Func);

  // Collect inferred counts and update function annotations.
  assignProfile(BF, BlockOrder, Func);

  // As of now, we always mark the binary function as having a "correct"
  // profile. In the future, we may discard the results for instances with poor
  // inference metrics and keep such functions un-optimized.
  return true;
}

} // end namespace bolt
} // end namespace llvm