//===- bolt/Profile/YAMLProfileWriter.cpp - YAML profile serializer -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 #include "bolt/Profile/YAMLProfileWriter.h"
10 #include "bolt/Core/BinaryBasicBlock.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Profile/BoltAddressTranslation.h"
13 #include "bolt/Profile/DataAggregator.h"
14 #include "bolt/Profile/ProfileReaderBase.h"
15 #include "bolt/Rewrite/RewriteInstance.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/MC/MCPseudoProbe.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/raw_ostream.h"
22 #undef DEBUG_TYPE
23 #define DEBUG_TYPE "bolt-prof"

namespace opts {
using namespace llvm;
extern cl::opt<bool> ProfileUseDFS;
cl::opt<bool> ProfileWritePseudoProbes(
    "profile-write-pseudo-probes",
    cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
    cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

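// Set the CallSiteInfo destination fields (DestId, EntryDiscriminator) for a
// call to Symbol. DestId stays 0 for unknown callees. If the callee is covered
// by a BOLT Address Translation (BAT) table, the symbol and offset are
// translated back to input-binary coordinates; otherwise a call landing on a
// secondary entry point produces a non-zero EntryDiscriminator. Returns the
// resolved callee, or nullptr if the symbol does not map to a function.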
const BinaryFunction *YAMLProfileWriter::setCSIDestination(
    const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
    const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
    uint32_t Offset) {
  CSI.DestId = 0; // designated for unknown functions
  CSI.EntryDiscriminator = 0;

  if (Symbol) {
    uint64_t EntryID = 0;
    if (const BinaryFunction *Callee =
            BC.getFunctionForSymbol(Symbol, &EntryID)) {
      if (BAT && BAT->isBATFunction(Callee->getAddress()))
        std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset);
      else if (const BinaryBasicBlock *BB =
                   Callee->getBasicBlockContainingOffset(Offset))
        BC.getFunctionForSymbol(Callee->getSecondaryEntryPointSymbol(*BB),
                                &EntryID);
      CSI.DestId = Callee->getFunctionNumber();
      CSI.EntryDiscriminator = EntryID;
      return Callee;
    }
  }
  return nullptr;
}

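// Flatten the decoded pseudo-probe inline tree rooted at Root into a vector in
// breadth-first order. Each element records the tree node, its GUID, the
// function hash for that GUID, the index of its parent within the vector, and
// the call-site index of its inline site.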
std::vector<YAMLProfileWriter::InlineTreeNode>
YAMLProfileWriter::collectInlineTree(
    const MCPseudoProbeDecoder &Decoder,
    const MCDecodedPseudoProbeInlineTree &Root) {
  auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) {
    return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash;
  };
  std::vector<InlineTreeNode> InlineTree(
      {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}});
  uint32_t ParentId = 0;
  while (ParentId != InlineTree.size()) {
    const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree;
    for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren())
      InlineTree.emplace_back(
          InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId,
                         std::get<1>(Child.getInlineSite())});
    ++ParentId;
  }

  return InlineTree;
}

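// Build the profile-wide pseudo-probe descriptor: deduplicated tables of GUIDs
// and function hashes, each ordered by descending occurrence count so that the
// most frequent values receive the smallest indices and per-function
// references remain compact. The returned InlineTreeDesc carries the
// value-to-index maps plus a map from top-level GUIDs to inline tree roots.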
std::tuple<yaml::bolt::ProfilePseudoProbeDesc,
           YAMLProfileWriter::InlineTreeDesc>
YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
  yaml::bolt::ProfilePseudoProbeDesc Desc;
  InlineTreeDesc InlineTree;

  for (const MCDecodedPseudoProbeInlineTree &TopLev :
       Decoder.getDummyInlineRoot().getChildren())
    InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;

  for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
    ++InlineTree.HashIdxMap[FuncDesc.FuncHash];

  InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size());
  for (const auto &Node : Decoder.getInlineTreeVec())
    ++InlineTree.GUIDIdxMap[Node.Guid];

  std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec;
  GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size());
  for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap)
    GUIDFreqVec.emplace_back(Cnt, GUID);
  llvm::sort(GUIDFreqVec);

  std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec;
  HashFreqVec.reserve(InlineTree.HashIdxMap.size());
  for (const auto [Hash, Cnt] : InlineTree.HashIdxMap)
    HashFreqVec.emplace_back(Cnt, Hash);
  llvm::sort(HashFreqVec);

  uint32_t Index = 0;
  Desc.Hash.reserve(HashFreqVec.size());
  for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) {
    Desc.Hash.emplace_back(Hash);
    InlineTree.HashIdxMap[Hash] = Index++;
  }

  Index = 0;
  Desc.GUID.reserve(GUIDFreqVec.size());
  for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) {
    Desc.GUID.emplace_back(GUID);
    InlineTree.GUIDIdxMap[GUID] = Index++;
    uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash;
    Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]);
  }

  return {Desc, InlineTree};
}

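// Convert per-node probe sets into YAML form, merging inline tree nodes whose
// probe sets are identical. Block probe ids in [1, 64] are packed into the
// 64-bit BlockMask (bit Id - 1); larger ids are kept as an explicit list. A
// group covering a single node is emitted as InlineTreeIndex, a group covering
// several nodes as InlineTreeNodes.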
std::vector<yaml::bolt::PseudoProbeInfo>
YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
  struct BlockProbeInfoHasher {
    size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
      auto HashCombine = [](auto &Range) {
        return llvm::hash_combine_range(Range.begin(), Range.end());
      };
      return llvm::hash_combine(HashCombine(BPI.BlockProbes),
                                HashCombine(BPI.CallProbes),
                                HashCombine(BPI.IndCallProbes));
    }
  };

  // Check identical BlockProbeInfo structs and merge them
  std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
                     BlockProbeInfoHasher>
      BPIToNodes;
  for (auto &[NodeId, Probes] : NodeProbes) {
    yaml::bolt::PseudoProbeInfo BPI;
    BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
    BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
    BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
    BPIToNodes[BPI].push_back(NodeId);
  }

  auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
    for (auto Id : Ids)
      if (Id > 64)
        Vec.emplace_back(Id);
      else
        Mask |= 1ull << (Id - 1);
  };

  // Add to YAML with merged nodes/block mask optimizations
  std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
  YamlProbes.reserve(BPIToNodes.size());
  for (const auto &[BPI, Nodes] : BPIToNodes) {
    auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
    YamlBPI.CallProbes = BPI.CallProbes;
    YamlBPI.IndCallProbes = BPI.IndCallProbes;
    if (Nodes.size() == 1)
      YamlBPI.InlineTreeIndex = Nodes.front();
    else
      YamlBPI.InlineTreeNodes = Nodes;
    handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
  }
  return YamlProbes;
}

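// Serialize the inline tree of the function identified by GUID. Parent ids are
// delta-encoded against the previous node, and a GUID index identical to the
// previous one is replaced with the UINT32_MAX sentinel so repeated GUIDs cost
// no extra space. Returns the YAML nodes along with a map from decoded tree
// nodes to their serialized indices, used later to attach block probes.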
std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
           YAMLProfileWriter::InlineTreeMapTy>
YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
                                       const InlineTreeDesc &InlineTree,
                                       uint64_t GUID) {
  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
  std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
  auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
  if (It == InlineTree.TopLevelGUIDToInlineTree.end())
    return {YamlInlineTree, InlineTreeNodeId};
  const MCDecodedPseudoProbeInlineTree *Root = It->second;
  assert(Root && "Malformed TopLevelGUIDToInlineTree");
  uint32_t Index = 0;
  uint32_t PrevParent = 0;
  uint32_t PrevGUIDIdx = 0;
  for (const auto &Node : collectInlineTree(Decoder, *Root)) {
    InlineTreeNodeId[Node.InlineTree] = Index++;
    auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID);
    assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end() && "Malformed GUIDIdxMap");
    uint32_t GUIDIdx = GUIDIdxIt->second;
    if (GUIDIdx == PrevGUIDIdx)
      GUIDIdx = UINT32_MAX;
    else
      PrevGUIDIdx = GUIDIdx;
    YamlInlineTree.emplace_back(yaml::bolt::InlineTreeNode{
        Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx, 0, 0});
    PrevParent = Node.ParentId;
  }
  return {YamlInlineTree, InlineTreeNodeId};
}

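// Convert a single function's profile into YAML: function and block hashes,
// per-block execution counts, call-site information for direct and indirect
// calls, successor branch/mispredict counts, and, when pseudo-probe writing is
// enabled, per-block probe data.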
yaml::bolt::BinaryFunctionProfile
YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
                           const InlineTreeDesc &InlineTree,
                           const BoltAddressTranslation *BAT) {
  yaml::bolt::BinaryFunctionProfile YamlBF;
  const BinaryContext &BC = BF.getBinaryContext();
  const MCPseudoProbeDecoder *PseudoProbeDecoder =
      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;

  const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;

  // Prepare function and block hashes
  BF.computeHash(UseDFS);
  BF.computeBlockHashes();

  YamlBF.Name = DataAggregator::getLocationName(BF, BAT);
  YamlBF.Id = BF.getFunctionNumber();
  YamlBF.Hash = BF.getHash();
  YamlBF.NumBasicBlocks = BF.size();
  YamlBF.ExecCount = BF.getKnownExecutionCount();
  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
  if (PseudoProbeDecoder && BF.getGUID()) {
    std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
        convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
  }

  BinaryFunction::BasicBlockOrderType Order;
  llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
             std::back_inserter(Order));

  const FunctionLayout Layout = BF.getLayout();
  Layout.updateLayoutIndices(Order);

  for (const BinaryBasicBlock *BB : Order) {
    yaml::bolt::BinaryBasicBlockProfile YamlBB;
    YamlBB.Index = BB->getLayoutIndex();
    YamlBB.NumInstructions = BB->getNumNonPseudos();
    YamlBB.Hash = BB->getHash();

    if (!LBRProfile) {
      YamlBB.EventCount = BB->getKnownExecutionCount();
      if (YamlBB.EventCount)
        YamlBF.Blocks.emplace_back(YamlBB);
      continue;
    }

    YamlBB.ExecCount = BB->getKnownExecutionCount();

    for (const MCInst &Instr : *BB) {
      if (!BC.MIB->isCall(Instr) && !BC.MIB->isIndirectBranch(Instr))
        continue;

      SmallVector<std::pair<StringRef, yaml::bolt::CallSiteInfo>> CSTargets;
      yaml::bolt::CallSiteInfo CSI;
      std::optional<uint32_t> Offset = BC.MIB->getOffset(Instr);
      if (!Offset || *Offset < BB->getInputOffset())
        continue;
      CSI.Offset = *Offset - BB->getInputOffset();

      if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
        const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
            Instr, "CallProfile");
        if (!ICSP)
          continue;
        for (const IndirectCallProfile &CSP : ICSP.get()) {
          StringRef TargetName = "";
          const BinaryFunction *Callee =
              setCSIDestination(BC, CSI, CSP.Symbol, BAT);
          if (Callee)
            TargetName = Callee->getOneName();
          CSI.Count = CSP.Count;
          CSI.Mispreds = CSP.Mispreds;
          CSTargets.emplace_back(TargetName, CSI);
        }
      } else { // direct call or a tail call
        StringRef TargetName = "";
        const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr);
        const BinaryFunction *const Callee =
            setCSIDestination(BC, CSI, CalleeSymbol, BAT);
        if (Callee)
          TargetName = Callee->getOneName();

        auto getAnnotationWithDefault = [&](const MCInst &Inst, StringRef Ann) {
          return BC.MIB->getAnnotationWithDefault(Instr, Ann, 0ull);
        };
        if (BC.MIB->getConditionalTailCall(Instr)) {
          CSI.Count = getAnnotationWithDefault(Instr, "CTCTakenCount");
          CSI.Mispreds = getAnnotationWithDefault(Instr, "CTCMispredCount");
        } else {
          CSI.Count = getAnnotationWithDefault(Instr, "Count");
        }

        if (CSI.Count)
          CSTargets.emplace_back(TargetName, CSI);
      }
      // Sort targets in a similar way to getBranchData, see Location::operator<
      llvm::sort(CSTargets, [](const auto &RHS, const auto &LHS) {
        if (RHS.first != LHS.first)
          return RHS.first < LHS.first;
        return RHS.second.Offset < LHS.second.Offset;
      });
      for (auto &KV : CSTargets)
        YamlBB.CallSites.push_back(KV.second);
    }

    // Skip printing if there's no profile data for non-entry basic block.
    // Include landing pads with non-zero execution count.
    if (YamlBB.CallSites.empty() && !BB->isEntryPoint() &&
        !(BB->isLandingPad() && BB->getKnownExecutionCount() != 0)) {
      // Include blocks having successors or predecessors with positive counts.
      uint64_t SuccessorExecCount = 0;
      for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
           BB->branch_info())
        SuccessorExecCount += BranchInfo.Count;
      uint64_t PredecessorExecCount = 0;
      for (auto Pred : BB->predecessors())
        PredecessorExecCount += Pred->getBranchInfo(*BB).Count;
      if (!SuccessorExecCount && !PredecessorExecCount)
        continue;
    }

    auto BranchInfo = BB->branch_info_begin();
    for (const BinaryBasicBlock *Successor : BB->successors()) {
      yaml::bolt::SuccessorInfo YamlSI;
      YamlSI.Index = Successor->getLayoutIndex();
      YamlSI.Count = BranchInfo->Count;
      YamlSI.Mispreds = BranchInfo->MispredictedCount;

      YamlBB.Successors.emplace_back(YamlSI);

      ++BranchInfo;
    }

    if (PseudoProbeDecoder) {
      const AddressProbesMap &ProbeMap =
          PseudoProbeDecoder->getAddress2ProbesMap();
      const uint64_t FuncAddr = BF.getAddress();
      const std::pair<uint64_t, uint64_t> &BlockRange =
          BB->getInputAddressRange();
      const std::pair<uint64_t, uint64_t> BlockAddrRange = {
          FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
      auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
      YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
    }

    YamlBF.Blocks.emplace_back(YamlBB);
  }
  return YamlBF;
}

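// Top-level entry point: open the output file, fill out the profile header,
// emit the pseudo-probe descriptor if requested, convert every function that
// carries a (trusted) profile, and serialize the result as YAML.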
std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
  const BinaryContext &BC = RI.getBinaryContext();
  const auto &Functions = BC.getBinaryFunctions();

  std::error_code EC;
  OS = std::make_unique<raw_fd_ostream>(Filename, EC, sys::fs::OF_None);
  if (EC) {
    errs() << "BOLT-WARNING: " << EC.message() << " : unable to open "
           << Filename << " for output.\n";
    return EC;
  }

  yaml::bolt::BinaryProfile BP;

  // Fill out the header info.
  BP.Header.Version = 1;
  BP.Header.FileName = std::string(BC.getFilename());
  std::optional<StringRef> BuildID = BC.getFileBuildID();
  BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
  BP.Header.Origin = std::string(RI.getProfileReader()->getReaderName());
  BP.Header.IsDFSOrder = opts::ProfileUseDFS;
  BP.Header.HashFunction = HashFunction::Default;

  StringSet<> EventNames = RI.getProfileReader()->getEventNames();
  if (!EventNames.empty()) {
    std::string Sep;
    for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) {
      BP.Header.EventNames += Sep + EventEntry.first().str();
      Sep = ",";
    }
  }

  // Make sure the profile is consistent across all functions.
  uint16_t ProfileFlags = BinaryFunction::PF_NONE;
  for (const auto &BFI : Functions) {
    const BinaryFunction &BF = BFI.second;
    if (BF.hasProfile() && !BF.empty()) {
      assert(BF.getProfileFlags() != BinaryFunction::PF_NONE);
      if (ProfileFlags == BinaryFunction::PF_NONE)
        ProfileFlags = BF.getProfileFlags();

      assert(BF.getProfileFlags() == ProfileFlags &&
             "expected consistent profile flags across all functions");
    }
  }
  BP.Header.Flags = ProfileFlags;

  // Add probe inline tree nodes.
  InlineTreeDesc InlineTree;
  if (const MCPseudoProbeDecoder *Decoder =
          opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
    std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);

  // Add all function objects.
  for (const auto &BFI : Functions) {
    const BinaryFunction &BF = BFI.second;
    if (BF.hasProfile()) {
      if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
        continue;

      BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree));
    }
  }

  // Write the profile.
  yaml::Output Out(*OS, nullptr, 0);
  Out << BP;

  return std::error_code();
}

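// For reference, the emitted document has roughly the following shape (an
// abbreviated, illustrative sketch; the exact keys are defined by the YAML
// mappings in bolt/Profile/ProfileYAMLMapping.h):
//
//   header:
//     profile-version: 1
//     binary-name:     <input binary>
//     ...
//   functions:
//     - name:    'main'
//       fid:     1
//       hash:    0x...
//       exec:    100
//       nblocks: 2
//       blocks:
//         - bid:  0
//           ...
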
} // namespace bolt
} // namespace llvm