1 //===- bolt/Profile/YAMLProfileWriter.cpp - YAML profile serializer -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Profile/YAMLProfileWriter.h"
10 #include "bolt/Core/BinaryBasicBlock.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Profile/BoltAddressTranslation.h"
13 #include "bolt/Profile/DataAggregator.h"
14 #include "bolt/Profile/ProfileReaderBase.h"
15 #include "bolt/Rewrite/RewriteInstance.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/MC/MCPseudoProbe.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/raw_ostream.h"
23 #define DEBUG_TYPE "bolt-prof"
27 extern cl::opt
<bool> ProfileUseDFS
;
28 cl::opt
<bool> ProfileWritePseudoProbes(
29 "profile-write-pseudo-probes",
30 cl::desc("Use pseudo probes in profile generation"), cl::Hidden
,
31 cl::cat(BoltOptCategory
));
37 const BinaryFunction
*YAMLProfileWriter::setCSIDestination(
38 const BinaryContext
&BC
, yaml::bolt::CallSiteInfo
&CSI
,
39 const MCSymbol
*Symbol
, const BoltAddressTranslation
*BAT
,
41 CSI
.DestId
= 0; // designated for unknown functions
42 CSI
.EntryDiscriminator
= 0;
46 if (const BinaryFunction
*Callee
=
47 BC
.getFunctionForSymbol(Symbol
, &EntryID
)) {
48 if (BAT
&& BAT
->isBATFunction(Callee
->getAddress()))
49 std::tie(Callee
, EntryID
) = BAT
->translateSymbol(BC
, *Symbol
, Offset
);
50 else if (const BinaryBasicBlock
*BB
=
51 Callee
->getBasicBlockContainingOffset(Offset
))
52 BC
.getFunctionForSymbol(Callee
->getSecondaryEntryPointSymbol(*BB
),
54 CSI
.DestId
= Callee
->getFunctionNumber();
55 CSI
.EntryDiscriminator
= EntryID
;
62 std::vector
<YAMLProfileWriter::InlineTreeNode
>
63 YAMLProfileWriter::collectInlineTree(
64 const MCPseudoProbeDecoder
&Decoder
,
65 const MCDecodedPseudoProbeInlineTree
&Root
) {
66 auto getHash
= [&](const MCDecodedPseudoProbeInlineTree
&Node
) {
67 return Decoder
.getFuncDescForGUID(Node
.Guid
)->FuncHash
;
69 std::vector
<InlineTreeNode
> InlineTree(
70 {InlineTreeNode
{&Root
, Root
.Guid
, getHash(Root
), 0, 0}});
71 uint32_t ParentId
= 0;
72 while (ParentId
!= InlineTree
.size()) {
73 const MCDecodedPseudoProbeInlineTree
*Cur
= InlineTree
[ParentId
].InlineTree
;
74 for (const MCDecodedPseudoProbeInlineTree
&Child
: Cur
->getChildren())
75 InlineTree
.emplace_back(
76 InlineTreeNode
{&Child
, Child
.Guid
, getHash(Child
), ParentId
,
77 std::get
<1>(Child
.getInlineSite())});
84 std::tuple
<yaml::bolt::ProfilePseudoProbeDesc
,
85 YAMLProfileWriter::InlineTreeDesc
>
86 YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder
&Decoder
) {
87 yaml::bolt::ProfilePseudoProbeDesc Desc
;
88 InlineTreeDesc InlineTree
;
90 for (const MCDecodedPseudoProbeInlineTree
&TopLev
:
91 Decoder
.getDummyInlineRoot().getChildren())
92 InlineTree
.TopLevelGUIDToInlineTree
[TopLev
.Guid
] = &TopLev
;
94 for (const auto &FuncDesc
: Decoder
.getGUID2FuncDescMap())
95 ++InlineTree
.HashIdxMap
[FuncDesc
.FuncHash
];
97 InlineTree
.GUIDIdxMap
.reserve(Decoder
.getGUID2FuncDescMap().size());
98 for (const auto &Node
: Decoder
.getInlineTreeVec())
99 ++InlineTree
.GUIDIdxMap
[Node
.Guid
];
101 std::vector
<std::pair
<uint32_t, uint64_t>> GUIDFreqVec
;
102 GUIDFreqVec
.reserve(InlineTree
.GUIDIdxMap
.size());
103 for (const auto [GUID
, Cnt
] : InlineTree
.GUIDIdxMap
)
104 GUIDFreqVec
.emplace_back(Cnt
, GUID
);
105 llvm::sort(GUIDFreqVec
);
107 std::vector
<std::pair
<uint32_t, uint64_t>> HashFreqVec
;
108 HashFreqVec
.reserve(InlineTree
.HashIdxMap
.size());
109 for (const auto [Hash
, Cnt
] : InlineTree
.HashIdxMap
)
110 HashFreqVec
.emplace_back(Cnt
, Hash
);
111 llvm::sort(HashFreqVec
);
114 Desc
.Hash
.reserve(HashFreqVec
.size());
115 for (uint64_t Hash
: llvm::make_second_range(llvm::reverse(HashFreqVec
))) {
116 Desc
.Hash
.emplace_back(Hash
);
117 InlineTree
.HashIdxMap
[Hash
] = Index
++;
121 Desc
.GUID
.reserve(GUIDFreqVec
.size());
122 for (uint64_t GUID
: llvm::make_second_range(llvm::reverse(GUIDFreqVec
))) {
123 Desc
.GUID
.emplace_back(GUID
);
124 InlineTree
.GUIDIdxMap
[GUID
] = Index
++;
125 uint64_t Hash
= Decoder
.getFuncDescForGUID(GUID
)->FuncHash
;
126 Desc
.GUIDHashIdx
.emplace_back(InlineTree
.HashIdxMap
[Hash
]);
129 return {Desc
, InlineTree
};
132 std::vector
<yaml::bolt::PseudoProbeInfo
>
133 YAMLProfileWriter::convertNodeProbes(NodeIdToProbes
&NodeProbes
) {
134 struct BlockProbeInfoHasher
{
135 size_t operator()(const yaml::bolt::PseudoProbeInfo
&BPI
) const {
136 auto HashCombine
= [](auto &Range
) {
137 return llvm::hash_combine_range(Range
.begin(), Range
.end());
139 return llvm::hash_combine(HashCombine(BPI
.BlockProbes
),
140 HashCombine(BPI
.CallProbes
),
141 HashCombine(BPI
.IndCallProbes
));
145 // Check identical BlockProbeInfo structs and merge them
146 std::unordered_map
<yaml::bolt::PseudoProbeInfo
, std::vector
<uint32_t>,
147 BlockProbeInfoHasher
>
149 for (auto &[NodeId
, Probes
] : NodeProbes
) {
150 yaml::bolt::PseudoProbeInfo BPI
;
151 BPI
.BlockProbes
= std::vector(Probes
[0].begin(), Probes
[0].end());
152 BPI
.IndCallProbes
= std::vector(Probes
[1].begin(), Probes
[1].end());
153 BPI
.CallProbes
= std::vector(Probes
[2].begin(), Probes
[2].end());
154 BPIToNodes
[BPI
].push_back(NodeId
);
157 auto handleMask
= [](const auto &Ids
, auto &Vec
, auto &Mask
) {
160 Vec
.emplace_back(Id
);
162 Mask
|= 1ull << (Id
- 1);
165 // Add to YAML with merged nodes/block mask optimizations
166 std::vector
<yaml::bolt::PseudoProbeInfo
> YamlProbes
;
167 YamlProbes
.reserve(BPIToNodes
.size());
168 for (const auto &[BPI
, Nodes
] : BPIToNodes
) {
169 auto &YamlBPI
= YamlProbes
.emplace_back(yaml::bolt::PseudoProbeInfo());
170 YamlBPI
.CallProbes
= BPI
.CallProbes
;
171 YamlBPI
.IndCallProbes
= BPI
.IndCallProbes
;
172 if (Nodes
.size() == 1)
173 YamlBPI
.InlineTreeIndex
= Nodes
.front();
175 YamlBPI
.InlineTreeNodes
= Nodes
;
176 handleMask(BPI
.BlockProbes
, YamlBPI
.BlockProbes
, YamlBPI
.BlockMask
);
181 std::tuple
<std::vector
<yaml::bolt::InlineTreeNode
>,
182 YAMLProfileWriter::InlineTreeMapTy
>
183 YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder
&Decoder
,
184 const InlineTreeDesc
&InlineTree
,
186 DenseMap
<const MCDecodedPseudoProbeInlineTree
*, uint32_t> InlineTreeNodeId
;
187 std::vector
<yaml::bolt::InlineTreeNode
> YamlInlineTree
;
188 auto It
= InlineTree
.TopLevelGUIDToInlineTree
.find(GUID
);
189 if (It
== InlineTree
.TopLevelGUIDToInlineTree
.end())
190 return {YamlInlineTree
, InlineTreeNodeId
};
191 const MCDecodedPseudoProbeInlineTree
*Root
= It
->second
;
192 assert(Root
&& "Malformed TopLevelGUIDToInlineTree");
194 uint32_t PrevParent
= 0;
195 uint32_t PrevGUIDIdx
= 0;
196 for (const auto &Node
: collectInlineTree(Decoder
, *Root
)) {
197 InlineTreeNodeId
[Node
.InlineTree
] = Index
++;
198 auto GUIDIdxIt
= InlineTree
.GUIDIdxMap
.find(Node
.GUID
);
199 assert(GUIDIdxIt
!= InlineTree
.GUIDIdxMap
.end() && "Malformed GUIDIdxMap");
200 uint32_t GUIDIdx
= GUIDIdxIt
->second
;
201 if (GUIDIdx
== PrevGUIDIdx
)
202 GUIDIdx
= UINT32_MAX
;
204 PrevGUIDIdx
= GUIDIdx
;
205 YamlInlineTree
.emplace_back(yaml::bolt::InlineTreeNode
{
206 Node
.ParentId
- PrevParent
, Node
.InlineSite
, GUIDIdx
, 0, 0});
207 PrevParent
= Node
.ParentId
;
209 return {YamlInlineTree
, InlineTreeNodeId
};
212 yaml::bolt::BinaryFunctionProfile
213 YAMLProfileWriter::convert(const BinaryFunction
&BF
, bool UseDFS
,
214 const InlineTreeDesc
&InlineTree
,
215 const BoltAddressTranslation
*BAT
) {
216 yaml::bolt::BinaryFunctionProfile YamlBF
;
217 const BinaryContext
&BC
= BF
.getBinaryContext();
218 const MCPseudoProbeDecoder
*PseudoProbeDecoder
=
219 opts::ProfileWritePseudoProbes
? BC
.getPseudoProbeDecoder() : nullptr;
221 const uint16_t LBRProfile
= BF
.getProfileFlags() & BinaryFunction::PF_LBR
;
223 // Prepare function and block hashes
224 BF
.computeHash(UseDFS
);
225 BF
.computeBlockHashes();
227 YamlBF
.Name
= DataAggregator::getLocationName(BF
, BAT
);
228 YamlBF
.Id
= BF
.getFunctionNumber();
229 YamlBF
.Hash
= BF
.getHash();
230 YamlBF
.NumBasicBlocks
= BF
.size();
231 YamlBF
.ExecCount
= BF
.getKnownExecutionCount();
232 DenseMap
<const MCDecodedPseudoProbeInlineTree
*, uint32_t> InlineTreeNodeId
;
233 if (PseudoProbeDecoder
&& BF
.getGUID()) {
234 std::tie(YamlBF
.InlineTree
, InlineTreeNodeId
) =
235 convertBFInlineTree(*PseudoProbeDecoder
, InlineTree
, BF
.getGUID());
238 BinaryFunction::BasicBlockOrderType Order
;
239 llvm::copy(UseDFS
? BF
.dfs() : BF
.getLayout().blocks(),
240 std::back_inserter(Order
));
242 const FunctionLayout Layout
= BF
.getLayout();
243 Layout
.updateLayoutIndices(Order
);
245 for (const BinaryBasicBlock
*BB
: Order
) {
246 yaml::bolt::BinaryBasicBlockProfile YamlBB
;
247 YamlBB
.Index
= BB
->getLayoutIndex();
248 YamlBB
.NumInstructions
= BB
->getNumNonPseudos();
249 YamlBB
.Hash
= BB
->getHash();
252 YamlBB
.EventCount
= BB
->getKnownExecutionCount();
253 if (YamlBB
.EventCount
)
254 YamlBF
.Blocks
.emplace_back(YamlBB
);
258 YamlBB
.ExecCount
= BB
->getKnownExecutionCount();
260 for (const MCInst
&Instr
: *BB
) {
261 if (!BC
.MIB
->isCall(Instr
) && !BC
.MIB
->isIndirectBranch(Instr
))
264 SmallVector
<std::pair
<StringRef
, yaml::bolt::CallSiteInfo
>> CSTargets
;
265 yaml::bolt::CallSiteInfo CSI
;
266 std::optional
<uint32_t> Offset
= BC
.MIB
->getOffset(Instr
);
267 if (!Offset
|| *Offset
< BB
->getInputOffset())
269 CSI
.Offset
= *Offset
- BB
->getInputOffset();
271 if (BC
.MIB
->isIndirectCall(Instr
) || BC
.MIB
->isIndirectBranch(Instr
)) {
272 const auto ICSP
= BC
.MIB
->tryGetAnnotationAs
<IndirectCallSiteProfile
>(
273 Instr
, "CallProfile");
276 for (const IndirectCallProfile
&CSP
: ICSP
.get()) {
277 StringRef TargetName
= "";
278 const BinaryFunction
*Callee
=
279 setCSIDestination(BC
, CSI
, CSP
.Symbol
, BAT
);
281 TargetName
= Callee
->getOneName();
282 CSI
.Count
= CSP
.Count
;
283 CSI
.Mispreds
= CSP
.Mispreds
;
284 CSTargets
.emplace_back(TargetName
, CSI
);
286 } else { // direct call or a tail call
287 StringRef TargetName
= "";
288 const MCSymbol
*CalleeSymbol
= BC
.MIB
->getTargetSymbol(Instr
);
289 const BinaryFunction
*const Callee
=
290 setCSIDestination(BC
, CSI
, CalleeSymbol
, BAT
);
292 TargetName
= Callee
->getOneName();
294 auto getAnnotationWithDefault
= [&](const MCInst
&Inst
, StringRef Ann
) {
295 return BC
.MIB
->getAnnotationWithDefault(Instr
, Ann
, 0ull);
297 if (BC
.MIB
->getConditionalTailCall(Instr
)) {
298 CSI
.Count
= getAnnotationWithDefault(Instr
, "CTCTakenCount");
299 CSI
.Mispreds
= getAnnotationWithDefault(Instr
, "CTCMispredCount");
301 CSI
.Count
= getAnnotationWithDefault(Instr
, "Count");
305 CSTargets
.emplace_back(TargetName
, CSI
);
307 // Sort targets in a similar way to getBranchData, see Location::operator<
308 llvm::sort(CSTargets
, [](const auto &RHS
, const auto &LHS
) {
309 if (RHS
.first
!= LHS
.first
)
310 return RHS
.first
< LHS
.first
;
311 return RHS
.second
.Offset
< LHS
.second
.Offset
;
313 for (auto &KV
: CSTargets
)
314 YamlBB
.CallSites
.push_back(KV
.second
);
317 // Skip printing if there's no profile data for non-entry basic block.
318 // Include landing pads with non-zero execution count.
319 if (YamlBB
.CallSites
.empty() && !BB
->isEntryPoint() &&
320 !(BB
->isLandingPad() && BB
->getKnownExecutionCount() != 0)) {
321 // Include blocks having successors or predecessors with positive counts.
322 uint64_t SuccessorExecCount
= 0;
323 for (const BinaryBasicBlock::BinaryBranchInfo
&BranchInfo
:
325 SuccessorExecCount
+= BranchInfo
.Count
;
326 uint64_t PredecessorExecCount
= 0;
327 for (auto Pred
: BB
->predecessors())
328 PredecessorExecCount
+= Pred
->getBranchInfo(*BB
).Count
;
329 if (!SuccessorExecCount
&& !PredecessorExecCount
)
333 auto BranchInfo
= BB
->branch_info_begin();
334 for (const BinaryBasicBlock
*Successor
: BB
->successors()) {
335 yaml::bolt::SuccessorInfo YamlSI
;
336 YamlSI
.Index
= Successor
->getLayoutIndex();
337 YamlSI
.Count
= BranchInfo
->Count
;
338 YamlSI
.Mispreds
= BranchInfo
->MispredictedCount
;
340 YamlBB
.Successors
.emplace_back(YamlSI
);
345 if (PseudoProbeDecoder
) {
346 const AddressProbesMap
&ProbeMap
=
347 PseudoProbeDecoder
->getAddress2ProbesMap();
348 const uint64_t FuncAddr
= BF
.getAddress();
349 const std::pair
<uint64_t, uint64_t> &BlockRange
=
350 BB
->getInputAddressRange();
351 const std::pair
<uint64_t, uint64_t> BlockAddrRange
= {
352 FuncAddr
+ BlockRange
.first
, FuncAddr
+ BlockRange
.second
};
353 auto Probes
= ProbeMap
.find(BlockAddrRange
.first
, BlockAddrRange
.second
);
354 YamlBB
.PseudoProbes
= writeBlockProbes(Probes
, InlineTreeNodeId
);
357 YamlBF
.Blocks
.emplace_back(YamlBB
);
362 std::error_code
YAMLProfileWriter::writeProfile(const RewriteInstance
&RI
) {
363 const BinaryContext
&BC
= RI
.getBinaryContext();
364 const auto &Functions
= BC
.getBinaryFunctions();
367 OS
= std::make_unique
<raw_fd_ostream
>(Filename
, EC
, sys::fs::OF_None
);
369 errs() << "BOLT-WARNING: " << EC
.message() << " : unable to open "
370 << Filename
<< " for output.\n";
374 yaml::bolt::BinaryProfile BP
;
376 // Fill out the header info.
377 BP
.Header
.Version
= 1;
378 BP
.Header
.FileName
= std::string(BC
.getFilename());
379 std::optional
<StringRef
> BuildID
= BC
.getFileBuildID();
380 BP
.Header
.Id
= BuildID
? std::string(*BuildID
) : "<unknown>";
381 BP
.Header
.Origin
= std::string(RI
.getProfileReader()->getReaderName());
382 BP
.Header
.IsDFSOrder
= opts::ProfileUseDFS
;
383 BP
.Header
.HashFunction
= HashFunction::Default
;
385 StringSet
<> EventNames
= RI
.getProfileReader()->getEventNames();
386 if (!EventNames
.empty()) {
388 for (const StringMapEntry
<std::nullopt_t
> &EventEntry
: EventNames
) {
389 BP
.Header
.EventNames
+= Sep
+ EventEntry
.first().str();
394 // Make sure the profile is consistent across all functions.
395 uint16_t ProfileFlags
= BinaryFunction::PF_NONE
;
396 for (const auto &BFI
: Functions
) {
397 const BinaryFunction
&BF
= BFI
.second
;
398 if (BF
.hasProfile() && !BF
.empty()) {
399 assert(BF
.getProfileFlags() != BinaryFunction::PF_NONE
);
400 if (ProfileFlags
== BinaryFunction::PF_NONE
)
401 ProfileFlags
= BF
.getProfileFlags();
403 assert(BF
.getProfileFlags() == ProfileFlags
&&
404 "expected consistent profile flags across all functions");
407 BP
.Header
.Flags
= ProfileFlags
;
409 // Add probe inline tree nodes.
410 InlineTreeDesc InlineTree
;
411 if (const MCPseudoProbeDecoder
*Decoder
=
412 opts::ProfileWritePseudoProbes
? BC
.getPseudoProbeDecoder() : nullptr)
413 std::tie(BP
.PseudoProbeDesc
, InlineTree
) = convertPseudoProbeDesc(*Decoder
);
415 // Add all function objects.
416 for (const auto &BFI
: Functions
) {
417 const BinaryFunction
&BF
= BFI
.second
;
418 if (BF
.hasProfile()) {
419 if (!BF
.hasValidProfile() && !RI
.getProfileReader()->isTrustedSource())
422 BP
.Functions
.emplace_back(convert(BF
, opts::ProfileUseDFS
, InlineTree
));
426 // Write the profile.
427 yaml::Output
Out(*OS
, nullptr, 0);
430 return std::error_code();