1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implementation of the basic block sections profile reader pass. It parses
10 // and stores the basic block sections profile file (which is specified via the
11 // `-basic-block-sections` flag).
13 //===----------------------------------------------------------------------===//
15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/LineIterator.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Path.h"
29 #include <llvm/ADT/STLExtras.h>
33 char BasicBlockSectionsProfileReaderWrapperPass::ID
= 0;
34 INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass
,
35 "bbsections-profile-reader",
36 "Reads and parses a basic block sections profile.", false,
40 BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S
) const {
41 SmallVector
<StringRef
, 2> Parts
;
44 return createProfileParseError(Twine("unable to parse basic block id: '") +
46 unsigned long long BaseBBID
;
47 if (getAsUnsignedInteger(Parts
[0], 10, BaseBBID
))
48 return createProfileParseError(
49 Twine("unable to parse BB id: '" + Parts
[0]) +
50 "': unsigned integer expected");
51 unsigned long long CloneID
= 0;
52 if (Parts
.size() > 1 && getAsUnsignedInteger(Parts
[1], 10, CloneID
))
53 return createProfileParseError(Twine("unable to parse clone id: '") +
54 Parts
[1] + "': unsigned integer expected");
55 return UniqueBBID
{static_cast<unsigned>(BaseBBID
),
56 static_cast<unsigned>(CloneID
)};
59 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName
) const {
60 return getClusterInfoForFunction(FuncName
).first
;
63 std::pair
<bool, SmallVector
<BBClusterInfo
>>
64 BasicBlockSectionsProfileReader::getClusterInfoForFunction(
65 StringRef FuncName
) const {
66 auto R
= ProgramPathAndClusterInfo
.find(getAliasName(FuncName
));
67 return R
!= ProgramPathAndClusterInfo
.end()
68 ? std::pair(true, R
->second
.ClusterInfo
)
69 : std::pair(false, SmallVector
<BBClusterInfo
>());
72 SmallVector
<SmallVector
<unsigned>>
73 BasicBlockSectionsProfileReader::getClonePathsForFunction(
74 StringRef FuncName
) const {
75 return ProgramPathAndClusterInfo
.lookup(getAliasName(FuncName
)).ClonePaths
;
78 // Reads the version 1 basic block sections profile. Profile for each function
79 // is encoded as follows:
81 // f <function_name_1> <function_name_2> ...
82 // c <bb_id_1> <bb_id_2> <bb_id_3>
83 // c <bb_id_4> <bb_id_5>
85 // Module name specifier (starting with 'm') is optional and allows
86 // distinguishing profile for internal-linkage functions with the same name. If
87 // not specified, it will apply to any function with the same name. Function
88 // name specifier (starting with 'f') can specify multiple function name
89 // aliases. Basic block clusters are specified by 'c' and specify the cluster of
90 // basic blocks, and the internal order in which they must be placed in the same
92 // This profile can also specify cloning paths which instruct the compiler to
93 // clone basic blocks along a path. The cloned blocks are then specified in the
94 // cluster information.
95 // The following profile lists two cloning paths (starting with 'p') for
96 // function bar and places the total 9 blocks within two clusters. The first two
97 // blocks of a cloning path specify the edge along which the path is cloned. For
98 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along
99 // the edge 1->3. Within the given clusters, each cloned block is identified by
100 // "<original block id>.<clone id>". For instance, 3.1 represents the first
101 // clone of block 3. Original blocks are specified just with their block ids. A
102 // block cloned multiple times appears with distinct clone ids. The CFG for bar
103 // is shown below before and after cloning with its final clusters labeled.
107 // p 1 3 4 # cloning path 1
108 // p 4 2 # cloning path 2
109 // c 1 3.1 4.1 6 # basic block cluster 1
110 // c 0 2 3 4 2.1 5 # basic block cluster 2
111 // ****************************************************************************
112 // function bar before and after cloning with basic block clusters shown.
113 // ****************************************************************************
114 // .... ..............
115 // 0 -------+ : 0 :---->: 1 ---> 3.1 :
116 // | | : | : :........ | :
118 // +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1
119 // | | | | : | : : | :
120 // | v | | : v ....... : v :
121 // | 3 <------+ | : 3 <--+ : : 6 :
122 // | | | : | | : :....:
124 // +--- 4 ---> 6 | : 4 | :
127 // | :2.1---+ : cluster 2
132 // ****************************************************************************
133 Error
BasicBlockSectionsProfileReader::ReadV1Profile() {
134 auto FI
= ProgramPathAndClusterInfo
.end();
136 // Current cluster ID corresponding to this function.
137 unsigned CurrentCluster
= 0;
138 // Current position in the current cluster.
139 unsigned CurrentPosition
= 0;
141 // Temporary set to ensure every basic block ID appears once in the clusters
143 DenseSet
<UniqueBBID
> FuncBBIDs
;
145 // Debug-info-based module filename for the current function. Empty string
146 // means no filename.
147 StringRef DIFilename
;
149 for (; !LineIt
.is_at_eof(); ++LineIt
) {
150 StringRef
S(*LineIt
);
151 char Specifier
= S
[0];
152 S
= S
.drop_front().trim();
153 SmallVector
<StringRef
, 4> Values
;
154 S
.split(Values
, ' ');
158 case 'm': // Module name speicifer.
159 if (Values
.size() != 1) {
160 return createProfileParseError(Twine("invalid module name value: '") +
163 DIFilename
= sys::path::remove_leading_dotslash(Values
[0]);
165 case 'f': { // Function names specifier.
166 bool FunctionFound
= any_of(Values
, [&](StringRef Alias
) {
167 auto It
= FunctionNameToDIFilename
.find(Alias
);
168 // No match if this function name is not found in this module.
169 if (It
== FunctionNameToDIFilename
.end())
171 // Return a match if debug-info-filename is not specified. Otherwise,
172 // check for equality.
173 return DIFilename
.empty() || It
->second
.equals(DIFilename
);
175 if (!FunctionFound
) {
176 // Skip the following profile by setting the profile iterator (FI) to
177 // the past-the-end element.
178 FI
= ProgramPathAndClusterInfo
.end();
182 for (size_t i
= 1; i
< Values
.size(); ++i
)
183 FuncAliasMap
.try_emplace(Values
[i
], Values
.front());
185 // Prepare for parsing clusters of this function name.
186 // Start a new cluster map for this function name.
187 auto R
= ProgramPathAndClusterInfo
.try_emplace(Values
.front());
188 // Report error when multiple profiles have been specified for the same
191 return createProfileParseError("duplicate profile for function '" +
192 Values
.front() + "'");
196 // We won't need DIFilename anymore. Clean it up to avoid its application
197 // on the next function.
201 case 'c': // Basic block cluster specifier.
202 // Skip the profile when we the profile iterator (FI) refers to the
203 // past-the-end element.
204 if (FI
== ProgramPathAndClusterInfo
.end())
206 // Reset current cluster position.
208 for (auto BasicBlockIDStr
: Values
) {
209 auto BasicBlockID
= parseUniqueBBID(BasicBlockIDStr
);
211 return BasicBlockID
.takeError();
212 if (!FuncBBIDs
.insert(*BasicBlockID
).second
)
213 return createProfileParseError(
214 Twine("duplicate basic block id found '") + BasicBlockIDStr
+
217 FI
->second
.ClusterInfo
.emplace_back(BBClusterInfo
{
218 *std::move(BasicBlockID
), CurrentCluster
, CurrentPosition
++});
222 case 'p': { // Basic block cloning path specifier.
223 // Skip the profile when we the profile iterator (FI) refers to the
224 // past-the-end element.
225 if (FI
== ProgramPathAndClusterInfo
.end())
227 SmallSet
<unsigned, 5> BBsInPath
;
228 FI
->second
.ClonePaths
.push_back({});
229 for (size_t I
= 0; I
< Values
.size(); ++I
) {
230 auto BaseBBIDStr
= Values
[I
];
231 unsigned long long BaseBBID
= 0;
232 if (getAsUnsignedInteger(BaseBBIDStr
, 10, BaseBBID
))
233 return createProfileParseError(Twine("unsigned integer expected: '") +
235 if (I
!= 0 && !BBsInPath
.insert(BaseBBID
).second
)
236 return createProfileParseError(
237 Twine("duplicate cloned block in path: '") + BaseBBIDStr
+ "'");
238 FI
->second
.ClonePaths
.back().push_back(BaseBBID
);
243 return createProfileParseError(Twine("invalid specifier: '") +
244 Twine(Specifier
) + "'");
246 llvm_unreachable("should not break from this switch statement");
248 return Error::success();
251 Error
BasicBlockSectionsProfileReader::ReadV0Profile() {
252 auto FI
= ProgramPathAndClusterInfo
.end();
253 // Current cluster ID corresponding to this function.
254 unsigned CurrentCluster
= 0;
255 // Current position in the current cluster.
256 unsigned CurrentPosition
= 0;
258 // Temporary set to ensure every basic block ID appears once in the clusters
260 SmallSet
<unsigned, 4> FuncBBIDs
;
262 for (; !LineIt
.is_at_eof(); ++LineIt
) {
263 StringRef
S(*LineIt
);
266 // Check for the leading "!"
267 if (!S
.consume_front("!") || S
.empty())
269 // Check for second "!" which indicates a cluster of basic blocks.
270 if (S
.consume_front("!")) {
271 // Skip the profile when we the profile iterator (FI) refers to the
272 // past-the-end element.
273 if (FI
== ProgramPathAndClusterInfo
.end())
275 SmallVector
<StringRef
, 4> BBIDs
;
277 // Reset current cluster position.
279 for (auto BBIDStr
: BBIDs
) {
280 unsigned long long BBID
;
281 if (getAsUnsignedInteger(BBIDStr
, 10, BBID
))
282 return createProfileParseError(Twine("unsigned integer expected: '") +
284 if (!FuncBBIDs
.insert(BBID
).second
)
285 return createProfileParseError(
286 Twine("duplicate basic block id found '") + BBIDStr
+ "'");
288 FI
->second
.ClusterInfo
.emplace_back(
289 BBClusterInfo({{static_cast<unsigned>(BBID
), 0},
291 CurrentPosition
++}));
295 // This is a function name specifier. It may include a debug info filename
296 // specifier starting with `M=`.
297 auto [AliasesStr
, DIFilenameStr
] = S
.split(' ');
298 SmallString
<128> DIFilename
;
299 if (DIFilenameStr
.starts_with("M=")) {
301 sys::path::remove_leading_dotslash(DIFilenameStr
.substr(2));
302 if (DIFilename
.empty())
303 return createProfileParseError("empty module name specifier");
304 } else if (!DIFilenameStr
.empty()) {
305 return createProfileParseError("unknown string found: '" +
306 DIFilenameStr
+ "'");
308 // Function aliases are separated using '/'. We use the first function
309 // name for the cluster info mapping and delegate all other aliases to
311 SmallVector
<StringRef
, 4> Aliases
;
312 AliasesStr
.split(Aliases
, '/');
313 bool FunctionFound
= any_of(Aliases
, [&](StringRef Alias
) {
314 auto It
= FunctionNameToDIFilename
.find(Alias
);
315 // No match if this function name is not found in this module.
316 if (It
== FunctionNameToDIFilename
.end())
318 // Return a match if debug-info-filename is not specified. Otherwise,
319 // check for equality.
320 return DIFilename
.empty() || It
->second
.equals(DIFilename
);
322 if (!FunctionFound
) {
323 // Skip the following profile by setting the profile iterator (FI) to
324 // the past-the-end element.
325 FI
= ProgramPathAndClusterInfo
.end();
328 for (size_t i
= 1; i
< Aliases
.size(); ++i
)
329 FuncAliasMap
.try_emplace(Aliases
[i
], Aliases
.front());
331 // Prepare for parsing clusters of this function name.
332 // Start a new cluster map for this function name.
333 auto R
= ProgramPathAndClusterInfo
.try_emplace(Aliases
.front());
334 // Report error when multiple profiles have been specified for the same
337 return createProfileParseError("duplicate profile for function '" +
338 Aliases
.front() + "'");
344 return Error::success();
347 // Basic Block Sections can be enabled for a subset of machine basic blocks.
348 // This is done by passing a file containing names of functions for which basic
349 // block sections are desired. Additionally, machine basic block ids of the
350 // functions can also be specified for a finer granularity. Moreover, a cluster
351 // of basic blocks could be assigned to the same section.
352 // Optionally, a debug-info filename can be specified for each function to allow
353 // distinguishing internal-linkage functions of the same name.
354 // A file with basic block sections for all of function main and three blocks
355 // for function foo (of which 1 and 2 are placed in a cluster) looks like this:
356 // (Profile for function foo is only loaded when its debug-info filename
357 // matches 'path/to/foo_file.cc').
358 // ----------------------------
361 // !foo M=path/to/foo_file.cc
364 Error
BasicBlockSectionsProfileReader::ReadProfile() {
367 unsigned long long Version
= 0;
368 StringRef
FirstLine(*LineIt
);
369 if (FirstLine
.consume_front("v")) {
370 if (getAsUnsignedInteger(FirstLine
, 10, Version
)) {
371 return createProfileParseError(Twine("version number expected: '") +
375 return createProfileParseError(Twine("invalid profile version: ") +
383 // TODO: Deprecate V0 once V1 is fully integrated downstream.
384 return ReadV0Profile();
386 return ReadV1Profile();
388 llvm_unreachable("Invalid profile version.");
392 bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module
&M
) {
395 // Get the function name to debug info filename mapping.
396 BBSPR
.FunctionNameToDIFilename
.clear();
397 for (const Function
&F
: M
) {
398 SmallString
<128> DIFilename
;
399 if (F
.isDeclaration())
401 DISubprogram
*Subprogram
= F
.getSubprogram();
403 llvm::DICompileUnit
*CU
= Subprogram
->getUnit();
405 DIFilename
= sys::path::remove_leading_dotslash(CU
->getFilename());
407 [[maybe_unused
]] bool inserted
=
408 BBSPR
.FunctionNameToDIFilename
.try_emplace(F
.getName(), DIFilename
)
412 if (auto Err
= BBSPR
.ReadProfile())
413 report_fatal_error(std::move(Err
));
417 AnalysisKey
BasicBlockSectionsProfileReaderAnalysis::Key
;
419 BasicBlockSectionsProfileReader
420 BasicBlockSectionsProfileReaderAnalysis::run(Function
&F
,
421 FunctionAnalysisManager
&AM
) {
422 return BasicBlockSectionsProfileReader(TM
->getBBSectionsFuncListBuf());
425 bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot(
426 StringRef FuncName
) const {
427 return BBSPR
.isFunctionHot(FuncName
);
430 std::pair
<bool, SmallVector
<BBClusterInfo
>>
431 BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction(
432 StringRef FuncName
) const {
433 return BBSPR
.getClusterInfoForFunction(FuncName
);
436 SmallVector
<SmallVector
<unsigned>>
437 BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction(
438 StringRef FuncName
) const {
439 return BBSPR
.getClonePathsForFunction(FuncName
);
442 BasicBlockSectionsProfileReader
&
443 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
447 ImmutablePass
*llvm::createBasicBlockSectionsProfileReaderWrapperPass(
448 const MemoryBuffer
*Buf
) {
449 return new BasicBlockSectionsProfileReaderWrapperPass(Buf
);