1 //===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This contains code to emit warnings for potentially incorrect usage of the
10 // llvm.expect intrinsic. This utility extracts the threshold values from
11 // metadata associated with the instrumented Branch or Switch instruction. The
12 // threshold values are then used to determine if a warning should be emmited.
14 // MisExpect's implementation relies on two assumptions about how branch weights
15 // are managed in LLVM.
17 // 1) Frontend profiling weights are always in place before llvm.expect is
18 // lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore
19 // needs to extract the branch weights and then compare them to the weights
20 // being added by the llvm.expect intrinsic lowering.
22 // 2) Sampling and IR based profiles will *only* have branch weight metadata
23 // before profiling data is consulted if they are from a lowered llvm.expect
24 // intrinsic. These profiles thus always extract the expected weights and then
25 // compare them to the weights collected during profiling to determine if a
26 // diagnostic message is warranted.
28 //===----------------------------------------------------------------------===//
30 #include "llvm/Transforms/Utils/MisExpect.h"
31 #include "llvm/ADT/Twine.h"
32 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/LLVMContext.h"
38 #include "llvm/IR/ProfDataUtils.h"
39 #include "llvm/Support/BranchProbability.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/FormatVariadic.h"
48 #define DEBUG_TYPE "misexpect"
51 using namespace misexpect
;
55 // Command line option to enable/disable the warning when profile data suggests
56 // a mismatch with the use of the llvm.expect intrinsic
57 static cl::opt
<bool> PGOWarnMisExpect(
58 "pgo-warn-misexpect", cl::init(false), cl::Hidden
,
59 cl::desc("Use this option to turn on/off "
60 "warnings about incorrect usage of llvm.expect intrinsics."));
62 static cl::opt
<uint32_t> MisExpectTolerance(
63 "misexpect-tolerance", cl::init(0),
64 cl::desc("Prevents emiting diagnostics when profile counts are "
65 "within N% of the threshold.."));
71 bool isMisExpectDiagEnabled(LLVMContext
&Ctx
) {
72 return PGOWarnMisExpect
|| Ctx
.getMisExpectWarningRequested();
75 uint32_t getMisExpectTolerance(LLVMContext
&Ctx
) {
76 return std::max(static_cast<uint32_t>(MisExpectTolerance
),
77 Ctx
.getDiagnosticsMisExpectTolerance());
80 Instruction
*getInstCondition(Instruction
*I
) {
81 assert(I
!= nullptr && "MisExpect target Instruction cannot be nullptr");
82 Instruction
*Ret
= nullptr;
83 if (auto *B
= dyn_cast
<BranchInst
>(I
)) {
84 Ret
= dyn_cast
<Instruction
>(B
->getCondition());
86 // TODO: Find a way to resolve condition location for switches
87 // Using the condition of the switch seems to often resolve to an earlier
88 // point in the program, i.e. the calculation of the switch condition, rather
89 // than the switch's location in the source code. Thus, we should use the
90 // instruction to get source code locations rather than the condition to
91 // improve diagnostic output, such as the caret. If the same problem exists
92 // for branch instructions, then we should remove this function and directly
93 // use the instruction
95 else if (auto *S
= dyn_cast
<SwitchInst
>(I
)) {
96 Ret
= dyn_cast
<Instruction
>(S
->getCondition());
101 void emitMisexpectDiagnostic(Instruction
*I
, LLVMContext
&Ctx
,
102 uint64_t ProfCount
, uint64_t TotalCount
) {
103 double PercentageCorrect
= (double)ProfCount
/ TotalCount
;
105 formatv("{0:P} ({1} / {2})", PercentageCorrect
, ProfCount
, TotalCount
);
106 auto RemStr
= formatv(
107 "Potential performance regression from use of the llvm.expect intrinsic: "
108 "Annotation was correct on {0} of profiled executions.",
110 Twine
Msg(PerString
);
111 Instruction
*Cond
= getInstCondition(I
);
112 if (isMisExpectDiagEnabled(Ctx
))
113 Ctx
.diagnose(DiagnosticInfoMisExpect(Cond
, Msg
));
114 OptimizationRemarkEmitter
ORE(I
->getParent()->getParent());
115 ORE
.emit(OptimizationRemark(DEBUG_TYPE
, "misexpect", Cond
) << RemStr
.str());
121 namespace misexpect
{
123 void verifyMisExpect(Instruction
&I
, ArrayRef
<uint32_t> RealWeights
,
124 ArrayRef
<uint32_t> ExpectedWeights
) {
125 // To determine if we emit a diagnostic, we need to compare the branch weights
126 // from the profile to those added by the llvm.expect intrinsic.
127 // So first, we extract the "likely" and "unlikely" weights from
128 // ExpectedWeights And determine the correct weight in the profile to compare
130 uint64_t LikelyBranchWeight
= 0,
131 UnlikelyBranchWeight
= std::numeric_limits
<uint32_t>::max();
133 for (size_t Idx
= 0, End
= ExpectedWeights
.size(); Idx
< End
; Idx
++) {
134 uint32_t V
= ExpectedWeights
[Idx
];
135 if (LikelyBranchWeight
< V
) {
136 LikelyBranchWeight
= V
;
139 if (UnlikelyBranchWeight
> V
) {
140 UnlikelyBranchWeight
= V
;
144 const uint64_t ProfiledWeight
= RealWeights
[MaxIndex
];
145 const uint64_t RealWeightsTotal
=
146 std::accumulate(RealWeights
.begin(), RealWeights
.end(), (uint64_t)0,
147 std::plus
<uint64_t>());
148 const uint64_t NumUnlikelyTargets
= RealWeights
.size() - 1;
150 uint64_t TotalBranchWeight
=
151 LikelyBranchWeight
+ (UnlikelyBranchWeight
* NumUnlikelyTargets
);
153 // FIXME: When we've addressed sample profiling, restore the assertion
155 // We cannot calculate branch probability if either of these invariants aren't
156 // met. However, MisExpect diagnostics should not prevent code from compiling,
157 // so we simply forgo emitting diagnostics here, and return early.
158 // assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0)
159 // && "TotalBranchWeight is less than the Likely branch weight");
160 if ((TotalBranchWeight
== 0) || (TotalBranchWeight
<= LikelyBranchWeight
))
163 // To determine our threshold value we need to obtain the branch probability
164 // for the weights added by llvm.expect and use that proportion to calculate
165 // our threshold based on the collected profile data.
166 auto LikelyProbablilty
= BranchProbability::getBranchProbability(
167 LikelyBranchWeight
, TotalBranchWeight
);
169 uint64_t ScaledThreshold
= LikelyProbablilty
.scale(RealWeightsTotal
);
171 // clamp tolerance range to [0, 100)
172 auto Tolerance
= getMisExpectTolerance(I
.getContext());
173 Tolerance
= std::clamp(Tolerance
, 0u, 99u);
175 // Allow users to relax checking by N% i.e., if they use a 5% tolerance,
176 // then we check against 0.95*ScaledThreshold
178 ScaledThreshold
*= (1.0 - Tolerance
/ 100.0);
180 // When the profile weight is below the threshold, we emit the diagnostic
181 if (ProfiledWeight
< ScaledThreshold
)
182 emitMisexpectDiagnostic(&I
, I
.getContext(), ProfiledWeight
,
186 void checkBackendInstrumentation(Instruction
&I
,
187 const ArrayRef
<uint32_t> RealWeights
) {
188 SmallVector
<uint32_t> ExpectedWeights
;
189 if (!extractBranchWeights(I
, ExpectedWeights
))
191 verifyMisExpect(I
, RealWeights
, ExpectedWeights
);
194 void checkFrontendInstrumentation(Instruction
&I
,
195 const ArrayRef
<uint32_t> ExpectedWeights
) {
196 SmallVector
<uint32_t> RealWeights
;
197 if (!extractBranchWeights(I
, RealWeights
))
199 verifyMisExpect(I
, RealWeights
, ExpectedWeights
);
202 void checkExpectAnnotations(Instruction
&I
,
203 const ArrayRef
<uint32_t> ExistingWeights
,
206 checkFrontendInstrumentation(I
, ExistingWeights
);
208 checkBackendInstrumentation(I
, ExistingWeights
);
212 } // namespace misexpect