1 //===- PoisonChecking.cpp - -----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implements a transform pass which instruments IR such that poison semantics
10 // are made explicit. That is, it provides a (possibly partial) executable
11 // semantics for every instruction w.r.t. poison as specified in the LLVM
12 // LangRef. There are obvious parallels to the sanitizer tools, but this pass
13 // is focused purely on the semantics of LLVM IR, not any particular source
14 // language. If you're looking for something to see if your C/C++ contains
15 // UB, this is not it.
17 // The rewritten semantics of each instruction will include the following
20 // 1) The original instruction, unmodified.
21 // 2) A propagation rule which translates dynamic information about the poison
22 // state of each input to whether the dynamic output of the instruction
24 // 3) A creation rule which validates any poison producing flags on the
25 // instruction itself (e.g. checks for overflow on nsw).
26 // 4) A check rule which traps (to a handler function) if this instruction must
27 // execute undefined behavior given the poison state of it's inputs.
29 // This is a must analysis based transform; that is, the resulting code may
30 // produce a false negative result (not report UB when actually exists
31 // according to the LangRef spec), but should never produce a false positive
32 // (report UB where it doesn't exist).
34 // Use cases for this pass include:
35 // - Understanding (and testing!) the implications of the definition of poison
37 // - Validating the output of a IR fuzzer to ensure that all programs produced
38 // are well defined on the specific input used.
39 // - Finding/confirming poison specific miscompiles by checking the poison
40 // status of an input/IR pair is the same before and after an optimization
42 // - Checking that a bugpoint reduction does not introduce UB which didn't
43 // exist in the original program being reduced.
45 // The major sources of inaccuracy are currently:
46 // - Most validation rules not yet implemented for instructions with poison
47 // relavant flags. At the moment, only nsw/nuw on add/sub are supported.
48 // - UB which is control dependent on a branch on poison is not yet
49 // reported. Currently, only data flow dependence is modeled.
50 // - Poison which is propagated through memory is not modeled. As such,
51 // storing poison to memory and then reloading it will cause a false negative
52 // as we consider the reloaded value to not be poisoned.
53 // - Poison propagation across function boundaries is not modeled. At the
54 // moment, all arguments and return values are assumed not to be poison.
55 // - Undef is not modeled. In particular, the optimizer's freedom to pick
56 // concrete values for undef bits so as to maximize potential for producing
57 // poison is not modeled.
59 //===----------------------------------------------------------------------===//
61 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
62 #include "llvm/ADT/DenseMap.h"
63 #include "llvm/Analysis/ValueTracking.h"
64 #include "llvm/IR/IRBuilder.h"
65 #include "llvm/Support/CommandLine.h"
69 #define DEBUG_TYPE "poison-checking"
72 LocalCheck("poison-checking-function-local",
74 cl::desc("Check that returns are non-poison (for testing)"));
77 static bool isConstantFalse(Value
* V
) {
78 assert(V
->getType()->isIntegerTy(1));
79 if (auto *CI
= dyn_cast
<ConstantInt
>(V
))
84 static Value
*buildOrChain(IRBuilder
<> &B
, ArrayRef
<Value
*> Ops
) {
88 for (; i
< Ops
.size() && isConstantFalse(Ops
[i
]); i
++) {}
91 Value
*Accum
= Ops
[i
++];
92 for (Value
*Op
: llvm::drop_begin(Ops
, i
))
93 if (!isConstantFalse(Op
))
94 Accum
= B
.CreateOr(Accum
, Op
);
98 static void generateCreationChecksForBinOp(Instruction
&I
,
99 SmallVectorImpl
<Value
*> &Checks
) {
100 assert(isa
<BinaryOperator
>(I
));
103 Value
*LHS
= I
.getOperand(0);
104 Value
*RHS
= I
.getOperand(1);
105 switch (I
.getOpcode()) {
108 case Instruction::Add
: {
109 if (I
.hasNoSignedWrap()) {
111 B
.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow
, LHS
, RHS
);
112 Checks
.push_back(B
.CreateExtractValue(OverflowOp
, 1));
114 if (I
.hasNoUnsignedWrap()) {
116 B
.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow
, LHS
, RHS
);
117 Checks
.push_back(B
.CreateExtractValue(OverflowOp
, 1));
121 case Instruction::Sub
: {
122 if (I
.hasNoSignedWrap()) {
124 B
.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow
, LHS
, RHS
);
125 Checks
.push_back(B
.CreateExtractValue(OverflowOp
, 1));
127 if (I
.hasNoUnsignedWrap()) {
129 B
.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow
, LHS
, RHS
);
130 Checks
.push_back(B
.CreateExtractValue(OverflowOp
, 1));
134 case Instruction::Mul
: {
135 if (I
.hasNoSignedWrap()) {
137 B
.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow
, LHS
, RHS
);
138 Checks
.push_back(B
.CreateExtractValue(OverflowOp
, 1));
140 if (I
.hasNoUnsignedWrap()) {
142 B
.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow
, LHS
, RHS
);
143 Checks
.push_back(B
.CreateExtractValue(OverflowOp
, 1));
147 case Instruction::UDiv
: {
150 B
.CreateICmp(ICmpInst::ICMP_NE
, B
.CreateURem(LHS
, RHS
),
151 ConstantInt::get(LHS
->getType(), 0));
152 Checks
.push_back(Check
);
156 case Instruction::SDiv
: {
159 B
.CreateICmp(ICmpInst::ICMP_NE
, B
.CreateSRem(LHS
, RHS
),
160 ConstantInt::get(LHS
->getType(), 0));
161 Checks
.push_back(Check
);
165 case Instruction::AShr
:
166 case Instruction::LShr
:
167 case Instruction::Shl
: {
169 B
.CreateICmp(ICmpInst::ICMP_UGE
, RHS
,
170 ConstantInt::get(RHS
->getType(),
171 LHS
->getType()->getScalarSizeInBits()));
172 Checks
.push_back(ShiftCheck
);
178 /// Given an instruction which can produce poison on non-poison inputs
179 /// (i.e. canCreatePoison returns true), generate runtime checks to produce
180 /// boolean indicators of when poison would result.
181 static void generateCreationChecks(Instruction
&I
,
182 SmallVectorImpl
<Value
*> &Checks
) {
184 if (isa
<BinaryOperator
>(I
) && !I
.getType()->isVectorTy())
185 generateCreationChecksForBinOp(I
, Checks
);
187 // Handle non-binops separately
188 switch (I
.getOpcode()) {
190 // Note there are a couple of missing cases here, once implemented, this
191 // should become an llvm_unreachable.
193 case Instruction::ExtractElement
: {
194 Value
*Vec
= I
.getOperand(0);
195 auto *VecVTy
= dyn_cast
<FixedVectorType
>(Vec
->getType());
198 Value
*Idx
= I
.getOperand(1);
199 unsigned NumElts
= VecVTy
->getNumElements();
201 B
.CreateICmp(ICmpInst::ICMP_UGE
, Idx
,
202 ConstantInt::get(Idx
->getType(), NumElts
));
203 Checks
.push_back(Check
);
206 case Instruction::InsertElement
: {
207 Value
*Vec
= I
.getOperand(0);
208 auto *VecVTy
= dyn_cast
<FixedVectorType
>(Vec
->getType());
211 Value
*Idx
= I
.getOperand(2);
212 unsigned NumElts
= VecVTy
->getNumElements();
214 B
.CreateICmp(ICmpInst::ICMP_UGE
, Idx
,
215 ConstantInt::get(Idx
->getType(), NumElts
));
216 Checks
.push_back(Check
);
222 static Value
*getPoisonFor(DenseMap
<Value
*, Value
*> &ValToPoison
, Value
*V
) {
223 auto Itr
= ValToPoison
.find(V
);
224 if (Itr
!= ValToPoison
.end())
226 if (isa
<Constant
>(V
)) {
227 return ConstantInt::getFalse(V
->getContext());
229 // Return false for unknwon values - this implements a non-strict mode where
230 // unhandled IR constructs are simply considered to never produce poison. At
231 // some point in the future, we probably want a "strict mode" for testing if
233 return ConstantInt::getFalse(V
->getContext());
236 static void CreateAssert(IRBuilder
<> &B
, Value
*Cond
) {
237 assert(Cond
->getType()->isIntegerTy(1));
238 if (auto *CI
= dyn_cast
<ConstantInt
>(Cond
))
239 if (CI
->isAllOnesValue())
242 Module
*M
= B
.GetInsertBlock()->getModule();
243 M
->getOrInsertFunction("__poison_checker_assert",
244 Type::getVoidTy(M
->getContext()),
245 Type::getInt1Ty(M
->getContext()));
246 Function
*TrapFunc
= M
->getFunction("__poison_checker_assert");
247 B
.CreateCall(TrapFunc
, Cond
);
250 static void CreateAssertNot(IRBuilder
<> &B
, Value
*Cond
) {
251 assert(Cond
->getType()->isIntegerTy(1));
252 CreateAssert(B
, B
.CreateNot(Cond
));
255 static bool rewrite(Function
&F
) {
256 auto * const Int1Ty
= Type::getInt1Ty(F
.getContext());
258 DenseMap
<Value
*, Value
*> ValToPoison
;
260 for (BasicBlock
&BB
: F
)
261 for (auto I
= BB
.begin(); isa
<PHINode
>(&*I
); I
++) {
262 auto *OldPHI
= cast
<PHINode
>(&*I
);
263 auto *NewPHI
= PHINode::Create(Int1Ty
, OldPHI
->getNumIncomingValues());
264 for (unsigned i
= 0; i
< OldPHI
->getNumIncomingValues(); i
++)
265 NewPHI
->addIncoming(UndefValue::get(Int1Ty
),
266 OldPHI
->getIncomingBlock(i
));
267 NewPHI
->insertBefore(OldPHI
);
268 ValToPoison
[OldPHI
] = NewPHI
;
271 for (BasicBlock
&BB
: F
)
272 for (Instruction
&I
: BB
) {
273 if (isa
<PHINode
>(I
)) continue;
275 IRBuilder
<> B(cast
<Instruction
>(&I
));
277 // Note: There are many more sources of documented UB, but this pass only
278 // attempts to find UB triggered by propagation of poison.
279 SmallVector
<const Value
*, 4> NonPoisonOps
;
280 SmallPtrSet
<const Value
*, 4> SeenNonPoisonOps
;
281 getGuaranteedNonPoisonOps(&I
, NonPoisonOps
);
282 for (const Value
*Op
: NonPoisonOps
)
283 if (SeenNonPoisonOps
.insert(Op
).second
)
285 getPoisonFor(ValToPoison
, const_cast<Value
*>(Op
)));
288 if (auto *RI
= dyn_cast
<ReturnInst
>(&I
))
289 if (RI
->getNumOperands() != 0) {
290 Value
*Op
= RI
->getOperand(0);
291 CreateAssertNot(B
, getPoisonFor(ValToPoison
, Op
));
294 SmallVector
<Value
*, 4> Checks
;
295 for (const Use
&U
: I
.operands()) {
296 if (ValToPoison
.count(U
) && propagatesPoison(U
))
297 Checks
.push_back(getPoisonFor(ValToPoison
, U
));
300 if (canCreatePoison(cast
<Operator
>(&I
)))
301 generateCreationChecks(I
, Checks
);
302 ValToPoison
[&I
] = buildOrChain(B
, Checks
);
305 for (BasicBlock
&BB
: F
)
306 for (auto I
= BB
.begin(); isa
<PHINode
>(&*I
); I
++) {
307 auto *OldPHI
= cast
<PHINode
>(&*I
);
308 if (!ValToPoison
.count(OldPHI
))
309 continue; // skip the newly inserted phis
310 auto *NewPHI
= cast
<PHINode
>(ValToPoison
[OldPHI
]);
311 for (unsigned i
= 0; i
< OldPHI
->getNumIncomingValues(); i
++) {
312 auto *OldVal
= OldPHI
->getIncomingValue(i
);
313 NewPHI
->setIncomingValue(i
, getPoisonFor(ValToPoison
, OldVal
));
320 PreservedAnalyses
PoisonCheckingPass::run(Module
&M
,
321 ModuleAnalysisManager
&AM
) {
322 bool Changed
= false;
324 Changed
|= rewrite(F
);
326 return Changed
? PreservedAnalyses::none() : PreservedAnalyses::all();
329 PreservedAnalyses
PoisonCheckingPass::run(Function
&F
,
330 FunctionAnalysisManager
&AM
) {
331 return rewrite(F
) ? PreservedAnalyses::none() : PreservedAnalyses::all();
335 - Control dependent poison UB
336 - Strict mode - (i.e. must analyze every operand)
337 - Poison through memory
339 - Full coverage of intrinsics, etc.. (ouch)
341 Instructions w/Unclear Semantics:
342 - shufflevector - It would seem reasonable for an out of bounds mask element
343 to produce poison, but the LangRef does not state.
344 - all binary ops w/vector operands - The likely interpretation would be that
345 any element overflowing should produce poison for the entire result, but
346 the LangRef does not state.
347 - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
348 strange that only certian flags should be documented as producing poison.
350 Cases of clear poison semantics not yet implemented:
351 - Exact flags on ashr/lshr produce poison
352 - NSW/NUW flags on shl produce poison
353 - Inbounds flag on getelementptr produce poison
354 - fptosi/fptoui (out of bounds input) produce poison
355 - Scalable vector types for insertelement/extractelement
356 - Floating point binary ops w/fmf nnan/noinfs flags produce poison