llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp

   1 //===- PoisonChecking.cpp - -----------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // Implements a transform pass which instruments IR such that poison semantics
  10 // are made explicit.  That is, it provides a (possibly partial) executable
  11 // semantics for every instruction w.r.t. poison as specified in the LLVM
  12 // LangRef.  There are obvious parallels to the sanitizer tools, but this pass
  13 // is focused purely on the semantics of LLVM IR, not any particular source
  14 // language.   If you're looking for something to see if your C/C++ contains
  15 // UB, this is not it.
  16 //
  17 // The rewritten semantics of each instruction will include the following
  18 // components:
  19 //
  20 // 1) The original instruction, unmodified.
  21 // 2) A propagation rule which translates dynamic information about the poison
  22 //    state of each input to whether the dynamic output of the instruction
  23 //    produces poison.
  24 // 3) A creation rule which validates any poison producing flags on the
  25 //    instruction itself (e.g. checks for overflow on nsw).
  26 // 4) A check rule which traps (to a handler function) if this instruction must
  27 //    execute undefined behavior given the poison state of it's inputs.
  28 //
  29 // This is a must analysis based transform; that is, the resulting code may
  30 // produce a false negative result (not report UB when actually exists
  31 // according to the LangRef spec), but should never produce a false positive
  32 // (report UB where it doesn't exist).
  33 //
  34 // Use cases for this pass include:
  35 // - Understanding (and testing!) the implications of the definition of poison
  36 //   from the LangRef.
  37 // - Validating the output of a IR fuzzer to ensure that all programs produced
  38 //   are well defined on the specific input used.
  39 // - Finding/confirming poison specific miscompiles by checking the poison
  40 //   status of an input/IR pair is the same before and after an optimization
  41 //   transform.
  42 // - Checking that a bugpoint reduction does not introduce UB which didn't
  43 //   exist in the original program being reduced.
  44 //
  45 // The major sources of inaccuracy are currently:
  46 // - Most validation rules not yet implemented for instructions with poison
  47 //   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
  48 // - UB which is control dependent on a branch on poison is not yet
  49 //   reported. Currently, only data flow dependence is modeled.
  50 // - Poison which is propagated through memory is not modeled.  As such,
  51 //   storing poison to memory and then reloading it will cause a false negative
  52 //   as we consider the reloaded value to not be poisoned.
  53 // - Poison propagation across function boundaries is not modeled.  At the
  54 //   moment, all arguments and return values are assumed not to be poison.
  55 // - Undef is not modeled.  In particular, the optimizer's freedom to pick
  56 //   concrete values for undef bits so as to maximize potential for producing
  57 //   poison is not modeled.
  58 //
  59 //===----------------------------------------------------------------------===//
  60
  61 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
  62 #include "llvm/ADT/DenseMap.h"
  63 #include "llvm/Analysis/ValueTracking.h"
  64 #include "llvm/IR/IRBuilder.h"
  65 #include "llvm/Support/CommandLine.h"
  66
  67 using namespace llvm;
  68
  69 #define DEBUG_TYPE "poison-checking"
  70
  71 static cl::opt<bool>
  72 LocalCheck("poison-checking-function-local",
  73            cl::init(false),
  74            cl::desc("Check that returns are non-poison (for testing)"));
  75
  76
  77 static bool isConstantFalse(Value* V) {
  78   assert(V->getType()->isIntegerTy(1));
  79   if (auto *CI = dyn_cast<ConstantInt>(V))
  80     return CI->isZero();
  81   return false;
  82 }
  83
  84 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
  85   if (Ops.size() == 0)
  86     return B.getFalse();
  87   unsigned i = 0;
  88   for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
  89   if (i == Ops.size())
  90     return B.getFalse();
  91   Value *Accum = Ops[i++];
  92   for (Value *Op : llvm::drop_begin(Ops, i))
  93     if (!isConstantFalse(Op))
  94       Accum = B.CreateOr(Accum, Op);
  95   return Accum;
  96 }
  97
  98 static void generateCreationChecksForBinOp(Instruction &I,
  99                                            SmallVectorImpl<Value*> &Checks) {
 100   assert(isa<BinaryOperator>(I));
 101
 102   IRBuilder<> B(&I);
 103   Value *LHS = I.getOperand(0);
 104   Value *RHS = I.getOperand(1);
 105   switch (I.getOpcode()) {
 106   default:
 107     return;
 108   case Instruction::Add: {
 109     if (I.hasNoSignedWrap()) {
 110       auto *OverflowOp =
 111         B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
 112       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 113     }
 114     if (I.hasNoUnsignedWrap()) {
 115       auto *OverflowOp =
 116         B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
 117       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 118     }
 119     break;
 120   }
 121   case Instruction::Sub: {
 122     if (I.hasNoSignedWrap()) {
 123       auto *OverflowOp =
 124         B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
 125       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 126     }
 127     if (I.hasNoUnsignedWrap()) {
 128       auto *OverflowOp =
 129         B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
 130       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 131     }
 132     break;
 133   }
 134   case Instruction::Mul: {
 135     if (I.hasNoSignedWrap()) {
 136       auto *OverflowOp =
 137         B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
 138       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 139     }
 140     if (I.hasNoUnsignedWrap()) {
 141       auto *OverflowOp =
 142         B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
 143       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 144     }
 145     break;
 146   }
 147   case Instruction::UDiv: {
 148     if (I.isExact()) {
 149       auto *Check =
 150         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
 151                      ConstantInt::get(LHS->getType(), 0));
 152       Checks.push_back(Check);
 153     }
 154     break;
 155   }
 156   case Instruction::SDiv: {
 157     if (I.isExact()) {
 158       auto *Check =
 159         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
 160                      ConstantInt::get(LHS->getType(), 0));
 161       Checks.push_back(Check);
 162     }
 163     break;
 164   }
 165   case Instruction::AShr:
 166   case Instruction::LShr:
 167   case Instruction::Shl: {
 168     Value *ShiftCheck =
 169       B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
 170                    ConstantInt::get(RHS->getType(),
 171                                     LHS->getType()->getScalarSizeInBits()));
 172     Checks.push_back(ShiftCheck);
 173     break;
 174   }
 175   };
 176 }
 177
 178 /// Given an instruction which can produce poison on non-poison inputs
 179 /// (i.e. canCreatePoison returns true), generate runtime checks to produce
 180 /// boolean indicators of when poison would result.
 181 static void generateCreationChecks(Instruction &I,
 182                                    SmallVectorImpl<Value*> &Checks) {
 183   IRBuilder<> B(&I);
 184   if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
 185     generateCreationChecksForBinOp(I, Checks);
 186
 187   // Handle non-binops separately
 188   switch (I.getOpcode()) {
 189   default:
 190     // Note there are a couple of missing cases here, once implemented, this
 191     // should become an llvm_unreachable.
 192     break;
 193   case Instruction::ExtractElement: {
 194     Value *Vec = I.getOperand(0);
 195     auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
 196     if (!VecVTy)
 197       break;
 198     Value *Idx = I.getOperand(1);
 199     unsigned NumElts = VecVTy->getNumElements();
 200     Value *Check =
 201       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
 202                    ConstantInt::get(Idx->getType(), NumElts));
 203     Checks.push_back(Check);
 204     break;
 205   }
 206   case Instruction::InsertElement: {
 207     Value *Vec = I.getOperand(0);
 208     auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
 209     if (!VecVTy)
 210       break;
 211     Value *Idx = I.getOperand(2);
 212     unsigned NumElts = VecVTy->getNumElements();
 213     Value *Check =
 214       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
 215                    ConstantInt::get(Idx->getType(), NumElts));
 216     Checks.push_back(Check);
 217     break;
 218   }
 219   };
 220 }
 221
 222 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
 223   auto Itr = ValToPoison.find(V);
 224   if (Itr != ValToPoison.end())
 225     return Itr->second;
 226   if (isa<Constant>(V)) {
 227     return ConstantInt::getFalse(V->getContext());
 228   }
 229   // Return false for unknwon values - this implements a non-strict mode where
 230   // unhandled IR constructs are simply considered to never produce poison.  At
 231   // some point in the future, we probably want a "strict mode" for testing if
 232   // nothing else.
 233   return ConstantInt::getFalse(V->getContext());
 234 }
 235
 236 static void CreateAssert(IRBuilder<> &B, Value *Cond) {
 237   assert(Cond->getType()->isIntegerTy(1));
 238   if (auto *CI = dyn_cast<ConstantInt>(Cond))
 239     if (CI->isAllOnesValue())
 240       return;
 241
 242   Module *M = B.GetInsertBlock()->getModule();
 243   M->getOrInsertFunction("__poison_checker_assert",
 244                          Type::getVoidTy(M->getContext()),
 245                          Type::getInt1Ty(M->getContext()));
 246   Function *TrapFunc = M->getFunction("__poison_checker_assert");
 247   B.CreateCall(TrapFunc, Cond);
 248 }
 249
 250 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
 251   assert(Cond->getType()->isIntegerTy(1));
 252   CreateAssert(B, B.CreateNot(Cond));
 253 }
 254
 255 static bool rewrite(Function &F) {
 256   auto * const Int1Ty = Type::getInt1Ty(F.getContext());
 257
 258   DenseMap<Value *, Value *> ValToPoison;
 259
 260   for (BasicBlock &BB : F)
 261     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
 262       auto *OldPHI = cast<PHINode>(&*I);
 263       auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
 264       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
 265         NewPHI->addIncoming(UndefValue::get(Int1Ty),
 266                             OldPHI->getIncomingBlock(i));
 267       NewPHI->insertBefore(OldPHI);
 268       ValToPoison[OldPHI] = NewPHI;
 269     }
 270
 271   for (BasicBlock &BB : F)
 272     for (Instruction &I : BB) {
 273       if (isa<PHINode>(I)) continue;
 274
 275       IRBuilder<> B(cast<Instruction>(&I));
 276
 277       // Note: There are many more sources of documented UB, but this pass only
 278       // attempts to find UB triggered by propagation of poison.
 279       SmallVector<const Value *, 4> NonPoisonOps;
 280       SmallPtrSet<const Value *, 4> SeenNonPoisonOps;
 281       getGuaranteedNonPoisonOps(&I, NonPoisonOps);
 282       for (const Value *Op : NonPoisonOps)
 283         if (SeenNonPoisonOps.insert(Op).second)
 284           CreateAssertNot(B,
 285                           getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
 286
 287       if (LocalCheck)
 288         if (auto *RI = dyn_cast<ReturnInst>(&I))
 289           if (RI->getNumOperands() != 0) {
 290             Value *Op = RI->getOperand(0);
 291             CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
 292           }
 293
 294       SmallVector<Value*, 4> Checks;
 295       for (const Use &U : I.operands()) {
 296         if (ValToPoison.count(U) && propagatesPoison(U))
 297           Checks.push_back(getPoisonFor(ValToPoison, U));
 298       }
 299
 300       if (canCreatePoison(cast<Operator>(&I)))
 301         generateCreationChecks(I, Checks);
 302       ValToPoison[&I] = buildOrChain(B, Checks);
 303     }
 304
 305   for (BasicBlock &BB : F)
 306     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
 307       auto *OldPHI = cast<PHINode>(&*I);
 308       if (!ValToPoison.count(OldPHI))
 309         continue; // skip the newly inserted phis
 310       auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
 311       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
 312         auto *OldVal = OldPHI->getIncomingValue(i);
 313         NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
 314       }
 315     }
 316   return true;
 317 }
 318
 319
 320 PreservedAnalyses PoisonCheckingPass::run(Module &M,
 321                                           ModuleAnalysisManager &AM) {
 322   bool Changed = false;
 323   for (auto &F : M)
 324     Changed |= rewrite(F);
 325
 326   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
 327 }
 328
 329 PreservedAnalyses PoisonCheckingPass::run(Function &F,
 330                                           FunctionAnalysisManager &AM) {
 331   return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
 332 }
 333
 334 /* Major TODO Items:
 335    - Control dependent poison UB
 336    - Strict mode - (i.e. must analyze every operand)
 337      - Poison through memory
 338      - Function ABIs
 339      - Full coverage of intrinsics, etc.. (ouch)
 340
 341    Instructions w/Unclear Semantics:
 342    - shufflevector - It would seem reasonable for an out of bounds mask element
 343      to produce poison, but the LangRef does not state.
 344    - all binary ops w/vector operands - The likely interpretation would be that
 345      any element overflowing should produce poison for the entire result, but
 346      the LangRef does not state.
 347    - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
 348      strange that only certian flags should be documented as producing poison.
 349
 350    Cases of clear poison semantics not yet implemented:
 351    - Exact flags on ashr/lshr produce poison
 352    - NSW/NUW flags on shl produce poison
 353    - Inbounds flag on getelementptr produce poison
 354    - fptosi/fptoui (out of bounds input) produce poison
 355    - Scalable vector types for insertelement/extractelement
 356    - Floating point binary ops w/fmf nnan/noinfs flags produce poison
 357  */