lib/Transforms/Instrumentation/PoisonChecking.cpp

   1 //===- PoisonChecking.cpp - -----------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // Implements a transform pass which instruments IR such that poison semantics
  10 // are made explicit.  That is, it provides a (possibly partial) executable
  11 // semantics for every instruction w.r.t. poison as specified in the LLVM
  12 // LangRef.  There are obvious parallels to the sanitizer tools, but this pass
  13 // is focused purely on the semantics of LLVM IR, not any particular source
  14 // language.   If you're looking for something to see if your C/C++ contains
  15 // UB, this is not it.
  16 //
  17 // The rewritten semantics of each instruction will include the following
  18 // components:
  19 //
  20 // 1) The original instruction, unmodified.
  21 // 2) A propagation rule which translates dynamic information about the poison
  22 //    state of each input to whether the dynamic output of the instruction
  23 //    produces poison.
  24 // 3) A flag validation rule which validates any poison producing flags on the
  25 //    instruction itself (e.g. checks for overflow on nsw).
  26 // 4) A check rule which traps (to a handler function) if this instruction must
  27 //    execute undefined behavior given the poison state of it's inputs.
  28 //
  29 // At the moment, the UB detection is done in a best effort manner; that is,
  30 // the resulting code may produce a false negative result (not report UB when
  31 // it actually exists according to the LangRef spec), but should never produce
  32 // a false positive (report UB where it doesn't exist).  The intention is to
  33 // eventually support a "strict" mode which never dynamically reports a false
  34 // negative at the cost of rejecting some valid inputs to translation.
  35 //
  36 // Use cases for this pass include:
  37 // - Understanding (and testing!) the implications of the definition of poison
  38 //   from the LangRef.
  39 // - Validating the output of a IR fuzzer to ensure that all programs produced
  40 //   are well defined on the specific input used.
  41 // - Finding/confirming poison specific miscompiles by checking the poison
  42 //   status of an input/IR pair is the same before and after an optimization
  43 //   transform.
  44 // - Checking that a bugpoint reduction does not introduce UB which didn't
  45 //   exist in the original program being reduced.
  46 //
  47 // The major sources of inaccuracy are currently:
  48 // - Most validation rules not yet implemented for instructions with poison
  49 //   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
  50 // - UB which is control dependent on a branch on poison is not yet
  51 //   reported. Currently, only data flow dependence is modeled.
  52 // - Poison which is propagated through memory is not modeled.  As such,
  53 //   storing poison to memory and then reloading it will cause a false negative
  54 //   as we consider the reloaded value to not be poisoned.
  55 // - Poison propagation across function boundaries is not modeled.  At the
  56 //   moment, all arguments and return values are assumed not to be poison.
  57 // - Undef is not modeled.  In particular, the optimizer's freedom to pick
  58 //   concrete values for undef bits so as to maximize potential for producing
  59 //   poison is not modeled.
  60 //
  61 //===----------------------------------------------------------------------===//
  62
  63 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
  64 #include "llvm/ADT/DenseMap.h"
  65 #include "llvm/ADT/Statistic.h"
  66 #include "llvm/Analysis/MemoryBuiltins.h"
  67 #include "llvm/Analysis/ValueTracking.h"
  68 #include "llvm/IR/InstVisitor.h"
  69 #include "llvm/IR/IntrinsicInst.h"
  70 #include "llvm/IR/IRBuilder.h"
  71 #include "llvm/IR/PatternMatch.h"
  72 #include "llvm/Support/Debug.h"
  73
  74 using namespace llvm;
  75
  76 #define DEBUG_TYPE "poison-checking"
  77
  78 static cl::opt<bool>
  79 LocalCheck("poison-checking-function-local",
  80            cl::init(false),
  81            cl::desc("Check that returns are non-poison (for testing)"));
  82
  83
  84 static bool isConstantFalse(Value* V) {
  85   assert(V->getType()->isIntegerTy(1));
  86   if (auto *CI = dyn_cast<ConstantInt>(V))
  87     return CI->isZero();
  88   return false;
  89 }
  90
  91 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
  92   if (Ops.size() == 0)
  93     return B.getFalse();
  94   unsigned i = 0;
  95   for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
  96   if (i == Ops.size())
  97     return B.getFalse();
  98   Value *Accum = Ops[i++];
  99   for (; i < Ops.size(); i++)
 100     if (!isConstantFalse(Ops[i]))
 101       Accum = B.CreateOr(Accum, Ops[i]);
 102   return Accum;
 103 }
 104
 105 static void generatePoisonChecksForBinOp(Instruction &I,
 106                                          SmallVector<Value*, 2> &Checks) {
 107   assert(isa<BinaryOperator>(I));
 108
 109   IRBuilder<> B(&I);
 110   Value *LHS = I.getOperand(0);
 111   Value *RHS = I.getOperand(1);
 112   switch (I.getOpcode()) {
 113   default:
 114     return;
 115   case Instruction::Add: {
 116     if (I.hasNoSignedWrap()) {
 117       auto *OverflowOp =
 118         B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
 119       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 120     }
 121     if (I.hasNoUnsignedWrap()) {
 122       auto *OverflowOp =
 123         B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
 124       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 125     }
 126     break;
 127   }
 128   case Instruction::Sub: {
 129     if (I.hasNoSignedWrap()) {
 130       auto *OverflowOp =
 131         B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
 132       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 133     }
 134     if (I.hasNoUnsignedWrap()) {
 135       auto *OverflowOp =
 136         B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
 137       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 138     }
 139     break;
 140   }
 141   case Instruction::Mul: {
 142     if (I.hasNoSignedWrap()) {
 143       auto *OverflowOp =
 144         B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
 145       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 146     }
 147     if (I.hasNoUnsignedWrap()) {
 148       auto *OverflowOp =
 149         B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
 150       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
 151     }
 152     break;
 153   }
 154   case Instruction::UDiv: {
 155     if (I.isExact()) {
 156       auto *Check =
 157         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
 158                      ConstantInt::get(LHS->getType(), 0));
 159       Checks.push_back(Check);
 160     }
 161     break;
 162   }
 163   case Instruction::SDiv: {
 164     if (I.isExact()) {
 165       auto *Check =
 166         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
 167                      ConstantInt::get(LHS->getType(), 0));
 168       Checks.push_back(Check);
 169     }
 170     break;
 171   }
 172   case Instruction::AShr:
 173   case Instruction::LShr:
 174   case Instruction::Shl: {
 175     Value *ShiftCheck =
 176       B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
 177                    ConstantInt::get(RHS->getType(),
 178                                     LHS->getType()->getScalarSizeInBits()));
 179     Checks.push_back(ShiftCheck);
 180     break;
 181   }
 182   };
 183 }
 184
 185 static Value* generatePoisonChecks(Instruction &I) {
 186   IRBuilder<> B(&I);
 187   SmallVector<Value*, 2> Checks;
 188   if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
 189     generatePoisonChecksForBinOp(I, Checks);
 190
 191   // Handle non-binops seperately
 192   switch (I.getOpcode()) {
 193   default:
 194     break;
 195   case Instruction::ExtractElement: {
 196     Value *Vec = I.getOperand(0);
 197     if (Vec->getType()->getVectorIsScalable())
 198       break;
 199     Value *Idx = I.getOperand(1);
 200     unsigned NumElts = Vec->getType()->getVectorNumElements();
 201     Value *Check =
 202       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
 203                    ConstantInt::get(Idx->getType(), NumElts));
 204     Checks.push_back(Check);
 205     break;
 206   }
 207   case Instruction::InsertElement: {
 208     Value *Vec = I.getOperand(0);
 209     if (Vec->getType()->getVectorIsScalable())
 210       break;
 211     Value *Idx = I.getOperand(2);
 212     unsigned NumElts = Vec->getType()->getVectorNumElements();
 213     Value *Check =
 214       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
 215                    ConstantInt::get(Idx->getType(), NumElts));
 216     Checks.push_back(Check);
 217     break;
 218   }
 219   };
 220   return buildOrChain(B, Checks);
 221 }
 222
 223 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
 224   auto Itr = ValToPoison.find(V);
 225   if (Itr != ValToPoison.end())
 226     return Itr->second;
 227   if (isa<Constant>(V)) {
 228     return ConstantInt::getFalse(V->getContext());
 229   }
 230   // Return false for unknwon values - this implements a non-strict mode where
 231   // unhandled IR constructs are simply considered to never produce poison.  At
 232   // some point in the future, we probably want a "strict mode" for testing if
 233   // nothing else.
 234   return ConstantInt::getFalse(V->getContext());
 235 }
 236
 237 static void CreateAssert(IRBuilder<> &B, Value *Cond) {
 238   assert(Cond->getType()->isIntegerTy(1));
 239   if (auto *CI = dyn_cast<ConstantInt>(Cond))
 240     if (CI->isAllOnesValue())
 241       return;
 242
 243   Module *M = B.GetInsertBlock()->getModule();
 244   M->getOrInsertFunction("__poison_checker_assert",
 245                          Type::getVoidTy(M->getContext()),
 246                          Type::getInt1Ty(M->getContext()));
 247   Function *TrapFunc = M->getFunction("__poison_checker_assert");
 248   B.CreateCall(TrapFunc, Cond);
 249 }
 250
 251 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
 252   assert(Cond->getType()->isIntegerTy(1));
 253   CreateAssert(B, B.CreateNot(Cond));
 254 }
 255
 256 static bool rewrite(Function &F) {
 257   auto * const Int1Ty = Type::getInt1Ty(F.getContext());
 258
 259   DenseMap<Value *, Value *> ValToPoison;
 260
 261   for (BasicBlock &BB : F)
 262     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
 263       auto *OldPHI = cast<PHINode>(&*I);
 264       auto *NewPHI = PHINode::Create(Int1Ty,
 265                                      OldPHI->getNumIncomingValues());
 266       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
 267         NewPHI->addIncoming(UndefValue::get(Int1Ty),
 268                             OldPHI->getIncomingBlock(i));
 269       NewPHI->insertBefore(OldPHI);
 270       ValToPoison[OldPHI] = NewPHI;
 271     }
 272
 273   for (BasicBlock &BB : F)
 274     for (Instruction &I : BB) {
 275       if (isa<PHINode>(I)) continue;
 276
 277       IRBuilder<> B(cast<Instruction>(&I));
 278
 279       // Note: There are many more sources of documented UB, but this pass only
 280       // attempts to find UB triggered by propagation of poison.
 281       if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
 282         CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
 283
 284       if (LocalCheck)
 285         if (auto *RI = dyn_cast<ReturnInst>(&I))
 286           if (RI->getNumOperands() != 0) {
 287             Value *Op = RI->getOperand(0);
 288             CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
 289           }
 290
 291       SmallVector<Value*, 4> Checks;
 292       if (propagatesFullPoison(&I))
 293         for (Value *V : I.operands())
 294           Checks.push_back(getPoisonFor(ValToPoison, V));
 295
 296       if (auto *Check = generatePoisonChecks(I))
 297         Checks.push_back(Check);
 298       ValToPoison[&I] = buildOrChain(B, Checks);
 299     }
 300
 301   for (BasicBlock &BB : F)
 302     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
 303       auto *OldPHI = cast<PHINode>(&*I);
 304       if (!ValToPoison.count(OldPHI))
 305         continue; // skip the newly inserted phis
 306       auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
 307       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
 308         auto *OldVal = OldPHI->getIncomingValue(i);
 309         NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
 310       }
 311     }
 312   return true;
 313 }
 314
 315
 316 PreservedAnalyses PoisonCheckingPass::run(Module &M,
 317                                           ModuleAnalysisManager &AM) {
 318   bool Changed = false;
 319   for (auto &F : M)
 320     Changed |= rewrite(F);
 321
 322   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
 323 }
 324
 325 PreservedAnalyses PoisonCheckingPass::run(Function &F,
 326                                           FunctionAnalysisManager &AM) {
 327   return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
 328 }
 329
 330
 331 /* Major TODO Items:
 332    - Control dependent poison UB
 333    - Strict mode - (i.e. must analyze every operand)
 334      - Poison through memory
 335      - Function ABIs
 336      - Full coverage of intrinsics, etc.. (ouch)
 337
 338    Instructions w/Unclear Semantics:
 339    - shufflevector - It would seem reasonable for an out of bounds mask element
 340      to produce poison, but the LangRef does not state.
 341    - and/or - It would seem reasonable for poison to propagate from both
 342      arguments, but LangRef doesn't state and propagatesFullPoison doesn't
 343      include these two.
 344    - all binary ops w/vector operands - The likely interpretation would be that
 345      any element overflowing should produce poison for the entire result, but
 346      the LangRef does not state.
 347    - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
 348      strange that only certian flags should be documented as producing poison.
 349
 350    Cases of clear poison semantics not yet implemented:
 351    - Exact flags on ashr/lshr produce poison
 352    - NSW/NUW flags on shl produce poison
 353    - Inbounds flag on getelementptr produce poison
 354    - fptosi/fptoui (out of bounds input) produce poison
 355    - Scalable vector types for insertelement/extractelement
 356    - Floating point binary ops w/fmf nnan/noinfs flags produce poison
 357  */