llvm/lib/FuzzMutate/Operations.cpp

   1 //===-- Operations.cpp ----------------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "llvm/FuzzMutate/Operations.h"
  10 #include "llvm/IR/BasicBlock.h"
  11 #include "llvm/IR/Constants.h"
  12 #include "llvm/IR/Function.h"
  13 #include "llvm/IR/Instructions.h"
  14
  15 using namespace llvm;
  16 using namespace fuzzerop;
  17
  18 void llvm::describeFuzzerIntOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
  19   Ops.push_back(binOpDescriptor(1, Instruction::Add));
  20   Ops.push_back(binOpDescriptor(1, Instruction::Sub));
  21   Ops.push_back(binOpDescriptor(1, Instruction::Mul));
  22   Ops.push_back(binOpDescriptor(1, Instruction::SDiv));
  23   Ops.push_back(binOpDescriptor(1, Instruction::UDiv));
  24   Ops.push_back(binOpDescriptor(1, Instruction::SRem));
  25   Ops.push_back(binOpDescriptor(1, Instruction::URem));
  26   Ops.push_back(binOpDescriptor(1, Instruction::Shl));
  27   Ops.push_back(binOpDescriptor(1, Instruction::LShr));
  28   Ops.push_back(binOpDescriptor(1, Instruction::AShr));
  29   Ops.push_back(binOpDescriptor(1, Instruction::And));
  30   Ops.push_back(binOpDescriptor(1, Instruction::Or));
  31   Ops.push_back(binOpDescriptor(1, Instruction::Xor));
  32
  33   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_EQ));
  34   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_NE));
  35   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_UGT));
  36   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_UGE));
  37   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_ULT));
  38   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_ULE));
  39   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SGT));
  40   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SGE));
  41   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SLT));
  42   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SLE));
  43 }
  44
  45 void llvm::describeFuzzerFloatOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
  46   Ops.push_back(binOpDescriptor(1, Instruction::FAdd));
  47   Ops.push_back(binOpDescriptor(1, Instruction::FSub));
  48   Ops.push_back(binOpDescriptor(1, Instruction::FMul));
  49   Ops.push_back(binOpDescriptor(1, Instruction::FDiv));
  50   Ops.push_back(binOpDescriptor(1, Instruction::FRem));
  51
  52   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_FALSE));
  53   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OEQ));
  54   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OGT));
  55   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OGE));
  56   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OLT));
  57   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OLE));
  58   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ONE));
  59   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ORD));
  60   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UNO));
  61   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UEQ));
  62   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UGT));
  63   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UGE));
  64   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ULT));
  65   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ULE));
  66   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UNE));
  67   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_TRUE));
  68 }
  69
  70 void llvm::describeFuzzerUnaryOperations(
  71     std::vector<fuzzerop::OpDescriptor> &Ops) {
  72   Ops.push_back(fnegDescriptor(1));
  73 }
  74
  75 void llvm::describeFuzzerControlFlowOps(
  76     std::vector<fuzzerop::OpDescriptor> &Ops) {
  77   Ops.push_back(splitBlockDescriptor(1));
  78 }
  79
  80 void llvm::describeFuzzerOtherOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
  81   Ops.push_back(selectDescriptor(1));
  82 }
  83
  84 void llvm::describeFuzzerPointerOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
  85   Ops.push_back(gepDescriptor(1));
  86 }
  87
  88 void llvm::describeFuzzerAggregateOps(
  89     std::vector<fuzzerop::OpDescriptor> &Ops) {
  90   Ops.push_back(extractValueDescriptor(1));
  91   Ops.push_back(insertValueDescriptor(1));
  92 }
  93
  94 void llvm::describeFuzzerVectorOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
  95   Ops.push_back(extractElementDescriptor(1));
  96   Ops.push_back(insertElementDescriptor(1));
  97   Ops.push_back(shuffleVectorDescriptor(1));
  98 }
  99
 100 OpDescriptor llvm::fuzzerop::selectDescriptor(unsigned Weight) {
 101   auto buildOp = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
 102     return SelectInst::Create(Srcs[0], Srcs[1], Srcs[2], "S", Inst);
 103   };
 104   return {Weight,
 105           {boolOrVecBoolType(), matchFirstLengthWAnyType(), matchSecondType()},
 106           buildOp};
 107 }
 108
 109 OpDescriptor llvm::fuzzerop::fnegDescriptor(unsigned Weight) {
 110   auto buildOp = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
 111     return UnaryOperator::Create(Instruction::FNeg, Srcs[0], "F", Inst);
 112   };
 113   return {Weight, {anyFloatOrVecFloatType()}, buildOp};
 114 }
 115
 116 OpDescriptor llvm::fuzzerop::binOpDescriptor(unsigned Weight,
 117                                              Instruction::BinaryOps Op) {
 118   auto buildOp = [Op](ArrayRef<Value *> Srcs, Instruction *Inst) {
 119     return BinaryOperator::Create(Op, Srcs[0], Srcs[1], "B", Inst);
 120   };
 121   switch (Op) {
 122   case Instruction::Add:
 123   case Instruction::Sub:
 124   case Instruction::Mul:
 125   case Instruction::SDiv:
 126   case Instruction::UDiv:
 127   case Instruction::SRem:
 128   case Instruction::URem:
 129   case Instruction::Shl:
 130   case Instruction::LShr:
 131   case Instruction::AShr:
 132   case Instruction::And:
 133   case Instruction::Or:
 134   case Instruction::Xor:
 135     return {Weight, {anyIntOrVecIntType(), matchFirstType()}, buildOp};
 136   case Instruction::FAdd:
 137   case Instruction::FSub:
 138   case Instruction::FMul:
 139   case Instruction::FDiv:
 140   case Instruction::FRem:
 141     return {Weight, {anyFloatOrVecFloatType(), matchFirstType()}, buildOp};
 142   case Instruction::BinaryOpsEnd:
 143     llvm_unreachable("Value out of range of enum");
 144   }
 145   llvm_unreachable("Covered switch");
 146 }
 147
 148 OpDescriptor llvm::fuzzerop::cmpOpDescriptor(unsigned Weight,
 149                                              Instruction::OtherOps CmpOp,
 150                                              CmpInst::Predicate Pred) {
 151   auto buildOp = [CmpOp, Pred](ArrayRef<Value *> Srcs, Instruction *Inst) {
 152     return CmpInst::Create(CmpOp, Pred, Srcs[0], Srcs[1], "C", Inst);
 153   };
 154
 155   switch (CmpOp) {
 156   case Instruction::ICmp:
 157     return {Weight, {anyIntOrVecIntType(), matchFirstType()}, buildOp};
 158   case Instruction::FCmp:
 159     return {Weight, {anyFloatOrVecFloatType(), matchFirstType()}, buildOp};
 160   default:
 161     llvm_unreachable("CmpOp must be ICmp or FCmp");
 162   }
 163 }
 164
 165 OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) {
 166   auto buildSplitBlock = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
 167     BasicBlock *Block = Inst->getParent();
 168     BasicBlock *Next = Block->splitBasicBlock(Inst, "BB");
 169
 170     // If it was an exception handling block, we are done.
 171     if (Block->isEHPad())
 172       return nullptr;
 173
 174     // Loop back on this block by replacing the unconditional forward branch
 175     // with a conditional with a backedge.
 176     if (Block != &Block->getParent()->getEntryBlock()) {
 177       BranchInst::Create(Block, Next, Srcs[0], Block->getTerminator());
 178       Block->getTerminator()->eraseFromParent();
 179
 180       // We need values for each phi in the block. Since there isn't a good way
 181       // to do a variable number of input values currently, we just fill them
 182       // with undef.
 183       for (PHINode &PHI : Block->phis())
 184         PHI.addIncoming(UndefValue::get(PHI.getType()), Block);
 185     }
 186     return nullptr;
 187   };
 188   SourcePred isInt1Ty{[](ArrayRef<Value *>, const Value *V) {
 189                         return V->getType()->isIntegerTy(1);
 190                       },
 191                       std::nullopt};
 192   return {Weight, {isInt1Ty}, buildSplitBlock};
 193 }
 194
 195 OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) {
 196   auto buildGEP = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
 197     // TODO: It would be better to generate a random type here, rather than
 198     // generating a random value and picking its type.
 199     Type *Ty = Srcs[1]->getType();
 200     auto Indices = ArrayRef(Srcs).drop_front(2);
 201     return GetElementPtrInst::Create(Ty, Srcs[0], Indices, "G", Inst);
 202   };
 203   // TODO: Handle aggregates and vectors
 204   // TODO: Support multiple indices.
 205   // TODO: Try to avoid meaningless accesses.
 206   SourcePred sizedType(
 207       [](ArrayRef<Value *>, const Value *V) { return V->getType()->isSized(); },
 208       std::nullopt);
 209   return {Weight, {sizedPtrType(), sizedType, anyIntType()}, buildGEP};
 210 }
 211
 212 static uint64_t getAggregateNumElements(Type *T) {
 213   assert(T->isAggregateType() && "Not a struct or array");
 214   if (isa<StructType>(T))
 215     return T->getStructNumElements();
 216   return T->getArrayNumElements();
 217 }
 218
 219 static SourcePred validExtractValueIndex() {
 220   auto Pred = [](ArrayRef<Value *> Cur, const Value *V) {
 221     if (auto *CI = dyn_cast<ConstantInt>(V))
 222       if (!CI->uge(getAggregateNumElements(Cur[0]->getType())))
 223         return true;
 224     return false;
 225   };
 226   auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) {
 227     std::vector<Constant *> Result;
 228     auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext());
 229     uint64_t N = getAggregateNumElements(Cur[0]->getType());
 230     // Create indices at the start, end, and middle, but avoid dups.
 231     Result.push_back(ConstantInt::get(Int32Ty, 0));
 232     if (N > 1)
 233       Result.push_back(ConstantInt::get(Int32Ty, N - 1));
 234     if (N > 2)
 235       Result.push_back(ConstantInt::get(Int32Ty, N / 2));
 236     return Result;
 237   };
 238   return {Pred, Make};
 239 }
 240
 241 OpDescriptor llvm::fuzzerop::extractValueDescriptor(unsigned Weight) {
 242   auto buildExtract = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
 243     // TODO: It's pretty inefficient to shuffle this all through constants.
 244     unsigned Idx = cast<ConstantInt>(Srcs[1])->getZExtValue();
 245     return ExtractValueInst::Create(Srcs[0], {Idx}, "E", Inst);
 246   };
 247   // TODO: Should we handle multiple indices?
 248   return {Weight, {anyAggregateType(), validExtractValueIndex()}, buildExtract};
 249 }
 250
 251 static SourcePred matchScalarInAggregate() {
 252   auto Pred = [](ArrayRef<Value *> Cur, const Value *V) {
 253     if (auto *ArrayT = dyn_cast<ArrayType>(Cur[0]->getType()))
 254       return V->getType() == ArrayT->getElementType();
 255
 256     auto *STy = cast<StructType>(Cur[0]->getType());
 257     for (int I = 0, E = STy->getNumElements(); I < E; ++I)
 258       if (STy->getTypeAtIndex(I) == V->getType())
 259         return true;
 260     return false;
 261   };
 262   auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *>) {
 263     if (auto *ArrayT = dyn_cast<ArrayType>(Cur[0]->getType()))
 264       return makeConstantsWithType(ArrayT->getElementType());
 265
 266     std::vector<Constant *> Result;
 267     auto *STy = cast<StructType>(Cur[0]->getType());
 268     for (int I = 0, E = STy->getNumElements(); I < E; ++I)
 269       makeConstantsWithType(STy->getTypeAtIndex(I), Result);
 270     return Result;
 271   };
 272   return {Pred, Make};
 273 }
 274
 275 static SourcePred validInsertValueIndex() {
 276   auto Pred = [](ArrayRef<Value *> Cur, const Value *V) {
 277     if (auto *CI = dyn_cast<ConstantInt>(V))
 278       if (CI->getBitWidth() == 32) {
 279         Type *Indexed = ExtractValueInst::getIndexedType(Cur[0]->getType(),
 280                                                          CI->getZExtValue());
 281         return Indexed == Cur[1]->getType();
 282       }
 283     return false;
 284   };
 285   auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) {
 286     std::vector<Constant *> Result;
 287     auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext());
 288     auto *BaseTy = Cur[0]->getType();
 289     int I = 0;
 290     while (Type *Indexed = ExtractValueInst::getIndexedType(BaseTy, I)) {
 291       if (Indexed == Cur[1]->getType())
 292         Result.push_back(ConstantInt::get(Int32Ty, I));
 293       ++I;
 294     }
 295     return Result;
 296   };
 297   return {Pred, Make};
 298 }
 299
 300 OpDescriptor llvm::fuzzerop::insertValueDescriptor(unsigned Weight) {
 301   auto buildInsert = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
 302     // TODO: It's pretty inefficient to shuffle this all through constants.
 303     unsigned Idx = cast<ConstantInt>(Srcs[2])->getZExtValue();
 304     return InsertValueInst::Create(Srcs[0], Srcs[1], {Idx}, "I", Inst);
 305   };
 306   return {
 307       Weight,
 308       {anyAggregateType(), matchScalarInAggregate(), validInsertValueIndex()},
 309       buildInsert};
 310 }
 311
 312 OpDescriptor llvm::fuzzerop::extractElementDescriptor(unsigned Weight) {
 313   auto buildExtract = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
 314     return ExtractElementInst::Create(Srcs[0], Srcs[1], "E", Inst);
 315   };
 316   // TODO: Try to avoid undefined accesses.
 317   return {Weight, {anyVectorType(), anyIntType()}, buildExtract};
 318 }
 319
 320 OpDescriptor llvm::fuzzerop::insertElementDescriptor(unsigned Weight) {
 321   auto buildInsert = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
 322     return InsertElementInst::Create(Srcs[0], Srcs[1], Srcs[2], "I", Inst);
 323   };
 324   // TODO: Try to avoid undefined accesses.
 325   return {Weight,
 326           {anyVectorType(), matchScalarOfFirstType(), anyIntType()},
 327           buildInsert};
 328 }
 329
 330 static SourcePred validShuffleVectorIndex() {
 331   auto Pred = [](ArrayRef<Value *> Cur, const Value *V) {
 332     return ShuffleVectorInst::isValidOperands(Cur[0], Cur[1], V);
 333   };
 334   auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) {
 335     auto *FirstTy = cast<VectorType>(Cur[0]->getType());
 336     auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext());
 337     // TODO: It's straighforward to make up reasonable values, but listing them
 338     // exhaustively would be insane. Come up with a couple of sensible ones.
 339     return std::vector<Constant *>{
 340         UndefValue::get(VectorType::get(Int32Ty, FirstTy->getElementCount()))};
 341   };
 342   return {Pred, Make};
 343 }
 344
 345 OpDescriptor llvm::fuzzerop::shuffleVectorDescriptor(unsigned Weight) {
 346   auto buildShuffle = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
 347     return new ShuffleVectorInst(Srcs[0], Srcs[1], Srcs[2], "S", Inst);
 348   };
 349   return {Weight,
 350           {anyVectorType(), matchFirstType(), validShuffleVectorIndex()},
 351           buildShuffle};
 352 }