1 //===-- RandomIRBuilder.cpp -----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/FuzzMutate/RandomIRBuilder.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/FuzzMutate/OpDescriptor.h"
12 #include "llvm/FuzzMutate/Random.h"
13 #include "llvm/IR/BasicBlock.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Dominators.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/IR/Module.h"
23 using namespace fuzzerop
;
25 /// Return a vector of Blocks that dominates this block, excluding current
27 static std::vector
<BasicBlock
*> getDominators(BasicBlock
*BB
) {
28 std::vector
<BasicBlock
*> ret
;
29 DominatorTree
DT(*BB
->getParent());
30 DomTreeNode
*Node
= DT
.getNode(BB
);
31 // It's possible that an orphan block is not in the dom tree. In that case we
32 // just return nothing.
35 Node
= Node
->getIDom();
36 while (Node
&& Node
->getBlock()) {
37 ret
.push_back(Node
->getBlock());
39 Node
= Node
->getIDom();
44 /// Return a vector of Blocks that is dominated by this block, excluding current
46 static std::vector
<BasicBlock
*> getDominatees(BasicBlock
*BB
) {
47 DominatorTree
DT(*BB
->getParent());
48 std::vector
<BasicBlock
*> ret
;
49 DomTreeNode
*Parent
= DT
.getNode(BB
);
50 // It's possible that an orphan block is not in the dom tree. In that case we
51 // just return nothing.
54 for (DomTreeNode
*Child
: Parent
->children())
55 ret
.push_back(Child
->getBlock());
57 while (Idx
< ret
.size()) {
58 DomTreeNode
*Node
= DT
[ret
[Idx
]];
60 for (DomTreeNode
*Child
: Node
->children())
61 ret
.push_back(Child
->getBlock());
66 AllocaInst
*RandomIRBuilder::createStackMemory(Function
*F
, Type
*Ty
,
68 /// TODO: For all Allocas, maybe allocate an array.
69 BasicBlock
*EntryBB
= &F
->getEntryBlock();
70 DataLayout
DL(F
->getParent());
71 AllocaInst
*Alloca
= new AllocaInst(Ty
, DL
.getAllocaAddrSpace(), "A",
72 &*EntryBB
->getFirstInsertionPt());
74 new StoreInst(Init
, Alloca
, Alloca
->getNextNode());
78 std::pair
<GlobalVariable
*, bool>
79 RandomIRBuilder::findOrCreateGlobalVariable(Module
*M
, ArrayRef
<Value
*> Srcs
,
80 fuzzerop::SourcePred Pred
) {
81 auto MatchesPred
= [&Srcs
, &Pred
](GlobalVariable
*GV
) {
82 // Can't directly compare GV's type, as it would be a pointer to the actual
84 return Pred
.matches(Srcs
, UndefValue::get(GV
->getValueType()));
86 bool DidCreate
= false;
87 SmallVector
<GlobalVariable
*, 4> GlobalVars
;
88 for (GlobalVariable
&GV
: M
->globals()) {
89 GlobalVars
.push_back(&GV
);
91 auto RS
= makeSampler(Rand
, make_filter_range(GlobalVars
, MatchesPred
));
92 RS
.sample(nullptr, 1);
93 GlobalVariable
*GV
= RS
.getSelection();
96 using LinkageTypes
= GlobalVariable::LinkageTypes
;
97 auto TRS
= makeSampler
<Constant
*>(Rand
);
98 TRS
.sample(Pred
.generate(Srcs
, KnownTypes
));
99 Constant
*Init
= TRS
.getSelection();
100 Type
*Ty
= Init
->getType();
101 GV
= new GlobalVariable(*M
, Ty
, false, LinkageTypes::ExternalLinkage
, Init
,
103 GlobalValue::ThreadLocalMode::NotThreadLocal
,
104 M
->getDataLayout().getDefaultGlobalsAddressSpace());
106 return {GV
, DidCreate
};
109 Value
*RandomIRBuilder::findOrCreateSource(BasicBlock
&BB
,
110 ArrayRef
<Instruction
*> Insts
) {
111 return findOrCreateSource(BB
, Insts
, {}, anyType());
114 Value
*RandomIRBuilder::findOrCreateSource(BasicBlock
&BB
,
115 ArrayRef
<Instruction
*> Insts
,
116 ArrayRef
<Value
*> Srcs
,
118 bool allowConstant
) {
119 auto MatchesPred
= [&Srcs
, &Pred
](Value
*V
) { return Pred
.matches(Srcs
, V
); };
120 SmallVector
<uint64_t, 8> SrcTys
;
121 for (uint64_t i
= 0; i
< EndOfValueSource
; i
++)
123 std::shuffle(SrcTys
.begin(), SrcTys
.end(), Rand
);
124 for (uint64_t SrcTy
: SrcTys
) {
126 case SrcFromInstInCurBlock
: {
127 auto RS
= makeSampler(Rand
, make_filter_range(Insts
, MatchesPred
));
129 return RS
.getSelection();
133 case FunctionArgument
: {
134 Function
*F
= BB
.getParent();
135 SmallVector
<Argument
*, 8> Args
;
136 for (uint64_t i
= 0; i
< F
->arg_size(); i
++) {
137 Args
.push_back(F
->getArg(i
));
139 auto RS
= makeSampler(Rand
, make_filter_range(Args
, MatchesPred
));
141 return RS
.getSelection();
145 case InstInDominator
: {
146 auto Dominators
= getDominators(&BB
);
147 std::shuffle(Dominators
.begin(), Dominators
.end(), Rand
);
148 for (BasicBlock
*Dom
: Dominators
) {
149 SmallVector
<Instruction
*, 16> Instructions
;
150 for (Instruction
&I
: *Dom
) {
151 Instructions
.push_back(&I
);
154 makeSampler(Rand
, make_filter_range(Instructions
, MatchesPred
));
155 // Also consider choosing no source, meaning we want a new one.
157 return RS
.getSelection();
162 case SrcFromGlobalVariable
: {
163 Module
*M
= BB
.getParent()->getParent();
164 auto [GV
, DidCreate
] = findOrCreateGlobalVariable(M
, Srcs
, Pred
);
165 Type
*Ty
= GV
->getValueType();
166 LoadInst
*LoadGV
= nullptr;
167 if (BB
.getTerminator()) {
168 LoadGV
= new LoadInst(Ty
, GV
, "LGV", &*BB
.getFirstInsertionPt());
170 LoadGV
= new LoadInst(Ty
, GV
, "LGV", &BB
);
172 // Because we might be generating new values, we have to check if it
175 if (Pred
.matches(Srcs
, LoadGV
)) {
178 LoadGV
->eraseFromParent();
179 // If no one is using this GlobalVariable, delete it too.
180 if (GV
->use_empty()) {
181 GV
->eraseFromParent();
186 case NewConstOrStack
: {
187 return newSource(BB
, Insts
, Srcs
, Pred
, allowConstant
);
190 case EndOfValueSource
: {
191 llvm_unreachable("EndOfValueSource executed");
195 llvm_unreachable("Can't find a source");
198 Value
*RandomIRBuilder::newSource(BasicBlock
&BB
, ArrayRef
<Instruction
*> Insts
,
199 ArrayRef
<Value
*> Srcs
, SourcePred Pred
,
200 bool allowConstant
) {
201 // Generate some constants to choose from.
202 auto RS
= makeSampler
<Value
*>(Rand
);
203 RS
.sample(Pred
.generate(Srcs
, KnownTypes
));
205 // If we can find a pointer to load from, use it half the time.
206 Value
*Ptr
= findPointer(BB
, Insts
);
208 // Create load from the chosen pointer
209 auto IP
= BB
.getFirstInsertionPt();
210 if (auto *I
= dyn_cast
<Instruction
>(Ptr
)) {
211 IP
= ++I
->getIterator();
212 assert(IP
!= BB
.end() && "guaranteed by the findPointer");
214 // Pick the type independently.
215 Type
*AccessTy
= RS
.getSelection()->getType();
216 auto *NewLoad
= new LoadInst(AccessTy
, Ptr
, "L", &*IP
);
218 // Only sample this load if it really matches the descriptor
219 if (Pred
.matches(Srcs
, NewLoad
))
220 RS
.sample(NewLoad
, RS
.totalWeight());
222 NewLoad
->eraseFromParent();
225 Value
*newSrc
= RS
.getSelection();
226 // Generate a stack alloca and store the constant to it if constant is not
227 // allowed, our hope is that later mutations can generate some values and
228 // store to this placeholder.
229 if (!allowConstant
&& isa
<Constant
>(newSrc
)) {
230 Type
*Ty
= newSrc
->getType();
231 Function
*F
= BB
.getParent();
232 AllocaInst
*Alloca
= createStackMemory(F
, Ty
, newSrc
);
233 if (BB
.getTerminator()) {
234 newSrc
= new LoadInst(Ty
, Alloca
, /*ArrLen,*/ "L", BB
.getTerminator());
236 newSrc
= new LoadInst(Ty
, Alloca
, /*ArrLen,*/ "L", &BB
);
242 static bool isCompatibleReplacement(const Instruction
*I
, const Use
&Operand
,
243 const Value
*Replacement
) {
244 unsigned int OperandNo
= Operand
.getOperandNo();
245 if (Operand
->getType() != Replacement
->getType())
247 switch (I
->getOpcode()) {
248 case Instruction::GetElementPtr
:
249 case Instruction::ExtractElement
:
250 case Instruction::ExtractValue
:
251 // TODO: We could potentially validate these, but for now just leave indices
256 case Instruction::InsertValue
:
257 case Instruction::InsertElement
:
258 case Instruction::ShuffleVector
:
262 // For Br/Switch, we only try to modify the 1st Operand (condition).
263 // Modify other operands, like switch case may accidently change case from
264 // ConstantInt to a register, which is illegal.
265 case Instruction::Switch
:
266 case Instruction::Br
:
270 case Instruction::Call
:
271 case Instruction::Invoke
:
272 case Instruction::CallBr
: {
273 const Function
*Callee
= cast
<CallBase
>(I
)->getCalledFunction();
274 // If it's an indirect call, give up.
277 // If callee is not an intrinsic, operand 0 is the function to be called.
278 // Since we cannot assume that the replacement is a function pointer,
280 if (!Callee
->getIntrinsicID() && OperandNo
== 0)
282 return !Callee
->hasParamAttribute(OperandNo
, Attribute::ImmArg
);
290 Instruction
*RandomIRBuilder::connectToSink(BasicBlock
&BB
,
291 ArrayRef
<Instruction
*> Insts
,
293 SmallVector
<uint64_t, 8> SinkTys
;
294 for (uint64_t i
= 0; i
< EndOfValueSink
; i
++)
295 SinkTys
.push_back(i
);
296 std::shuffle(SinkTys
.begin(), SinkTys
.end(), Rand
);
297 auto findSinkAndConnect
=
298 [this, V
](ArrayRef
<Instruction
*> Instructions
) -> Instruction
* {
299 auto RS
= makeSampler
<Use
*>(Rand
);
300 for (auto &I
: Instructions
) {
301 for (Use
&U
: I
->operands())
302 if (isCompatibleReplacement(I
, U
, V
))
306 Use
*Sink
= RS
.getSelection();
307 User
*U
= Sink
->getUser();
308 unsigned OpNo
= Sink
->getOperandNo();
309 U
->setOperand(OpNo
, V
);
310 return cast
<Instruction
>(U
);
314 Instruction
*Sink
= nullptr;
315 for (uint64_t SinkTy
: SinkTys
) {
317 case SinkToInstInCurBlock
:
318 Sink
= findSinkAndConnect(Insts
);
322 case PointersInDominator
: {
323 auto Dominators
= getDominators(&BB
);
324 std::shuffle(Dominators
.begin(), Dominators
.end(), Rand
);
325 for (BasicBlock
*Dom
: Dominators
) {
326 for (Instruction
&I
: *Dom
) {
327 if (isa
<PointerType
>(I
.getType()))
328 return new StoreInst(V
, &I
, Insts
.back());
333 case InstInDominatee
: {
334 auto Dominatees
= getDominatees(&BB
);
335 std::shuffle(Dominatees
.begin(), Dominatees
.end(), Rand
);
336 for (BasicBlock
*Dominee
: Dominatees
) {
337 std::vector
<Instruction
*> Instructions
;
338 for (Instruction
&I
: *Dominee
)
339 Instructions
.push_back(&I
);
340 Sink
= findSinkAndConnect(Instructions
);
348 /// TODO: allocate a new stack memory.
349 return newSink(BB
, Insts
, V
);
350 case SinkToGlobalVariable
: {
351 Module
*M
= BB
.getParent()->getParent();
352 auto [GV
, DidCreate
] =
353 findOrCreateGlobalVariable(M
, {}, fuzzerop::onlyType(V
->getType()));
354 return new StoreInst(V
, GV
, Insts
.back());
358 llvm_unreachable("EndOfValueSink executed");
361 llvm_unreachable("Can't find a sink");
364 Instruction
*RandomIRBuilder::newSink(BasicBlock
&BB
,
365 ArrayRef
<Instruction
*> Insts
, Value
*V
) {
366 Value
*Ptr
= findPointer(BB
, Insts
);
368 if (uniform(Rand
, 0, 1)) {
369 Type
*Ty
= V
->getType();
370 Ptr
= createStackMemory(BB
.getParent(), Ty
, UndefValue::get(Ty
));
372 Ptr
= UndefValue::get(PointerType::get(V
->getType(), 0));
376 return new StoreInst(V
, Ptr
, Insts
.back());
379 Value
*RandomIRBuilder::findPointer(BasicBlock
&BB
,
380 ArrayRef
<Instruction
*> Insts
) {
381 auto IsMatchingPtr
= [](Instruction
*Inst
) {
382 // Invoke instructions sometimes produce valid pointers but currently
383 // we can't insert loads or stores from them
384 if (Inst
->isTerminator())
387 return Inst
->getType()->isPointerTy();
389 if (auto RS
= makeSampler(Rand
, make_filter_range(Insts
, IsMatchingPtr
)))
390 return RS
.getSelection();
394 Type
*RandomIRBuilder::randomType() {
395 uint64_t TyIdx
= uniform
<uint64_t>(Rand
, 0, KnownTypes
.size() - 1);
396 return KnownTypes
[TyIdx
];
399 Function
*RandomIRBuilder::createFunctionDeclaration(Module
&M
,
401 Type
*RetType
= randomType();
403 SmallVector
<Type
*, 2> Args
;
404 for (uint64_t i
= 0; i
< ArgNum
; i
++) {
405 Args
.push_back(randomType());
408 Function
*F
= Function::Create(FunctionType::get(RetType
, Args
,
410 GlobalValue::ExternalLinkage
, "f", &M
);
413 Function
*RandomIRBuilder::createFunctionDeclaration(Module
&M
) {
414 return createFunctionDeclaration(
415 M
, uniform
<uint64_t>(Rand
, MinArgNum
, MaxArgNum
));
418 Function
*RandomIRBuilder::createFunctionDefinition(Module
&M
,
420 Function
*F
= this->createFunctionDeclaration(M
, ArgNum
);
422 // TODO: Some arguments and a return value would probably be more
424 LLVMContext
&Context
= M
.getContext();
426 BasicBlock
*BB
= BasicBlock::Create(Context
, "BB", F
);
427 Type
*RetTy
= F
->getReturnType();
428 if (RetTy
!= Type::getVoidTy(Context
)) {
429 Instruction
*RetAlloca
=
430 new AllocaInst(RetTy
, DL
.getAllocaAddrSpace(), "RP", BB
);
431 Instruction
*RetLoad
= new LoadInst(RetTy
, RetAlloca
, "", BB
);
432 ReturnInst::Create(Context
, RetLoad
, BB
);
434 ReturnInst::Create(Context
, BB
);
439 Function
*RandomIRBuilder::createFunctionDefinition(Module
&M
) {
440 return createFunctionDefinition(
441 M
, uniform
<uint64_t>(Rand
, MinArgNum
, MaxArgNum
));