1 //===-- RandomIRBuilder.cpp -----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/FuzzMutate/RandomIRBuilder.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/FuzzMutate/OpDescriptor.h"
12 #include "llvm/FuzzMutate/Random.h"
13 #include "llvm/IR/BasicBlock.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Dominators.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/IR/Module.h"
23 using namespace fuzzerop
;
25 /// Return a vector of Blocks that dominates this block, excluding current
27 static std::vector
<BasicBlock
*> getDominators(BasicBlock
*BB
) {
28 std::vector
<BasicBlock
*> ret
;
29 DominatorTree
DT(*BB
->getParent());
30 DomTreeNode
*Node
= DT
[BB
]->getIDom();
31 while (Node
&& Node
->getBlock()) {
32 ret
.push_back(Node
->getBlock());
34 Node
= Node
->getIDom();
39 /// Return a vector of Blocks that is dominated by this block, excluding current
41 static std::vector
<BasicBlock
*> getDominatees(BasicBlock
*BB
) {
42 DominatorTree
DT(*BB
->getParent());
43 std::vector
<BasicBlock
*> ret
;
44 for (DomTreeNode
*Child
: DT
[BB
]->children())
45 ret
.push_back(Child
->getBlock());
47 while (Idx
< ret
.size()) {
48 DomTreeNode
*Node
= DT
[ret
[Idx
]];
50 for (DomTreeNode
*Child
: Node
->children())
51 ret
.push_back(Child
->getBlock());
56 AllocaInst
*RandomIRBuilder::createStackMemory(Function
*F
, Type
*Ty
,
58 /// TODO: For all Allocas, maybe allocate an array.
59 BasicBlock
*EntryBB
= &F
->getEntryBlock();
60 DataLayout
DL(F
->getParent());
61 AllocaInst
*Alloca
= new AllocaInst(Ty
, DL
.getAllocaAddrSpace(), "A",
62 &*EntryBB
->getFirstInsertionPt());
64 new StoreInst(Init
, Alloca
, Alloca
->getNextNode());
68 std::pair
<GlobalVariable
*, bool>
69 RandomIRBuilder::findOrCreateGlobalVariable(Module
*M
, ArrayRef
<Value
*> Srcs
,
70 fuzzerop::SourcePred Pred
) {
71 auto MatchesPred
= [&Srcs
, &Pred
](GlobalVariable
*GV
) {
72 // Can't directly compare GV's type, as it would be a pointer to the actual
74 return Pred
.matches(Srcs
, UndefValue::get(GV
->getValueType()));
76 bool DidCreate
= false;
77 SmallVector
<GlobalVariable
*, 4> GlobalVars
;
78 for (GlobalVariable
&GV
: M
->globals()) {
79 GlobalVars
.push_back(&GV
);
81 auto RS
= makeSampler(Rand
, make_filter_range(GlobalVars
, MatchesPred
));
82 RS
.sample(nullptr, 1);
83 GlobalVariable
*GV
= RS
.getSelection();
86 using LinkageTypes
= GlobalVariable::LinkageTypes
;
87 auto TRS
= makeSampler
<Constant
*>(Rand
);
88 TRS
.sample(Pred
.generate(Srcs
, KnownTypes
));
89 Constant
*Init
= TRS
.getSelection();
90 Type
*Ty
= Init
->getType();
91 GV
= new GlobalVariable(*M
, Ty
, false, LinkageTypes::ExternalLinkage
, Init
,
93 GlobalValue::ThreadLocalMode::NotThreadLocal
,
94 M
->getDataLayout().getDefaultGlobalsAddressSpace());
96 return {GV
, DidCreate
};
99 Value
*RandomIRBuilder::findOrCreateSource(BasicBlock
&BB
,
100 ArrayRef
<Instruction
*> Insts
) {
101 return findOrCreateSource(BB
, Insts
, {}, anyType());
104 Value
*RandomIRBuilder::findOrCreateSource(BasicBlock
&BB
,
105 ArrayRef
<Instruction
*> Insts
,
106 ArrayRef
<Value
*> Srcs
,
108 bool allowConstant
) {
109 auto MatchesPred
= [&Srcs
, &Pred
](Value
*V
) { return Pred
.matches(Srcs
, V
); };
110 SmallVector
<uint64_t, 8> SrcTys
;
111 for (uint64_t i
= 0; i
< EndOfValueSource
; i
++)
113 std::shuffle(SrcTys
.begin(), SrcTys
.end(), Rand
);
114 for (uint64_t SrcTy
: SrcTys
) {
116 case SrcFromInstInCurBlock
: {
117 auto RS
= makeSampler(Rand
, make_filter_range(Insts
, MatchesPred
));
119 return RS
.getSelection();
123 case FunctionArgument
: {
124 Function
*F
= BB
.getParent();
125 SmallVector
<Argument
*, 8> Args
;
126 for (uint64_t i
= 0; i
< F
->arg_size(); i
++) {
127 Args
.push_back(F
->getArg(i
));
129 auto RS
= makeSampler(Rand
, make_filter_range(Args
, MatchesPred
));
131 return RS
.getSelection();
135 case InstInDominator
: {
136 auto Dominators
= getDominators(&BB
);
137 std::shuffle(Dominators
.begin(), Dominators
.end(), Rand
);
138 for (BasicBlock
*Dom
: Dominators
) {
139 SmallVector
<Instruction
*, 16> Instructions
;
140 for (Instruction
&I
: *Dom
) {
141 Instructions
.push_back(&I
);
144 makeSampler(Rand
, make_filter_range(Instructions
, MatchesPred
));
145 // Also consider choosing no source, meaning we want a new one.
147 return RS
.getSelection();
152 case SrcFromGlobalVariable
: {
153 Module
*M
= BB
.getParent()->getParent();
154 auto [GV
, DidCreate
] = findOrCreateGlobalVariable(M
, Srcs
, Pred
);
155 Type
*Ty
= GV
->getValueType();
156 LoadInst
*LoadGV
= nullptr;
157 if (BB
.getTerminator()) {
158 LoadGV
= new LoadInst(Ty
, GV
, "LGV", &*BB
.getFirstInsertionPt());
160 LoadGV
= new LoadInst(Ty
, GV
, "LGV", &BB
);
162 // Because we might be generating new values, we have to check if it
165 if (Pred
.matches(Srcs
, LoadGV
)) {
168 LoadGV
->eraseFromParent();
169 // If no one is using this GlobalVariable, delete it too.
170 if (GV
->use_empty()) {
171 GV
->eraseFromParent();
176 case NewConstOrStack
: {
177 return newSource(BB
, Insts
, Srcs
, Pred
, allowConstant
);
180 case EndOfValueSource
: {
181 llvm_unreachable("EndOfValueSource executed");
185 llvm_unreachable("Can't find a source");
188 Value
*RandomIRBuilder::newSource(BasicBlock
&BB
, ArrayRef
<Instruction
*> Insts
,
189 ArrayRef
<Value
*> Srcs
, SourcePred Pred
,
190 bool allowConstant
) {
191 // Generate some constants to choose from.
192 auto RS
= makeSampler
<Value
*>(Rand
);
193 RS
.sample(Pred
.generate(Srcs
, KnownTypes
));
195 // If we can find a pointer to load from, use it half the time.
196 Value
*Ptr
= findPointer(BB
, Insts
, Srcs
, Pred
);
198 // Create load from the chosen pointer
199 auto IP
= BB
.getFirstInsertionPt();
200 if (auto *I
= dyn_cast
<Instruction
>(Ptr
)) {
201 IP
= ++I
->getIterator();
202 assert(IP
!= BB
.end() && "guaranteed by the findPointer");
204 // For opaque pointers, pick the type independently.
205 Type
*AccessTy
= Ptr
->getType()->isOpaquePointerTy()
206 ? RS
.getSelection()->getType()
207 : Ptr
->getType()->getNonOpaquePointerElementType();
208 auto *NewLoad
= new LoadInst(AccessTy
, Ptr
, "L", &*IP
);
210 // Only sample this load if it really matches the descriptor
211 if (Pred
.matches(Srcs
, NewLoad
))
212 RS
.sample(NewLoad
, RS
.totalWeight());
214 NewLoad
->eraseFromParent();
217 Value
*newSrc
= RS
.getSelection();
218 // Generate a stack alloca and store the constant to it if constant is not
219 // allowed, our hope is that later mutations can generate some values and
220 // store to this placeholder.
221 if (!allowConstant
&& isa
<Constant
>(newSrc
)) {
222 Type
*Ty
= newSrc
->getType();
223 Function
*F
= BB
.getParent();
224 AllocaInst
*Alloca
= createStackMemory(F
, Ty
, newSrc
);
225 if (BB
.getTerminator()) {
226 newSrc
= new LoadInst(Ty
, Alloca
, /*ArrLen,*/ "L", BB
.getTerminator());
228 newSrc
= new LoadInst(Ty
, Alloca
, /*ArrLen,*/ "L", &BB
);
234 static bool isCompatibleReplacement(const Instruction
*I
, const Use
&Operand
,
235 const Value
*Replacement
) {
236 unsigned int OperandNo
= Operand
.getOperandNo();
237 if (Operand
->getType() != Replacement
->getType())
239 switch (I
->getOpcode()) {
240 case Instruction::GetElementPtr
:
241 case Instruction::ExtractElement
:
242 case Instruction::ExtractValue
:
243 // TODO: We could potentially validate these, but for now just leave indices
248 case Instruction::InsertValue
:
249 case Instruction::InsertElement
:
250 case Instruction::ShuffleVector
:
254 // For Br/Switch, we only try to modify the 1st Operand (condition).
255 // Modify other operands, like switch case may accidently change case from
256 // ConstantInt to a register, which is illegal.
257 case Instruction::Switch
:
258 case Instruction::Br
:
262 case Instruction::Call
:
263 case Instruction::Invoke
:
264 case Instruction::CallBr
: {
265 const Function
*Callee
= cast
<CallBase
>(I
)->getCalledFunction();
266 // If it's an indirect call, give up.
269 // If callee is not an intrinsic, operand 0 is the function to be called.
270 // Since we cannot assume that the replacement is a function pointer,
272 if (!Callee
->getIntrinsicID() && OperandNo
== 0)
274 return !Callee
->hasParamAttribute(OperandNo
, Attribute::ImmArg
);
282 Instruction
*RandomIRBuilder::connectToSink(BasicBlock
&BB
,
283 ArrayRef
<Instruction
*> Insts
,
285 SmallVector
<uint64_t, 8> SinkTys
;
286 for (uint64_t i
= 0; i
< EndOfValueSink
; i
++)
287 SinkTys
.push_back(i
);
288 std::shuffle(SinkTys
.begin(), SinkTys
.end(), Rand
);
289 auto findSinkAndConnect
=
290 [this, V
](ArrayRef
<Instruction
*> Instructions
) -> Instruction
* {
291 auto RS
= makeSampler
<Use
*>(Rand
);
292 for (auto &I
: Instructions
) {
293 for (Use
&U
: I
->operands())
294 if (isCompatibleReplacement(I
, U
, V
))
298 Use
*Sink
= RS
.getSelection();
299 User
*U
= Sink
->getUser();
300 unsigned OpNo
= Sink
->getOperandNo();
301 U
->setOperand(OpNo
, V
);
302 return cast
<Instruction
>(U
);
306 Instruction
*Sink
= nullptr;
307 for (uint64_t SinkTy
: SinkTys
) {
309 case SinkToInstInCurBlock
:
310 Sink
= findSinkAndConnect(Insts
);
314 case PointersInDominator
: {
315 auto Dominators
= getDominators(&BB
);
316 std::shuffle(Dominators
.begin(), Dominators
.end(), Rand
);
317 for (BasicBlock
*Dom
: Dominators
) {
318 for (Instruction
&I
: *Dom
) {
319 if (isa
<PointerType
>(I
.getType()))
320 return new StoreInst(V
, &I
, Insts
.back());
325 case InstInDominatee
: {
326 auto Dominatees
= getDominatees(&BB
);
327 std::shuffle(Dominatees
.begin(), Dominatees
.end(), Rand
);
328 for (BasicBlock
*Dominee
: Dominatees
) {
329 std::vector
<Instruction
*> Instructions
;
330 for (Instruction
&I
: *Dominee
)
331 Instructions
.push_back(&I
);
332 Sink
= findSinkAndConnect(Instructions
);
340 /// TODO: allocate a new stack memory.
341 return newSink(BB
, Insts
, V
);
342 case SinkToGlobalVariable
: {
343 Module
*M
= BB
.getParent()->getParent();
344 auto [GV
, DidCreate
] =
345 findOrCreateGlobalVariable(M
, {}, fuzzerop::onlyType(V
->getType()));
346 return new StoreInst(V
, GV
, Insts
.back());
350 llvm_unreachable("EndOfValueSink executed");
353 llvm_unreachable("Can't find a sink");
356 Instruction
*RandomIRBuilder::newSink(BasicBlock
&BB
,
357 ArrayRef
<Instruction
*> Insts
, Value
*V
) {
358 Value
*Ptr
= findPointer(BB
, Insts
, {V
}, matchFirstType());
360 if (uniform(Rand
, 0, 1)) {
361 Type
*Ty
= V
->getType();
362 Ptr
= createStackMemory(BB
.getParent(), Ty
, UndefValue::get(Ty
));
364 Ptr
= UndefValue::get(PointerType::get(V
->getType(), 0));
368 return new StoreInst(V
, Ptr
, Insts
.back());
371 Value
*RandomIRBuilder::findPointer(BasicBlock
&BB
,
372 ArrayRef
<Instruction
*> Insts
,
373 ArrayRef
<Value
*> Srcs
, SourcePred Pred
) {
374 auto IsMatchingPtr
= [&Srcs
, &Pred
](Instruction
*Inst
) {
375 // Invoke instructions sometimes produce valid pointers but currently
376 // we can't insert loads or stores from them
377 if (Inst
->isTerminator())
380 if (auto *PtrTy
= dyn_cast
<PointerType
>(Inst
->getType())) {
381 if (PtrTy
->isOpaque())
384 // We can never generate loads from non first class or non sized types
385 Type
*ElemTy
= PtrTy
->getNonOpaquePointerElementType();
386 if (!ElemTy
->isSized() || !ElemTy
->isFirstClassType())
389 // TODO: Check if this is horribly expensive.
390 return Pred
.matches(Srcs
, UndefValue::get(ElemTy
));
394 if (auto RS
= makeSampler(Rand
, make_filter_range(Insts
, IsMatchingPtr
)))
395 return RS
.getSelection();
399 Type
*RandomIRBuilder::randomType() {
400 uint64_t TyIdx
= uniform
<uint64_t>(Rand
, 0, KnownTypes
.size() - 1);
401 return KnownTypes
[TyIdx
];
404 Function
*RandomIRBuilder::createFunctionDeclaration(Module
&M
,
406 Type
*RetType
= randomType();
408 SmallVector
<Type
*, 2> Args
;
409 for (uint64_t i
= 0; i
< ArgNum
; i
++) {
410 Args
.push_back(randomType());
413 Function
*F
= Function::Create(FunctionType::get(RetType
, Args
,
415 GlobalValue::ExternalLinkage
, "f", &M
);
418 Function
*RandomIRBuilder::createFunctionDeclaration(Module
&M
) {
419 return createFunctionDeclaration(
420 M
, uniform
<uint64_t>(Rand
, MinArgNum
, MaxArgNum
));
423 Function
*RandomIRBuilder::createFunctionDefinition(Module
&M
,
425 Function
*F
= this->createFunctionDeclaration(M
, ArgNum
);
427 // TODO: Some arguments and a return value would probably be more
429 LLVMContext
&Context
= M
.getContext();
431 BasicBlock
*BB
= BasicBlock::Create(Context
, "BB", F
);
432 Type
*RetTy
= F
->getReturnType();
433 if (RetTy
!= Type::getVoidTy(Context
)) {
434 Instruction
*RetAlloca
=
435 new AllocaInst(RetTy
, DL
.getAllocaAddrSpace(), "RP", BB
);
436 Instruction
*RetLoad
= new LoadInst(RetTy
, RetAlloca
, "", BB
);
437 ReturnInst::Create(Context
, RetLoad
, BB
);
439 ReturnInst::Create(Context
, BB
);
444 Function
*RandomIRBuilder::createFunctionDefinition(Module
&M
) {
445 return createFunctionDefinition(
446 M
, uniform
<uint64_t>(Rand
, MinArgNum
, MaxArgNum
));