1 //===-- RandomIRBuilder.cpp -----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/FuzzMutate/RandomIRBuilder.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/FuzzMutate/OpDescriptor.h"
12 #include "llvm/FuzzMutate/Random.h"
13 #include "llvm/IR/BasicBlock.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Dominators.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/IR/Module.h"
23 using namespace fuzzerop
;
25 /// Return a vector of Blocks that dominates this block, excluding current
27 static std::vector
<BasicBlock
*> getDominators(BasicBlock
*BB
) {
28 std::vector
<BasicBlock
*> ret
;
29 DominatorTree
DT(*BB
->getParent());
30 DomTreeNode
*Node
= DT
.getNode(BB
);
31 // It's possible that an orphan block is not in the dom tree. In that case we
32 // just return nothing.
35 Node
= Node
->getIDom();
36 while (Node
&& Node
->getBlock()) {
37 ret
.push_back(Node
->getBlock());
39 Node
= Node
->getIDom();
44 /// Return a vector of Blocks that is dominated by this block, excluding current
46 static std::vector
<BasicBlock
*> getDominatees(BasicBlock
*BB
) {
47 DominatorTree
DT(*BB
->getParent());
48 std::vector
<BasicBlock
*> ret
;
49 DomTreeNode
*Parent
= DT
.getNode(BB
);
50 // It's possible that an orphan block is not in the dom tree. In that case we
51 // just return nothing.
54 for (DomTreeNode
*Child
: Parent
->children())
55 ret
.push_back(Child
->getBlock());
57 while (Idx
< ret
.size()) {
58 DomTreeNode
*Node
= DT
[ret
[Idx
]];
60 for (DomTreeNode
*Child
: Node
->children())
61 ret
.push_back(Child
->getBlock());
66 AllocaInst
*RandomIRBuilder::createStackMemory(Function
*F
, Type
*Ty
,
68 /// TODO: For all Allocas, maybe allocate an array.
69 BasicBlock
*EntryBB
= &F
->getEntryBlock();
70 const DataLayout
&DL
= F
->getDataLayout();
71 AllocaInst
*Alloca
= new AllocaInst(Ty
, DL
.getAllocaAddrSpace(), "A",
72 EntryBB
->getFirstInsertionPt());
74 new StoreInst(Init
, Alloca
, std::next(Alloca
->getIterator()));
78 std::pair
<GlobalVariable
*, bool>
79 RandomIRBuilder::findOrCreateGlobalVariable(Module
*M
, ArrayRef
<Value
*> Srcs
,
80 fuzzerop::SourcePred Pred
) {
81 auto MatchesPred
= [&Srcs
, &Pred
](GlobalVariable
*GV
) {
82 // Can't directly compare GV's type, as it would be a pointer to the actual
84 return Pred
.matches(Srcs
, PoisonValue::get(GV
->getValueType()));
86 bool DidCreate
= false;
87 SmallVector
<GlobalVariable
*, 4> GlobalVars
;
88 for (GlobalVariable
&GV
: M
->globals()) {
89 GlobalVars
.push_back(&GV
);
91 auto RS
= makeSampler(Rand
, make_filter_range(GlobalVars
, MatchesPred
));
92 RS
.sample(nullptr, 1);
93 GlobalVariable
*GV
= RS
.getSelection();
96 using LinkageTypes
= GlobalVariable::LinkageTypes
;
97 auto TRS
= makeSampler
<Constant
*>(Rand
);
98 TRS
.sample(Pred
.generate(Srcs
, KnownTypes
));
99 Constant
*Init
= TRS
.getSelection();
100 Type
*Ty
= Init
->getType();
101 GV
= new GlobalVariable(*M
, Ty
, false, LinkageTypes::ExternalLinkage
, Init
,
103 GlobalValue::ThreadLocalMode::NotThreadLocal
,
104 M
->getDataLayout().getDefaultGlobalsAddressSpace());
106 return {GV
, DidCreate
};
109 Value
*RandomIRBuilder::findOrCreateSource(BasicBlock
&BB
,
110 ArrayRef
<Instruction
*> Insts
) {
111 return findOrCreateSource(BB
, Insts
, {}, anyType());
114 Value
*RandomIRBuilder::findOrCreateSource(BasicBlock
&BB
,
115 ArrayRef
<Instruction
*> Insts
,
116 ArrayRef
<Value
*> Srcs
,
118 bool allowConstant
) {
119 auto MatchesPred
= [&Srcs
, &Pred
](Value
*V
) { return Pred
.matches(Srcs
, V
); };
120 SmallVector
<uint64_t, 8> SrcTys
;
121 for (uint64_t i
= 0; i
< EndOfValueSource
; i
++)
123 std::shuffle(SrcTys
.begin(), SrcTys
.end(), Rand
);
124 for (uint64_t SrcTy
: SrcTys
) {
126 case SrcFromInstInCurBlock
: {
127 auto RS
= makeSampler(Rand
, make_filter_range(Insts
, MatchesPred
));
129 return RS
.getSelection();
133 case FunctionArgument
: {
134 Function
*F
= BB
.getParent();
135 SmallVector
<Argument
*, 8> Args
;
136 for (uint64_t i
= 0; i
< F
->arg_size(); i
++) {
137 Args
.push_back(F
->getArg(i
));
139 auto RS
= makeSampler(Rand
, make_filter_range(Args
, MatchesPred
));
141 return RS
.getSelection();
145 case InstInDominator
: {
146 auto Dominators
= getDominators(&BB
);
147 std::shuffle(Dominators
.begin(), Dominators
.end(), Rand
);
148 for (BasicBlock
*Dom
: Dominators
) {
149 SmallVector
<Instruction
*, 16> Instructions
;
150 for (Instruction
&I
: *Dom
) {
151 Instructions
.push_back(&I
);
154 makeSampler(Rand
, make_filter_range(Instructions
, MatchesPred
));
155 // Also consider choosing no source, meaning we want a new one.
157 return RS
.getSelection();
162 case SrcFromGlobalVariable
: {
163 Module
*M
= BB
.getParent()->getParent();
164 auto [GV
, DidCreate
] = findOrCreateGlobalVariable(M
, Srcs
, Pred
);
165 Type
*Ty
= GV
->getValueType();
166 LoadInst
*LoadGV
= nullptr;
167 if (BB
.getTerminator()) {
168 LoadGV
= new LoadInst(Ty
, GV
, "LGV", BB
.getFirstInsertionPt());
170 LoadGV
= new LoadInst(Ty
, GV
, "LGV", &BB
);
172 // Because we might be generating new values, we have to check if it
175 if (Pred
.matches(Srcs
, LoadGV
)) {
178 LoadGV
->eraseFromParent();
179 // If no one is using this GlobalVariable, delete it too.
180 if (GV
->use_empty()) {
181 GV
->eraseFromParent();
186 case NewConstOrStack
: {
187 return newSource(BB
, Insts
, Srcs
, Pred
, allowConstant
);
190 case EndOfValueSource
: {
191 llvm_unreachable("EndOfValueSource executed");
195 llvm_unreachable("Can't find a source");
198 Value
*RandomIRBuilder::newSource(BasicBlock
&BB
, ArrayRef
<Instruction
*> Insts
,
199 ArrayRef
<Value
*> Srcs
, SourcePred Pred
,
200 bool allowConstant
) {
201 // Generate some constants to choose from.
202 auto RS
= makeSampler
<Value
*>(Rand
);
203 RS
.sample(Pred
.generate(Srcs
, KnownTypes
));
205 // If we can find a pointer to load from, use it half the time.
206 Value
*Ptr
= findPointer(BB
, Insts
);
208 // Create load from the chosen pointer
209 auto IP
= BB
.getFirstInsertionPt();
210 if (auto *I
= dyn_cast
<Instruction
>(Ptr
)) {
211 IP
= ++I
->getIterator();
212 assert(IP
!= BB
.end() && "guaranteed by the findPointer");
214 // Pick the type independently.
215 Type
*AccessTy
= RS
.getSelection()->getType();
216 auto *NewLoad
= new LoadInst(AccessTy
, Ptr
, "L", IP
);
218 // Only sample this load if it really matches the descriptor
219 if (Pred
.matches(Srcs
, NewLoad
))
220 RS
.sample(NewLoad
, RS
.totalWeight());
222 NewLoad
->eraseFromParent();
225 Value
*newSrc
= RS
.getSelection();
226 // Generate a stack alloca and store the constant to it if constant is not
227 // allowed, our hope is that later mutations can generate some values and
228 // store to this placeholder.
229 if (!allowConstant
&& isa
<Constant
>(newSrc
)) {
230 Type
*Ty
= newSrc
->getType();
231 Function
*F
= BB
.getParent();
232 AllocaInst
*Alloca
= createStackMemory(F
, Ty
, newSrc
);
233 if (BB
.getTerminator()) {
234 newSrc
= new LoadInst(Ty
, Alloca
, /*ArrLen,*/ "L",
235 BB
.getTerminator()->getIterator());
237 newSrc
= new LoadInst(Ty
, Alloca
, /*ArrLen,*/ "L", &BB
);
243 static bool isCompatibleReplacement(const Instruction
*I
, const Use
&Operand
,
244 const Value
*Replacement
) {
245 unsigned int OperandNo
= Operand
.getOperandNo();
246 if (Operand
->getType() != Replacement
->getType())
248 switch (I
->getOpcode()) {
249 case Instruction::GetElementPtr
:
250 case Instruction::ExtractElement
:
251 case Instruction::ExtractValue
:
252 // TODO: We could potentially validate these, but for now just leave indices
257 case Instruction::InsertValue
:
258 case Instruction::InsertElement
:
259 case Instruction::ShuffleVector
:
263 // For Br/Switch, we only try to modify the 1st Operand (condition).
264 // Modify other operands, like switch case may accidently change case from
265 // ConstantInt to a register, which is illegal.
266 case Instruction::Switch
:
267 case Instruction::Br
:
271 case Instruction::Call
:
272 case Instruction::Invoke
:
273 case Instruction::CallBr
: {
274 const Function
*Callee
= cast
<CallBase
>(I
)->getCalledFunction();
275 // If it's an indirect call, give up.
278 // If callee is not an intrinsic, operand 0 is the function to be called.
279 // Since we cannot assume that the replacement is a function pointer,
281 if (!Callee
->getIntrinsicID() && OperandNo
== 0)
283 return !Callee
->hasParamAttribute(OperandNo
, Attribute::ImmArg
);
291 Instruction
*RandomIRBuilder::connectToSink(BasicBlock
&BB
,
292 ArrayRef
<Instruction
*> Insts
,
294 SmallVector
<uint64_t, 8> SinkTys
;
295 for (uint64_t i
= 0; i
< EndOfValueSink
; i
++)
296 SinkTys
.push_back(i
);
297 std::shuffle(SinkTys
.begin(), SinkTys
.end(), Rand
);
298 auto findSinkAndConnect
=
299 [this, V
](ArrayRef
<Instruction
*> Instructions
) -> Instruction
* {
300 auto RS
= makeSampler
<Use
*>(Rand
);
301 for (auto &I
: Instructions
) {
302 for (Use
&U
: I
->operands())
303 if (isCompatibleReplacement(I
, U
, V
))
307 Use
*Sink
= RS
.getSelection();
308 User
*U
= Sink
->getUser();
309 unsigned OpNo
= Sink
->getOperandNo();
310 U
->setOperand(OpNo
, V
);
311 return cast
<Instruction
>(U
);
315 Instruction
*Sink
= nullptr;
316 for (uint64_t SinkTy
: SinkTys
) {
318 case SinkToInstInCurBlock
:
319 Sink
= findSinkAndConnect(Insts
);
323 case PointersInDominator
: {
324 auto Dominators
= getDominators(&BB
);
325 std::shuffle(Dominators
.begin(), Dominators
.end(), Rand
);
326 for (BasicBlock
*Dom
: Dominators
) {
327 for (Instruction
&I
: *Dom
) {
328 if (isa
<PointerType
>(I
.getType()))
329 return new StoreInst(V
, &I
, Insts
.back()->getIterator());
334 case InstInDominatee
: {
335 auto Dominatees
= getDominatees(&BB
);
336 std::shuffle(Dominatees
.begin(), Dominatees
.end(), Rand
);
337 for (BasicBlock
*Dominee
: Dominatees
) {
338 std::vector
<Instruction
*> Instructions
;
339 for (Instruction
&I
: *Dominee
)
340 Instructions
.push_back(&I
);
341 Sink
= findSinkAndConnect(Instructions
);
349 /// TODO: allocate a new stack memory.
350 return newSink(BB
, Insts
, V
);
351 case SinkToGlobalVariable
: {
352 Module
*M
= BB
.getParent()->getParent();
353 auto [GV
, DidCreate
] =
354 findOrCreateGlobalVariable(M
, {}, fuzzerop::onlyType(V
->getType()));
355 return new StoreInst(V
, GV
, Insts
.back()->getIterator());
359 llvm_unreachable("EndOfValueSink executed");
362 llvm_unreachable("Can't find a sink");
365 Instruction
*RandomIRBuilder::newSink(BasicBlock
&BB
,
366 ArrayRef
<Instruction
*> Insts
, Value
*V
) {
367 Value
*Ptr
= findPointer(BB
, Insts
);
369 if (uniform(Rand
, 0, 1)) {
370 Type
*Ty
= V
->getType();
371 Ptr
= createStackMemory(BB
.getParent(), Ty
, PoisonValue::get(Ty
));
373 Ptr
= PoisonValue::get(PointerType::get(V
->getType(), 0));
377 return new StoreInst(V
, Ptr
, Insts
.back()->getIterator());
380 Value
*RandomIRBuilder::findPointer(BasicBlock
&BB
,
381 ArrayRef
<Instruction
*> Insts
) {
382 auto IsMatchingPtr
= [](Instruction
*Inst
) {
383 // Invoke instructions sometimes produce valid pointers but currently
384 // we can't insert loads or stores from them
385 if (Inst
->isTerminator())
388 return Inst
->getType()->isPointerTy();
390 if (auto RS
= makeSampler(Rand
, make_filter_range(Insts
, IsMatchingPtr
)))
391 return RS
.getSelection();
395 Type
*RandomIRBuilder::randomType() {
396 uint64_t TyIdx
= uniform
<uint64_t>(Rand
, 0, KnownTypes
.size() - 1);
397 return KnownTypes
[TyIdx
];
400 Function
*RandomIRBuilder::createFunctionDeclaration(Module
&M
,
402 Type
*RetType
= randomType();
404 SmallVector
<Type
*, 2> Args
;
405 for (uint64_t i
= 0; i
< ArgNum
; i
++) {
406 Args
.push_back(randomType());
409 Function
*F
= Function::Create(FunctionType::get(RetType
, Args
,
411 GlobalValue::ExternalLinkage
, "f", &M
);
414 Function
*RandomIRBuilder::createFunctionDeclaration(Module
&M
) {
415 return createFunctionDeclaration(
416 M
, uniform
<uint64_t>(Rand
, MinArgNum
, MaxArgNum
));
419 Function
*RandomIRBuilder::createFunctionDefinition(Module
&M
,
421 Function
*F
= this->createFunctionDeclaration(M
, ArgNum
);
423 // TODO: Some arguments and a return value would probably be more
425 LLVMContext
&Context
= M
.getContext();
426 const DataLayout
&DL
= M
.getDataLayout();
427 BasicBlock
*BB
= BasicBlock::Create(Context
, "BB", F
);
428 Type
*RetTy
= F
->getReturnType();
429 if (RetTy
!= Type::getVoidTy(Context
)) {
430 Instruction
*RetAlloca
=
431 new AllocaInst(RetTy
, DL
.getAllocaAddrSpace(), "RP", BB
);
432 Instruction
*RetLoad
= new LoadInst(RetTy
, RetAlloca
, "", BB
);
433 ReturnInst::Create(Context
, RetLoad
, BB
);
435 ReturnInst::Create(Context
, BB
);
440 Function
*RandomIRBuilder::createFunctionDefinition(Module
&M
) {
441 return createFunctionDefinition(
442 M
, uniform
<uint64_t>(Rand
, MinArgNum
, MaxArgNum
));