1 //===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_ANALYSIS_CFLGRAPH_H
16 #define LLVM_LIB_ANALYSIS_CFLGRAPH_H
18 #include "AliasAnalysisSummary.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Analysis/MemoryBuiltins.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/IR/Argument.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/InstVisitor.h"
32 #include "llvm/IR/InstrTypes.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/Operator.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/ErrorHandling.h"
47 /// The Program Expression Graph (PEG) of CFL analysis
48 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
49 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
50 /// the main purpose of this graph is to abstract away unrelated facts and
51 /// translate the rest into a form that can be easily digested by CFL analyses.
52 /// Each Node in the graph is an InstantiatedValue, and each edge represent a
53 /// pointer assignment between InstantiatedValue. Pointer
54 /// references/dereferences are not explicitly stored in the graph: we
55 /// implicitly assume that for each node (X, I) it has a dereference edge to (X,
56 /// I+1) and a reference edge to (X, I-1).
59 using Node
= InstantiatedValue
;
66 using EdgeList
= std::vector
<Edge
>;
69 EdgeList Edges
, ReverseEdges
;
74 std::vector
<NodeInfo
> Levels
;
77 bool addNodeToLevel(unsigned Level
) {
78 auto NumLevels
= Levels
.size();
79 if (NumLevels
> Level
)
81 Levels
.resize(Level
+ 1);
85 NodeInfo
&getNodeInfoAtLevel(unsigned Level
) {
86 assert(Level
< Levels
.size());
89 const NodeInfo
&getNodeInfoAtLevel(unsigned Level
) const {
90 assert(Level
< Levels
.size());
94 unsigned getNumLevels() const { return Levels
.size(); }
98 using ValueMap
= DenseMap
<Value
*, ValueInfo
>;
102 NodeInfo
*getNode(Node N
) {
103 auto Itr
= ValueImpls
.find(N
.Val
);
104 if (Itr
== ValueImpls
.end() || Itr
->second
.getNumLevels() <= N
.DerefLevel
)
106 return &Itr
->second
.getNodeInfoAtLevel(N
.DerefLevel
);
110 using const_value_iterator
= ValueMap::const_iterator
;
112 bool addNode(Node N
, AliasAttrs Attr
= AliasAttrs()) {
113 assert(N
.Val
!= nullptr);
114 auto &ValInfo
= ValueImpls
[N
.Val
];
115 auto Changed
= ValInfo
.addNodeToLevel(N
.DerefLevel
);
116 ValInfo
.getNodeInfoAtLevel(N
.DerefLevel
).Attr
|= Attr
;
120 void addAttr(Node N
, AliasAttrs Attr
) {
121 auto *Info
= getNode(N
);
122 assert(Info
!= nullptr);
126 void addEdge(Node From
, Node To
, int64_t Offset
= 0) {
127 auto *FromInfo
= getNode(From
);
128 assert(FromInfo
!= nullptr);
129 auto *ToInfo
= getNode(To
);
130 assert(ToInfo
!= nullptr);
132 FromInfo
->Edges
.push_back(Edge
{To
, Offset
});
133 ToInfo
->ReverseEdges
.push_back(Edge
{From
, Offset
});
136 const NodeInfo
*getNode(Node N
) const {
137 auto Itr
= ValueImpls
.find(N
.Val
);
138 if (Itr
== ValueImpls
.end() || Itr
->second
.getNumLevels() <= N
.DerefLevel
)
140 return &Itr
->second
.getNodeInfoAtLevel(N
.DerefLevel
);
143 AliasAttrs
attrFor(Node N
) const {
144 auto *Info
= getNode(N
);
145 assert(Info
!= nullptr);
149 iterator_range
<const_value_iterator
> value_mappings() const {
150 return make_range
<const_value_iterator
>(ValueImpls
.begin(),
155 /// A builder class used to create CFLGraph instance from a given function
156 /// The CFL-AA that uses this builder must provide its own type as a template
157 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
158 /// needs a way of obtaining the summary of other functions when callinsts are
160 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
161 /// member function that takes a Function& and returns the corresponding summary
162 /// as a const AliasSummary*.
163 template <typename CFLAA
> class CFLGraphBuilder
{
164 // Input of the builder
166 const TargetLibraryInfo
&TLI
;
168 // Output of the builder
170 SmallVector
<Value
*, 4> ReturnedValues
;
173 /// Gets the edges our graph should have, based on an Instruction*
174 class GetEdgesVisitor
: public InstVisitor
<GetEdgesVisitor
, void> {
176 const DataLayout
&DL
;
177 const TargetLibraryInfo
&TLI
;
180 SmallVectorImpl
<Value
*> &ReturnValues
;
182 static bool hasUsefulEdges(ConstantExpr
*CE
) {
183 // ConstantExpr doesn't have terminators, invokes, or fences, so only
184 // needs to check for compares.
185 return CE
->getOpcode() != Instruction::ICmp
&&
186 CE
->getOpcode() != Instruction::FCmp
;
189 // Returns possible functions called by CS into the given SmallVectorImpl.
190 // Returns true if targets found, false otherwise.
191 static bool getPossibleTargets(CallBase
&Call
,
192 SmallVectorImpl
<Function
*> &Output
) {
193 if (auto *Fn
= Call
.getCalledFunction()) {
194 Output
.push_back(Fn
);
198 // TODO: If the call is indirect, we might be able to enumerate all
199 // potential targets of the call and return them, rather than just
204 void addNode(Value
*Val
, AliasAttrs Attr
= AliasAttrs()) {
205 assert(Val
!= nullptr && Val
->getType()->isPointerTy());
206 if (auto GVal
= dyn_cast
<GlobalValue
>(Val
)) {
207 if (Graph
.addNode(InstantiatedValue
{GVal
, 0},
208 getGlobalOrArgAttrFromValue(*GVal
)))
209 Graph
.addNode(InstantiatedValue
{GVal
, 1}, getAttrUnknown());
210 } else if (auto CExpr
= dyn_cast
<ConstantExpr
>(Val
)) {
211 if (hasUsefulEdges(CExpr
)) {
212 if (Graph
.addNode(InstantiatedValue
{CExpr
, 0}))
213 visitConstantExpr(CExpr
);
216 Graph
.addNode(InstantiatedValue
{Val
, 0}, Attr
);
219 void addAssignEdge(Value
*From
, Value
*To
, int64_t Offset
= 0) {
220 assert(From
!= nullptr && To
!= nullptr);
221 if (!From
->getType()->isPointerTy() || !To
->getType()->isPointerTy())
226 Graph
.addEdge(InstantiatedValue
{From
, 0}, InstantiatedValue
{To
, 0},
231 void addDerefEdge(Value
*From
, Value
*To
, bool IsRead
) {
232 assert(From
!= nullptr && To
!= nullptr);
233 // FIXME: This is subtly broken, due to how we model some instructions
234 // (e.g. extractvalue, extractelement) as loads. Since those take
235 // non-pointer operands, we'll entirely skip adding edges for those.
237 // addAssignEdge seems to have a similar issue with insertvalue, etc.
238 if (!From
->getType()->isPointerTy() || !To
->getType()->isPointerTy())
243 Graph
.addNode(InstantiatedValue
{From
, 1});
244 Graph
.addEdge(InstantiatedValue
{From
, 1}, InstantiatedValue
{To
, 0});
246 Graph
.addNode(InstantiatedValue
{To
, 1});
247 Graph
.addEdge(InstantiatedValue
{From
, 0}, InstantiatedValue
{To
, 1});
251 void addLoadEdge(Value
*From
, Value
*To
) { addDerefEdge(From
, To
, true); }
252 void addStoreEdge(Value
*From
, Value
*To
) { addDerefEdge(From
, To
, false); }
255 GetEdgesVisitor(CFLGraphBuilder
&Builder
, const DataLayout
&DL
)
256 : AA(Builder
.Analysis
), DL(DL
), TLI(Builder
.TLI
), Graph(Builder
.Graph
),
257 ReturnValues(Builder
.ReturnedValues
) {}
259 void visitInstruction(Instruction
&) {
260 llvm_unreachable("Unsupported instruction encountered");
263 void visitReturnInst(ReturnInst
&Inst
) {
264 if (auto RetVal
= Inst
.getReturnValue()) {
265 if (RetVal
->getType()->isPointerTy()) {
267 ReturnValues
.push_back(RetVal
);
272 void visitPtrToIntInst(PtrToIntInst
&Inst
) {
273 auto *Ptr
= Inst
.getOperand(0);
274 addNode(Ptr
, getAttrEscaped());
277 void visitIntToPtrInst(IntToPtrInst
&Inst
) {
279 addNode(Ptr
, getAttrUnknown());
282 void visitCastInst(CastInst
&Inst
) {
283 auto *Src
= Inst
.getOperand(0);
284 addAssignEdge(Src
, &Inst
);
287 void visitBinaryOperator(BinaryOperator
&Inst
) {
288 auto *Op1
= Inst
.getOperand(0);
289 auto *Op2
= Inst
.getOperand(1);
290 addAssignEdge(Op1
, &Inst
);
291 addAssignEdge(Op2
, &Inst
);
294 void visitUnaryOperator(UnaryOperator
&Inst
) {
295 auto *Src
= Inst
.getOperand(0);
296 addAssignEdge(Src
, &Inst
);
299 void visitAtomicCmpXchgInst(AtomicCmpXchgInst
&Inst
) {
300 auto *Ptr
= Inst
.getPointerOperand();
301 auto *Val
= Inst
.getNewValOperand();
302 addStoreEdge(Val
, Ptr
);
305 void visitAtomicRMWInst(AtomicRMWInst
&Inst
) {
306 auto *Ptr
= Inst
.getPointerOperand();
307 auto *Val
= Inst
.getValOperand();
308 addStoreEdge(Val
, Ptr
);
311 void visitPHINode(PHINode
&Inst
) {
312 for (Value
*Val
: Inst
.incoming_values())
313 addAssignEdge(Val
, &Inst
);
316 void visitGEP(GEPOperator
&GEPOp
) {
317 uint64_t Offset
= UnknownOffset
;
318 APInt
APOffset(DL
.getPointerSizeInBits(GEPOp
.getPointerAddressSpace()),
320 if (GEPOp
.accumulateConstantOffset(DL
, APOffset
))
321 Offset
= APOffset
.getSExtValue();
323 auto *Op
= GEPOp
.getPointerOperand();
324 addAssignEdge(Op
, &GEPOp
, Offset
);
327 void visitGetElementPtrInst(GetElementPtrInst
&Inst
) {
328 auto *GEPOp
= cast
<GEPOperator
>(&Inst
);
332 void visitSelectInst(SelectInst
&Inst
) {
333 // Condition is not processed here (The actual statement producing
334 // the condition result is processed elsewhere). For select, the
335 // condition is evaluated, but not loaded, stored, or assigned
336 // simply as a result of being the condition of a select.
338 auto *TrueVal
= Inst
.getTrueValue();
339 auto *FalseVal
= Inst
.getFalseValue();
340 addAssignEdge(TrueVal
, &Inst
);
341 addAssignEdge(FalseVal
, &Inst
);
344 void visitAllocaInst(AllocaInst
&Inst
) { addNode(&Inst
); }
346 void visitLoadInst(LoadInst
&Inst
) {
347 auto *Ptr
= Inst
.getPointerOperand();
349 addLoadEdge(Ptr
, Val
);
352 void visitStoreInst(StoreInst
&Inst
) {
353 auto *Ptr
= Inst
.getPointerOperand();
354 auto *Val
= Inst
.getValueOperand();
355 addStoreEdge(Val
, Ptr
);
358 void visitVAArgInst(VAArgInst
&Inst
) {
359 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
362 // 1. Loads a value from *((T*)*Ptr).
363 // 2. Increments (stores to) *Ptr by some target-specific amount.
364 // For now, we'll handle this like a landingpad instruction (by placing
366 // result in its own group, and having that group alias externals).
367 if (Inst
.getType()->isPointerTy())
368 addNode(&Inst
, getAttrUnknown());
371 static bool isFunctionExternal(Function
*Fn
) {
372 return !Fn
->hasExactDefinition();
375 bool tryInterproceduralAnalysis(CallBase
&Call
,
376 const SmallVectorImpl
<Function
*> &Fns
) {
377 assert(Fns
.size() > 0);
379 if (Call
.arg_size() > MaxSupportedArgsInSummary
)
382 // Exit early if we'll fail anyway
383 for (auto *Fn
: Fns
) {
384 if (isFunctionExternal(Fn
) || Fn
->isVarArg())
386 // Fail if the caller does not provide enough arguments
387 assert(Fn
->arg_size() <= Call
.arg_size());
388 if (!AA
.getAliasSummary(*Fn
))
392 for (auto *Fn
: Fns
) {
393 auto Summary
= AA
.getAliasSummary(*Fn
);
394 assert(Summary
!= nullptr);
396 auto &RetParamRelations
= Summary
->RetParamRelations
;
397 for (auto &Relation
: RetParamRelations
) {
398 auto IRelation
= instantiateExternalRelation(Relation
, Call
);
399 if (IRelation
.hasValue()) {
400 Graph
.addNode(IRelation
->From
);
401 Graph
.addNode(IRelation
->To
);
402 Graph
.addEdge(IRelation
->From
, IRelation
->To
);
406 auto &RetParamAttributes
= Summary
->RetParamAttributes
;
407 for (auto &Attribute
: RetParamAttributes
) {
408 auto IAttr
= instantiateExternalAttribute(Attribute
, Call
);
409 if (IAttr
.hasValue())
410 Graph
.addNode(IAttr
->IValue
, IAttr
->Attr
);
417 void visitCallBase(CallBase
&Call
) {
418 // Make sure all arguments and return value are added to the graph first
419 for (Value
*V
: Call
.args())
420 if (V
->getType()->isPointerTy())
422 if (Call
.getType()->isPointerTy())
425 // Check if Inst is a call to a library function that
426 // allocates/deallocates on the heap. Those kinds of functions do not
427 // introduce any aliases.
428 // TODO: address other common library functions such as realloc(),
430 if (isMallocOrCallocLikeFn(&Call
, &TLI
) || isFreeCall(&Call
, &TLI
))
433 // TODO: Add support for noalias args/all the other fun function
434 // attributes that we can tack on.
435 SmallVector
<Function
*, 4> Targets
;
436 if (getPossibleTargets(Call
, Targets
))
437 if (tryInterproceduralAnalysis(Call
, Targets
))
440 // Because the function is opaque, we need to note that anything
441 // could have happened to the arguments (unless the function is marked
442 // readonly or readnone), and that the result could alias just about
443 // anything, too (unless the result is marked noalias).
444 if (!Call
.onlyReadsMemory())
445 for (Value
*V
: Call
.args()) {
446 if (V
->getType()->isPointerTy()) {
447 // The argument itself escapes.
448 Graph
.addAttr(InstantiatedValue
{V
, 0}, getAttrEscaped());
449 // The fate of argument memory is unknown. Note that since
450 // AliasAttrs is transitive with respect to dereference, we only
451 // need to specify it for the first-level memory.
452 Graph
.addNode(InstantiatedValue
{V
, 1}, getAttrUnknown());
456 if (Call
.getType()->isPointerTy()) {
457 auto *Fn
= Call
.getCalledFunction();
458 if (Fn
== nullptr || !Fn
->returnDoesNotAlias())
459 // No need to call addNode() since we've added Inst at the
460 // beginning of this function and we know it is not a global.
461 Graph
.addAttr(InstantiatedValue
{&Call
, 0}, getAttrUnknown());
465 /// Because vectors/aggregates are immutable and unaddressable, there's
466 /// nothing we can do to coax a value out of them, other than calling
467 /// Extract{Element,Value}. We can effectively treat them as pointers to
468 /// arbitrary memory locations we can store in and load from.
469 void visitExtractElementInst(ExtractElementInst
&Inst
) {
470 auto *Ptr
= Inst
.getVectorOperand();
472 addLoadEdge(Ptr
, Val
);
475 void visitInsertElementInst(InsertElementInst
&Inst
) {
476 auto *Vec
= Inst
.getOperand(0);
477 auto *Val
= Inst
.getOperand(1);
478 addAssignEdge(Vec
, &Inst
);
479 addStoreEdge(Val
, &Inst
);
482 void visitLandingPadInst(LandingPadInst
&Inst
) {
483 // Exceptions come from "nowhere", from our analysis' perspective.
484 // So we place the instruction its own group, noting that said group may
486 if (Inst
.getType()->isPointerTy())
487 addNode(&Inst
, getAttrUnknown());
490 void visitInsertValueInst(InsertValueInst
&Inst
) {
491 auto *Agg
= Inst
.getOperand(0);
492 auto *Val
= Inst
.getOperand(1);
493 addAssignEdge(Agg
, &Inst
);
494 addStoreEdge(Val
, &Inst
);
497 void visitExtractValueInst(ExtractValueInst
&Inst
) {
498 auto *Ptr
= Inst
.getAggregateOperand();
499 addLoadEdge(Ptr
, &Inst
);
502 void visitShuffleVectorInst(ShuffleVectorInst
&Inst
) {
503 auto *From1
= Inst
.getOperand(0);
504 auto *From2
= Inst
.getOperand(1);
505 addAssignEdge(From1
, &Inst
);
506 addAssignEdge(From2
, &Inst
);
509 void visitConstantExpr(ConstantExpr
*CE
) {
510 switch (CE
->getOpcode()) {
511 case Instruction::GetElementPtr
: {
512 auto GEPOp
= cast
<GEPOperator
>(CE
);
517 case Instruction::PtrToInt
: {
518 addNode(CE
->getOperand(0), getAttrEscaped());
522 case Instruction::IntToPtr
: {
523 addNode(CE
, getAttrUnknown());
527 case Instruction::BitCast
:
528 case Instruction::AddrSpaceCast
:
529 case Instruction::Trunc
:
530 case Instruction::ZExt
:
531 case Instruction::SExt
:
532 case Instruction::FPExt
:
533 case Instruction::FPTrunc
:
534 case Instruction::UIToFP
:
535 case Instruction::SIToFP
:
536 case Instruction::FPToUI
:
537 case Instruction::FPToSI
: {
538 addAssignEdge(CE
->getOperand(0), CE
);
542 case Instruction::Select
: {
543 addAssignEdge(CE
->getOperand(1), CE
);
544 addAssignEdge(CE
->getOperand(2), CE
);
548 case Instruction::InsertElement
:
549 case Instruction::InsertValue
: {
550 addAssignEdge(CE
->getOperand(0), CE
);
551 addStoreEdge(CE
->getOperand(1), CE
);
555 case Instruction::ExtractElement
:
556 case Instruction::ExtractValue
: {
557 addLoadEdge(CE
->getOperand(0), CE
);
561 case Instruction::Add
:
562 case Instruction::FAdd
:
563 case Instruction::Sub
:
564 case Instruction::FSub
:
565 case Instruction::Mul
:
566 case Instruction::FMul
:
567 case Instruction::UDiv
:
568 case Instruction::SDiv
:
569 case Instruction::FDiv
:
570 case Instruction::URem
:
571 case Instruction::SRem
:
572 case Instruction::FRem
:
573 case Instruction::And
:
574 case Instruction::Or
:
575 case Instruction::Xor
:
576 case Instruction::Shl
:
577 case Instruction::LShr
:
578 case Instruction::AShr
:
579 case Instruction::ICmp
:
580 case Instruction::FCmp
:
581 case Instruction::ShuffleVector
: {
582 addAssignEdge(CE
->getOperand(0), CE
);
583 addAssignEdge(CE
->getOperand(1), CE
);
587 case Instruction::FNeg
: {
588 addAssignEdge(CE
->getOperand(0), CE
);
593 llvm_unreachable("Unknown instruction type encountered!");
600 // Determines whether or not we an instruction is useless to us (e.g.
602 static bool hasUsefulEdges(Instruction
*Inst
) {
603 bool IsNonInvokeRetTerminator
= Inst
->isTerminator() &&
604 !isa
<InvokeInst
>(Inst
) &&
605 !isa
<ReturnInst
>(Inst
);
606 return !isa
<CmpInst
>(Inst
) && !isa
<FenceInst
>(Inst
) &&
607 !IsNonInvokeRetTerminator
;
610 void addArgumentToGraph(Argument
&Arg
) {
611 if (Arg
.getType()->isPointerTy()) {
612 Graph
.addNode(InstantiatedValue
{&Arg
, 0},
613 getGlobalOrArgAttrFromValue(Arg
));
614 // Pointees of a formal parameter is known to the caller
615 Graph
.addNode(InstantiatedValue
{&Arg
, 1}, getAttrCaller());
619 // Given an Instruction, this will add it to the graph, along with any
620 // Instructions that are potentially only available from said Instruction
621 // For example, given the following line:
622 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
623 // addInstructionToGraph would add both the `load` and `getelementptr`
624 // instructions to the graph appropriately.
625 void addInstructionToGraph(GetEdgesVisitor
&Visitor
, Instruction
&Inst
) {
626 if (!hasUsefulEdges(&Inst
))
632 // Builds the graph needed for constructing the StratifiedSets for the given
634 void buildGraphFrom(Function
&Fn
) {
635 GetEdgesVisitor
Visitor(*this, Fn
.getParent()->getDataLayout());
637 for (auto &Bb
: Fn
.getBasicBlockList())
638 for (auto &Inst
: Bb
.getInstList())
639 addInstructionToGraph(Visitor
, Inst
);
641 for (auto &Arg
: Fn
.args())
642 addArgumentToGraph(Arg
);
646 CFLGraphBuilder(CFLAA
&Analysis
, const TargetLibraryInfo
&TLI
, Function
&Fn
)
647 : Analysis(Analysis
), TLI(TLI
) {
651 const CFLGraph
&getCFLGraph() const { return Graph
; }
652 const SmallVector
<Value
*, 4> &getReturnValues() const {
653 return ReturnedValues
;
657 } // end namespace cflaa
658 } // end namespace llvm
660 #endif // LLVM_LIB_ANALYSIS_CFLGRAPH_H