1 //===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based
14 //===----------------------------------------------------------------------===//
16 #ifndef LLVM_LIB_ANALYSIS_CFLGRAPH_H
17 #define LLVM_LIB_ANALYSIS_CFLGRAPH_H
19 #include "AliasAnalysisSummary.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/iterator_range.h"
24 #include "llvm/Analysis/MemoryBuiltins.h"
25 #include "llvm/Analysis/TargetLibraryInfo.h"
26 #include "llvm/IR/Argument.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/InstVisitor.h"
34 #include "llvm/IR/InstrTypes.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/ErrorHandling.h"
49 /// The Program Expression Graph (PEG) of CFL analysis
50 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
51 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
52 /// the main purpose of this graph is to abstract away unrelated facts and
53 /// translate the rest into a form that can be easily digested by CFL analyses.
54 /// Each Node in the graph is an InstantiatedValue, and each edge represent a
55 /// pointer assignment between InstantiatedValue. Pointer
56 /// references/dereferences are not explicitly stored in the graph: we
57 /// implicitly assume that for each node (X, I) it has a dereference edge to (X,
58 /// I+1) and a reference edge to (X, I-1).
61 using Node
= InstantiatedValue
;
68 using EdgeList
= std::vector
<Edge
>;
71 EdgeList Edges
, ReverseEdges
;
76 std::vector
<NodeInfo
> Levels
;
79 bool addNodeToLevel(unsigned Level
) {
80 auto NumLevels
= Levels
.size();
81 if (NumLevels
> Level
)
83 Levels
.resize(Level
+ 1);
87 NodeInfo
&getNodeInfoAtLevel(unsigned Level
) {
88 assert(Level
< Levels
.size());
91 const NodeInfo
&getNodeInfoAtLevel(unsigned Level
) const {
92 assert(Level
< Levels
.size());
96 unsigned getNumLevels() const { return Levels
.size(); }
100 using ValueMap
= DenseMap
<Value
*, ValueInfo
>;
104 NodeInfo
*getNode(Node N
) {
105 auto Itr
= ValueImpls
.find(N
.Val
);
106 if (Itr
== ValueImpls
.end() || Itr
->second
.getNumLevels() <= N
.DerefLevel
)
108 return &Itr
->second
.getNodeInfoAtLevel(N
.DerefLevel
);
112 using const_value_iterator
= ValueMap::const_iterator
;
114 bool addNode(Node N
, AliasAttrs Attr
= AliasAttrs()) {
115 assert(N
.Val
!= nullptr);
116 auto &ValInfo
= ValueImpls
[N
.Val
];
117 auto Changed
= ValInfo
.addNodeToLevel(N
.DerefLevel
);
118 ValInfo
.getNodeInfoAtLevel(N
.DerefLevel
).Attr
|= Attr
;
122 void addAttr(Node N
, AliasAttrs Attr
) {
123 auto *Info
= getNode(N
);
124 assert(Info
!= nullptr);
128 void addEdge(Node From
, Node To
, int64_t Offset
= 0) {
129 auto *FromInfo
= getNode(From
);
130 assert(FromInfo
!= nullptr);
131 auto *ToInfo
= getNode(To
);
132 assert(ToInfo
!= nullptr);
134 FromInfo
->Edges
.push_back(Edge
{To
, Offset
});
135 ToInfo
->ReverseEdges
.push_back(Edge
{From
, Offset
});
138 const NodeInfo
*getNode(Node N
) const {
139 auto Itr
= ValueImpls
.find(N
.Val
);
140 if (Itr
== ValueImpls
.end() || Itr
->second
.getNumLevels() <= N
.DerefLevel
)
142 return &Itr
->second
.getNodeInfoAtLevel(N
.DerefLevel
);
145 AliasAttrs
attrFor(Node N
) const {
146 auto *Info
= getNode(N
);
147 assert(Info
!= nullptr);
151 iterator_range
<const_value_iterator
> value_mappings() const {
152 return make_range
<const_value_iterator
>(ValueImpls
.begin(),
157 ///A builder class used to create CFLGraph instance from a given function
158 /// The CFL-AA that uses this builder must provide its own type as a template
159 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
160 /// needs a way of obtaining the summary of other functions when callinsts are
162 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
163 /// member function that takes a Function& and returns the corresponding summary
164 /// as a const AliasSummary*.
165 template <typename CFLAA
> class CFLGraphBuilder
{
166 // Input of the builder
168 const TargetLibraryInfo
&TLI
;
170 // Output of the builder
172 SmallVector
<Value
*, 4> ReturnedValues
;
175 /// Gets the edges our graph should have, based on an Instruction*
176 class GetEdgesVisitor
: public InstVisitor
<GetEdgesVisitor
, void> {
178 const DataLayout
&DL
;
179 const TargetLibraryInfo
&TLI
;
182 SmallVectorImpl
<Value
*> &ReturnValues
;
184 static bool hasUsefulEdges(ConstantExpr
*CE
) {
185 // ConstantExpr doesn't have terminators, invokes, or fences, so only
187 // to check for compares.
188 return CE
->getOpcode() != Instruction::ICmp
&&
189 CE
->getOpcode() != Instruction::FCmp
;
192 // Returns possible functions called by CS into the given SmallVectorImpl.
193 // Returns true if targets found, false otherwise.
194 static bool getPossibleTargets(CallSite CS
,
195 SmallVectorImpl
<Function
*> &Output
) {
196 if (auto *Fn
= CS
.getCalledFunction()) {
197 Output
.push_back(Fn
);
201 // TODO: If the call is indirect, we might be able to enumerate all
203 // targets of the call and return them, rather than just failing.
207 void addNode(Value
*Val
, AliasAttrs Attr
= AliasAttrs()) {
208 assert(Val
!= nullptr && Val
->getType()->isPointerTy());
209 if (auto GVal
= dyn_cast
<GlobalValue
>(Val
)) {
210 if (Graph
.addNode(InstantiatedValue
{GVal
, 0},
211 getGlobalOrArgAttrFromValue(*GVal
)))
212 Graph
.addNode(InstantiatedValue
{GVal
, 1}, getAttrUnknown());
213 } else if (auto CExpr
= dyn_cast
<ConstantExpr
>(Val
)) {
214 if (hasUsefulEdges(CExpr
)) {
215 if (Graph
.addNode(InstantiatedValue
{CExpr
, 0}))
216 visitConstantExpr(CExpr
);
219 Graph
.addNode(InstantiatedValue
{Val
, 0}, Attr
);
222 void addAssignEdge(Value
*From
, Value
*To
, int64_t Offset
= 0) {
223 assert(From
!= nullptr && To
!= nullptr);
224 if (!From
->getType()->isPointerTy() || !To
->getType()->isPointerTy())
229 Graph
.addEdge(InstantiatedValue
{From
, 0}, InstantiatedValue
{To
, 0},
234 void addDerefEdge(Value
*From
, Value
*To
, bool IsRead
) {
235 assert(From
!= nullptr && To
!= nullptr);
236 // FIXME: This is subtly broken, due to how we model some instructions
237 // (e.g. extractvalue, extractelement) as loads. Since those take
238 // non-pointer operands, we'll entirely skip adding edges for those.
240 // addAssignEdge seems to have a similar issue with insertvalue, etc.
241 if (!From
->getType()->isPointerTy() || !To
->getType()->isPointerTy())
246 Graph
.addNode(InstantiatedValue
{From
, 1});
247 Graph
.addEdge(InstantiatedValue
{From
, 1}, InstantiatedValue
{To
, 0});
249 Graph
.addNode(InstantiatedValue
{To
, 1});
250 Graph
.addEdge(InstantiatedValue
{From
, 0}, InstantiatedValue
{To
, 1});
254 void addLoadEdge(Value
*From
, Value
*To
) { addDerefEdge(From
, To
, true); }
255 void addStoreEdge(Value
*From
, Value
*To
) { addDerefEdge(From
, To
, false); }
258 GetEdgesVisitor(CFLGraphBuilder
&Builder
, const DataLayout
&DL
)
259 : AA(Builder
.Analysis
), DL(DL
), TLI(Builder
.TLI
), Graph(Builder
.Graph
),
260 ReturnValues(Builder
.ReturnedValues
) {}
262 void visitInstruction(Instruction
&) {
263 llvm_unreachable("Unsupported instruction encountered");
266 void visitReturnInst(ReturnInst
&Inst
) {
267 if (auto RetVal
= Inst
.getReturnValue()) {
268 if (RetVal
->getType()->isPointerTy()) {
270 ReturnValues
.push_back(RetVal
);
275 void visitPtrToIntInst(PtrToIntInst
&Inst
) {
276 auto *Ptr
= Inst
.getOperand(0);
277 addNode(Ptr
, getAttrEscaped());
280 void visitIntToPtrInst(IntToPtrInst
&Inst
) {
282 addNode(Ptr
, getAttrUnknown());
285 void visitCastInst(CastInst
&Inst
) {
286 auto *Src
= Inst
.getOperand(0);
287 addAssignEdge(Src
, &Inst
);
290 void visitBinaryOperator(BinaryOperator
&Inst
) {
291 auto *Op1
= Inst
.getOperand(0);
292 auto *Op2
= Inst
.getOperand(1);
293 addAssignEdge(Op1
, &Inst
);
294 addAssignEdge(Op2
, &Inst
);
297 void visitAtomicCmpXchgInst(AtomicCmpXchgInst
&Inst
) {
298 auto *Ptr
= Inst
.getPointerOperand();
299 auto *Val
= Inst
.getNewValOperand();
300 addStoreEdge(Val
, Ptr
);
303 void visitAtomicRMWInst(AtomicRMWInst
&Inst
) {
304 auto *Ptr
= Inst
.getPointerOperand();
305 auto *Val
= Inst
.getValOperand();
306 addStoreEdge(Val
, Ptr
);
309 void visitPHINode(PHINode
&Inst
) {
310 for (Value
*Val
: Inst
.incoming_values())
311 addAssignEdge(Val
, &Inst
);
314 void visitGEP(GEPOperator
&GEPOp
) {
315 uint64_t Offset
= UnknownOffset
;
316 APInt
APOffset(DL
.getPointerSizeInBits(GEPOp
.getPointerAddressSpace()),
318 if (GEPOp
.accumulateConstantOffset(DL
, APOffset
))
319 Offset
= APOffset
.getSExtValue();
321 auto *Op
= GEPOp
.getPointerOperand();
322 addAssignEdge(Op
, &GEPOp
, Offset
);
325 void visitGetElementPtrInst(GetElementPtrInst
&Inst
) {
326 auto *GEPOp
= cast
<GEPOperator
>(&Inst
);
330 void visitSelectInst(SelectInst
&Inst
) {
331 // Condition is not processed here (The actual statement producing
332 // the condition result is processed elsewhere). For select, the
333 // condition is evaluated, but not loaded, stored, or assigned
334 // simply as a result of being the condition of a select.
336 auto *TrueVal
= Inst
.getTrueValue();
337 auto *FalseVal
= Inst
.getFalseValue();
338 addAssignEdge(TrueVal
, &Inst
);
339 addAssignEdge(FalseVal
, &Inst
);
342 void visitAllocaInst(AllocaInst
&Inst
) { addNode(&Inst
); }
344 void visitLoadInst(LoadInst
&Inst
) {
345 auto *Ptr
= Inst
.getPointerOperand();
347 addLoadEdge(Ptr
, Val
);
350 void visitStoreInst(StoreInst
&Inst
) {
351 auto *Ptr
= Inst
.getPointerOperand();
352 auto *Val
= Inst
.getValueOperand();
353 addStoreEdge(Val
, Ptr
);
356 void visitVAArgInst(VAArgInst
&Inst
) {
357 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
360 // 1. Loads a value from *((T*)*Ptr).
361 // 2. Increments (stores to) *Ptr by some target-specific amount.
362 // For now, we'll handle this like a landingpad instruction (by placing
364 // result in its own group, and having that group alias externals).
365 if (Inst
.getType()->isPointerTy())
366 addNode(&Inst
, getAttrUnknown());
369 static bool isFunctionExternal(Function
*Fn
) {
370 return !Fn
->hasExactDefinition();
373 bool tryInterproceduralAnalysis(CallSite CS
,
374 const SmallVectorImpl
<Function
*> &Fns
) {
375 assert(Fns
.size() > 0);
377 if (CS
.arg_size() > MaxSupportedArgsInSummary
)
380 // Exit early if we'll fail anyway
381 for (auto *Fn
: Fns
) {
382 if (isFunctionExternal(Fn
) || Fn
->isVarArg())
384 // Fail if the caller does not provide enough arguments
385 assert(Fn
->arg_size() <= CS
.arg_size());
386 if (!AA
.getAliasSummary(*Fn
))
390 for (auto *Fn
: Fns
) {
391 auto Summary
= AA
.getAliasSummary(*Fn
);
392 assert(Summary
!= nullptr);
394 auto &RetParamRelations
= Summary
->RetParamRelations
;
395 for (auto &Relation
: RetParamRelations
) {
396 auto IRelation
= instantiateExternalRelation(Relation
, CS
);
397 if (IRelation
.hasValue()) {
398 Graph
.addNode(IRelation
->From
);
399 Graph
.addNode(IRelation
->To
);
400 Graph
.addEdge(IRelation
->From
, IRelation
->To
);
404 auto &RetParamAttributes
= Summary
->RetParamAttributes
;
405 for (auto &Attribute
: RetParamAttributes
) {
406 auto IAttr
= instantiateExternalAttribute(Attribute
, CS
);
407 if (IAttr
.hasValue())
408 Graph
.addNode(IAttr
->IValue
, IAttr
->Attr
);
415 void visitCallSite(CallSite CS
) {
416 auto Inst
= CS
.getInstruction();
418 // Make sure all arguments and return value are added to the graph first
419 for (Value
*V
: CS
.args())
420 if (V
->getType()->isPointerTy())
422 if (Inst
->getType()->isPointerTy())
425 // Check if Inst is a call to a library function that
426 // allocates/deallocates on the heap. Those kinds of functions do not
427 // introduce any aliases.
428 // TODO: address other common library functions such as realloc(),
430 if (isMallocOrCallocLikeFn(Inst
, &TLI
) || isFreeCall(Inst
, &TLI
))
433 // TODO: Add support for noalias args/all the other fun function
434 // attributes that we can tack on.
435 SmallVector
<Function
*, 4> Targets
;
436 if (getPossibleTargets(CS
, Targets
))
437 if (tryInterproceduralAnalysis(CS
, Targets
))
440 // Because the function is opaque, we need to note that anything
441 // could have happened to the arguments (unless the function is marked
442 // readonly or readnone), and that the result could alias just about
443 // anything, too (unless the result is marked noalias).
444 if (!CS
.onlyReadsMemory())
445 for (Value
*V
: CS
.args()) {
446 if (V
->getType()->isPointerTy()) {
447 // The argument itself escapes.
448 Graph
.addAttr(InstantiatedValue
{V
, 0}, getAttrEscaped());
449 // The fate of argument memory is unknown. Note that since
450 // AliasAttrs is transitive with respect to dereference, we only
451 // need to specify it for the first-level memory.
452 Graph
.addNode(InstantiatedValue
{V
, 1}, getAttrUnknown());
456 if (Inst
->getType()->isPointerTy()) {
457 auto *Fn
= CS
.getCalledFunction();
458 if (Fn
== nullptr || !Fn
->returnDoesNotAlias())
459 // No need to call addNode() since we've added Inst at the
460 // beginning of this function and we know it is not a global.
461 Graph
.addAttr(InstantiatedValue
{Inst
, 0}, getAttrUnknown());
465 /// Because vectors/aggregates are immutable and unaddressable, there's
466 /// nothing we can do to coax a value out of them, other than calling
467 /// Extract{Element,Value}. We can effectively treat them as pointers to
468 /// arbitrary memory locations we can store in and load from.
469 void visitExtractElementInst(ExtractElementInst
&Inst
) {
470 auto *Ptr
= Inst
.getVectorOperand();
472 addLoadEdge(Ptr
, Val
);
475 void visitInsertElementInst(InsertElementInst
&Inst
) {
476 auto *Vec
= Inst
.getOperand(0);
477 auto *Val
= Inst
.getOperand(1);
478 addAssignEdge(Vec
, &Inst
);
479 addStoreEdge(Val
, &Inst
);
482 void visitLandingPadInst(LandingPadInst
&Inst
) {
483 // Exceptions come from "nowhere", from our analysis' perspective.
484 // So we place the instruction its own group, noting that said group may
486 if (Inst
.getType()->isPointerTy())
487 addNode(&Inst
, getAttrUnknown());
490 void visitInsertValueInst(InsertValueInst
&Inst
) {
491 auto *Agg
= Inst
.getOperand(0);
492 auto *Val
= Inst
.getOperand(1);
493 addAssignEdge(Agg
, &Inst
);
494 addStoreEdge(Val
, &Inst
);
497 void visitExtractValueInst(ExtractValueInst
&Inst
) {
498 auto *Ptr
= Inst
.getAggregateOperand();
499 addLoadEdge(Ptr
, &Inst
);
502 void visitShuffleVectorInst(ShuffleVectorInst
&Inst
) {
503 auto *From1
= Inst
.getOperand(0);
504 auto *From2
= Inst
.getOperand(1);
505 addAssignEdge(From1
, &Inst
);
506 addAssignEdge(From2
, &Inst
);
509 void visitConstantExpr(ConstantExpr
*CE
) {
510 switch (CE
->getOpcode()) {
511 case Instruction::GetElementPtr
: {
512 auto GEPOp
= cast
<GEPOperator
>(CE
);
517 case Instruction::PtrToInt
: {
518 addNode(CE
->getOperand(0), getAttrEscaped());
522 case Instruction::IntToPtr
: {
523 addNode(CE
, getAttrUnknown());
527 case Instruction::BitCast
:
528 case Instruction::AddrSpaceCast
:
529 case Instruction::Trunc
:
530 case Instruction::ZExt
:
531 case Instruction::SExt
:
532 case Instruction::FPExt
:
533 case Instruction::FPTrunc
:
534 case Instruction::UIToFP
:
535 case Instruction::SIToFP
:
536 case Instruction::FPToUI
:
537 case Instruction::FPToSI
: {
538 addAssignEdge(CE
->getOperand(0), CE
);
542 case Instruction::Select
: {
543 addAssignEdge(CE
->getOperand(1), CE
);
544 addAssignEdge(CE
->getOperand(2), CE
);
548 case Instruction::InsertElement
:
549 case Instruction::InsertValue
: {
550 addAssignEdge(CE
->getOperand(0), CE
);
551 addStoreEdge(CE
->getOperand(1), CE
);
555 case Instruction::ExtractElement
:
556 case Instruction::ExtractValue
: {
557 addLoadEdge(CE
->getOperand(0), CE
);
561 case Instruction::Add
:
562 case Instruction::Sub
:
563 case Instruction::FSub
:
564 case Instruction::Mul
:
565 case Instruction::FMul
:
566 case Instruction::UDiv
:
567 case Instruction::SDiv
:
568 case Instruction::FDiv
:
569 case Instruction::URem
:
570 case Instruction::SRem
:
571 case Instruction::FRem
:
572 case Instruction::And
:
573 case Instruction::Or
:
574 case Instruction::Xor
:
575 case Instruction::Shl
:
576 case Instruction::LShr
:
577 case Instruction::AShr
:
578 case Instruction::ICmp
:
579 case Instruction::FCmp
:
580 case Instruction::ShuffleVector
: {
581 addAssignEdge(CE
->getOperand(0), CE
);
582 addAssignEdge(CE
->getOperand(1), CE
);
587 llvm_unreachable("Unknown instruction type encountered!");
594 // Determines whether or not we an instruction is useless to us (e.g.
596 static bool hasUsefulEdges(Instruction
*Inst
) {
597 bool IsNonInvokeRetTerminator
= Inst
->isTerminator() &&
598 !isa
<InvokeInst
>(Inst
) &&
599 !isa
<ReturnInst
>(Inst
);
600 return !isa
<CmpInst
>(Inst
) && !isa
<FenceInst
>(Inst
) &&
601 !IsNonInvokeRetTerminator
;
604 void addArgumentToGraph(Argument
&Arg
) {
605 if (Arg
.getType()->isPointerTy()) {
606 Graph
.addNode(InstantiatedValue
{&Arg
, 0},
607 getGlobalOrArgAttrFromValue(Arg
));
608 // Pointees of a formal parameter is known to the caller
609 Graph
.addNode(InstantiatedValue
{&Arg
, 1}, getAttrCaller());
613 // Given an Instruction, this will add it to the graph, along with any
614 // Instructions that are potentially only available from said Instruction
615 // For example, given the following line:
616 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
617 // addInstructionToGraph would add both the `load` and `getelementptr`
618 // instructions to the graph appropriately.
619 void addInstructionToGraph(GetEdgesVisitor
&Visitor
, Instruction
&Inst
) {
620 if (!hasUsefulEdges(&Inst
))
626 // Builds the graph needed for constructing the StratifiedSets for the given
628 void buildGraphFrom(Function
&Fn
) {
629 GetEdgesVisitor
Visitor(*this, Fn
.getParent()->getDataLayout());
631 for (auto &Bb
: Fn
.getBasicBlockList())
632 for (auto &Inst
: Bb
.getInstList())
633 addInstructionToGraph(Visitor
, Inst
);
635 for (auto &Arg
: Fn
.args())
636 addArgumentToGraph(Arg
);
640 CFLGraphBuilder(CFLAA
&Analysis
, const TargetLibraryInfo
&TLI
, Function
&Fn
)
641 : Analysis(Analysis
), TLI(TLI
) {
645 const CFLGraph
&getCFLGraph() const { return Graph
; }
646 const SmallVector
<Value
*, 4> &getReturnValues() const {
647 return ReturnedValues
;
651 } // end namespace cflaa
652 } // end namespace llvm
654 #endif // LLVM_LIB_ANALYSIS_CFLGRAPH_H