1 //===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_ANALYSIS_CFLGRAPH_H
16 #define LLVM_LIB_ANALYSIS_CFLGRAPH_H
18 #include "AliasAnalysisSummary.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Analysis/MemoryBuiltins.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/IR/Argument.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/InstVisitor.h"
32 #include "llvm/IR/InstrTypes.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/Operator.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/ErrorHandling.h"
47 /// The Program Expression Graph (PEG) of CFL analysis
48 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
49 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
50 /// the main purpose of this graph is to abstract away unrelated facts and
51 /// translate the rest into a form that can be easily digested by CFL analyses.
52 /// Each Node in the graph is an InstantiatedValue, and each edge represent a
53 /// pointer assignment between InstantiatedValue. Pointer
54 /// references/dereferences are not explicitly stored in the graph: we
55 /// implicitly assume that for each node (X, I) it has a dereference edge to (X,
56 /// I+1) and a reference edge to (X, I-1).
59 using Node
= InstantiatedValue
;
66 using EdgeList
= std::vector
<Edge
>;
69 EdgeList Edges
, ReverseEdges
;
74 std::vector
<NodeInfo
> Levels
;
77 bool addNodeToLevel(unsigned Level
) {
78 auto NumLevels
= Levels
.size();
79 if (NumLevels
> Level
)
81 Levels
.resize(Level
+ 1);
85 NodeInfo
&getNodeInfoAtLevel(unsigned Level
) {
86 assert(Level
< Levels
.size());
89 const NodeInfo
&getNodeInfoAtLevel(unsigned Level
) const {
90 assert(Level
< Levels
.size());
94 unsigned getNumLevels() const { return Levels
.size(); }
98 using ValueMap
= DenseMap
<Value
*, ValueInfo
>;
102 NodeInfo
*getNode(Node N
) {
103 auto Itr
= ValueImpls
.find(N
.Val
);
104 if (Itr
== ValueImpls
.end() || Itr
->second
.getNumLevels() <= N
.DerefLevel
)
106 return &Itr
->second
.getNodeInfoAtLevel(N
.DerefLevel
);
110 using const_value_iterator
= ValueMap::const_iterator
;
112 bool addNode(Node N
, AliasAttrs Attr
= AliasAttrs()) {
113 assert(N
.Val
!= nullptr);
114 auto &ValInfo
= ValueImpls
[N
.Val
];
115 auto Changed
= ValInfo
.addNodeToLevel(N
.DerefLevel
);
116 ValInfo
.getNodeInfoAtLevel(N
.DerefLevel
).Attr
|= Attr
;
120 void addAttr(Node N
, AliasAttrs Attr
) {
121 auto *Info
= getNode(N
);
122 assert(Info
!= nullptr);
126 void addEdge(Node From
, Node To
, int64_t Offset
= 0) {
127 auto *FromInfo
= getNode(From
);
128 assert(FromInfo
!= nullptr);
129 auto *ToInfo
= getNode(To
);
130 assert(ToInfo
!= nullptr);
132 FromInfo
->Edges
.push_back(Edge
{To
, Offset
});
133 ToInfo
->ReverseEdges
.push_back(Edge
{From
, Offset
});
136 const NodeInfo
*getNode(Node N
) const {
137 auto Itr
= ValueImpls
.find(N
.Val
);
138 if (Itr
== ValueImpls
.end() || Itr
->second
.getNumLevels() <= N
.DerefLevel
)
140 return &Itr
->second
.getNodeInfoAtLevel(N
.DerefLevel
);
143 AliasAttrs
attrFor(Node N
) const {
144 auto *Info
= getNode(N
);
145 assert(Info
!= nullptr);
149 iterator_range
<const_value_iterator
> value_mappings() const {
150 return make_range
<const_value_iterator
>(ValueImpls
.begin(),
155 /// A builder class used to create CFLGraph instance from a given function
156 /// The CFL-AA that uses this builder must provide its own type as a template
157 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
158 /// needs a way of obtaining the summary of other functions when callinsts are
160 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
161 /// member function that takes a Function& and returns the corresponding summary
162 /// as a const AliasSummary*.
163 template <typename CFLAA
> class CFLGraphBuilder
{
164 // Input of the builder
166 const TargetLibraryInfo
&TLI
;
168 // Output of the builder
170 SmallVector
<Value
*, 4> ReturnedValues
;
173 /// Gets the edges our graph should have, based on an Instruction*
174 class GetEdgesVisitor
: public InstVisitor
<GetEdgesVisitor
, void> {
176 const DataLayout
&DL
;
177 const TargetLibraryInfo
&TLI
;
180 SmallVectorImpl
<Value
*> &ReturnValues
;
182 static bool hasUsefulEdges(ConstantExpr
*CE
) {
183 // ConstantExpr doesn't have terminators, invokes, or fences, so only
184 // needs to check for compares.
185 return CE
->getOpcode() != Instruction::ICmp
&&
186 CE
->getOpcode() != Instruction::FCmp
;
189 // Returns possible functions called by CS into the given SmallVectorImpl.
190 // Returns true if targets found, false otherwise.
191 static bool getPossibleTargets(CallBase
&Call
,
192 SmallVectorImpl
<Function
*> &Output
) {
193 if (auto *Fn
= Call
.getCalledFunction()) {
194 Output
.push_back(Fn
);
198 // TODO: If the call is indirect, we might be able to enumerate all
199 // potential targets of the call and return them, rather than just
204 void addNode(Value
*Val
, AliasAttrs Attr
= AliasAttrs()) {
205 assert(Val
!= nullptr && Val
->getType()->isPointerTy());
206 if (auto GVal
= dyn_cast
<GlobalValue
>(Val
)) {
207 if (Graph
.addNode(InstantiatedValue
{GVal
, 0},
208 getGlobalOrArgAttrFromValue(*GVal
)))
209 Graph
.addNode(InstantiatedValue
{GVal
, 1}, getAttrUnknown());
210 } else if (auto CExpr
= dyn_cast
<ConstantExpr
>(Val
)) {
211 if (hasUsefulEdges(CExpr
)) {
212 if (Graph
.addNode(InstantiatedValue
{CExpr
, 0}))
213 visitConstantExpr(CExpr
);
216 Graph
.addNode(InstantiatedValue
{Val
, 0}, Attr
);
219 void addAssignEdge(Value
*From
, Value
*To
, int64_t Offset
= 0) {
220 assert(From
!= nullptr && To
!= nullptr);
221 if (!From
->getType()->isPointerTy() || !To
->getType()->isPointerTy())
226 Graph
.addEdge(InstantiatedValue
{From
, 0}, InstantiatedValue
{To
, 0},
231 void addDerefEdge(Value
*From
, Value
*To
, bool IsRead
) {
232 assert(From
!= nullptr && To
!= nullptr);
233 // FIXME: This is subtly broken, due to how we model some instructions
234 // (e.g. extractvalue, extractelement) as loads. Since those take
235 // non-pointer operands, we'll entirely skip adding edges for those.
237 // addAssignEdge seems to have a similar issue with insertvalue, etc.
238 if (!From
->getType()->isPointerTy() || !To
->getType()->isPointerTy())
243 Graph
.addNode(InstantiatedValue
{From
, 1});
244 Graph
.addEdge(InstantiatedValue
{From
, 1}, InstantiatedValue
{To
, 0});
246 Graph
.addNode(InstantiatedValue
{To
, 1});
247 Graph
.addEdge(InstantiatedValue
{From
, 0}, InstantiatedValue
{To
, 1});
251 void addLoadEdge(Value
*From
, Value
*To
) { addDerefEdge(From
, To
, true); }
252 void addStoreEdge(Value
*From
, Value
*To
) { addDerefEdge(From
, To
, false); }
255 GetEdgesVisitor(CFLGraphBuilder
&Builder
, const DataLayout
&DL
)
256 : AA(Builder
.Analysis
), DL(DL
), TLI(Builder
.TLI
), Graph(Builder
.Graph
),
257 ReturnValues(Builder
.ReturnedValues
) {}
259 void visitInstruction(Instruction
&) {
260 llvm_unreachable("Unsupported instruction encountered");
263 void visitReturnInst(ReturnInst
&Inst
) {
264 if (auto RetVal
= Inst
.getReturnValue()) {
265 if (RetVal
->getType()->isPointerTy()) {
267 ReturnValues
.push_back(RetVal
);
272 void visitPtrToIntInst(PtrToIntInst
&Inst
) {
273 auto *Ptr
= Inst
.getOperand(0);
274 addNode(Ptr
, getAttrEscaped());
277 void visitIntToPtrInst(IntToPtrInst
&Inst
) {
279 addNode(Ptr
, getAttrUnknown());
282 void visitCastInst(CastInst
&Inst
) {
283 auto *Src
= Inst
.getOperand(0);
284 addAssignEdge(Src
, &Inst
);
287 void visitFreezeInst(FreezeInst
&Inst
) {
288 // Accessing freeze(ptr) is equivalent to accessing ptr.
289 // The former raises UB iff latter raises UB.
290 auto *Src
= Inst
.getOperand(0);
291 addAssignEdge(Src
, &Inst
);
294 void visitBinaryOperator(BinaryOperator
&Inst
) {
295 auto *Op1
= Inst
.getOperand(0);
296 auto *Op2
= Inst
.getOperand(1);
297 addAssignEdge(Op1
, &Inst
);
298 addAssignEdge(Op2
, &Inst
);
301 void visitUnaryOperator(UnaryOperator
&Inst
) {
302 auto *Src
= Inst
.getOperand(0);
303 addAssignEdge(Src
, &Inst
);
306 void visitAtomicCmpXchgInst(AtomicCmpXchgInst
&Inst
) {
307 auto *Ptr
= Inst
.getPointerOperand();
308 auto *Val
= Inst
.getNewValOperand();
309 addStoreEdge(Val
, Ptr
);
312 void visitAtomicRMWInst(AtomicRMWInst
&Inst
) {
313 auto *Ptr
= Inst
.getPointerOperand();
314 auto *Val
= Inst
.getValOperand();
315 addStoreEdge(Val
, Ptr
);
318 void visitPHINode(PHINode
&Inst
) {
319 for (Value
*Val
: Inst
.incoming_values())
320 addAssignEdge(Val
, &Inst
);
323 void visitGEP(GEPOperator
&GEPOp
) {
324 uint64_t Offset
= UnknownOffset
;
325 APInt
APOffset(DL
.getPointerSizeInBits(GEPOp
.getPointerAddressSpace()),
327 if (GEPOp
.accumulateConstantOffset(DL
, APOffset
))
328 Offset
= APOffset
.getSExtValue();
330 auto *Op
= GEPOp
.getPointerOperand();
331 addAssignEdge(Op
, &GEPOp
, Offset
);
334 void visitGetElementPtrInst(GetElementPtrInst
&Inst
) {
335 auto *GEPOp
= cast
<GEPOperator
>(&Inst
);
339 void visitSelectInst(SelectInst
&Inst
) {
340 // Condition is not processed here (The actual statement producing
341 // the condition result is processed elsewhere). For select, the
342 // condition is evaluated, but not loaded, stored, or assigned
343 // simply as a result of being the condition of a select.
345 auto *TrueVal
= Inst
.getTrueValue();
346 auto *FalseVal
= Inst
.getFalseValue();
347 addAssignEdge(TrueVal
, &Inst
);
348 addAssignEdge(FalseVal
, &Inst
);
351 void visitAllocaInst(AllocaInst
&Inst
) { addNode(&Inst
); }
353 void visitLoadInst(LoadInst
&Inst
) {
354 auto *Ptr
= Inst
.getPointerOperand();
356 addLoadEdge(Ptr
, Val
);
359 void visitStoreInst(StoreInst
&Inst
) {
360 auto *Ptr
= Inst
.getPointerOperand();
361 auto *Val
= Inst
.getValueOperand();
362 addStoreEdge(Val
, Ptr
);
365 void visitVAArgInst(VAArgInst
&Inst
) {
366 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
369 // 1. Loads a value from *((T*)*Ptr).
370 // 2. Increments (stores to) *Ptr by some target-specific amount.
371 // For now, we'll handle this like a landingpad instruction (by placing
373 // result in its own group, and having that group alias externals).
374 if (Inst
.getType()->isPointerTy())
375 addNode(&Inst
, getAttrUnknown());
378 static bool isFunctionExternal(Function
*Fn
) {
379 return !Fn
->hasExactDefinition();
382 bool tryInterproceduralAnalysis(CallBase
&Call
,
383 const SmallVectorImpl
<Function
*> &Fns
) {
384 assert(Fns
.size() > 0);
386 if (Call
.arg_size() > MaxSupportedArgsInSummary
)
389 // Exit early if we'll fail anyway
390 for (auto *Fn
: Fns
) {
391 if (isFunctionExternal(Fn
) || Fn
->isVarArg())
393 // Fail if the caller does not provide enough arguments
394 assert(Fn
->arg_size() <= Call
.arg_size());
395 if (!AA
.getAliasSummary(*Fn
))
399 for (auto *Fn
: Fns
) {
400 auto Summary
= AA
.getAliasSummary(*Fn
);
401 assert(Summary
!= nullptr);
403 auto &RetParamRelations
= Summary
->RetParamRelations
;
404 for (auto &Relation
: RetParamRelations
) {
405 auto IRelation
= instantiateExternalRelation(Relation
, Call
);
406 if (IRelation
.hasValue()) {
407 Graph
.addNode(IRelation
->From
);
408 Graph
.addNode(IRelation
->To
);
409 Graph
.addEdge(IRelation
->From
, IRelation
->To
);
413 auto &RetParamAttributes
= Summary
->RetParamAttributes
;
414 for (auto &Attribute
: RetParamAttributes
) {
415 auto IAttr
= instantiateExternalAttribute(Attribute
, Call
);
416 if (IAttr
.hasValue())
417 Graph
.addNode(IAttr
->IValue
, IAttr
->Attr
);
424 void visitCallBase(CallBase
&Call
) {
425 // Make sure all arguments and return value are added to the graph first
426 for (Value
*V
: Call
.args())
427 if (V
->getType()->isPointerTy())
429 if (Call
.getType()->isPointerTy())
432 // Check if Inst is a call to a library function that
433 // allocates/deallocates on the heap. Those kinds of functions do not
434 // introduce any aliases.
435 // TODO: address other common library functions such as realloc(),
437 if (isMallocOrCallocLikeFn(&Call
, &TLI
) || isFreeCall(&Call
, &TLI
))
440 // TODO: Add support for noalias args/all the other fun function
441 // attributes that we can tack on.
442 SmallVector
<Function
*, 4> Targets
;
443 if (getPossibleTargets(Call
, Targets
))
444 if (tryInterproceduralAnalysis(Call
, Targets
))
447 // Because the function is opaque, we need to note that anything
448 // could have happened to the arguments (unless the function is marked
449 // readonly or readnone), and that the result could alias just about
450 // anything, too (unless the result is marked noalias).
451 if (!Call
.onlyReadsMemory())
452 for (Value
*V
: Call
.args()) {
453 if (V
->getType()->isPointerTy()) {
454 // The argument itself escapes.
455 Graph
.addAttr(InstantiatedValue
{V
, 0}, getAttrEscaped());
456 // The fate of argument memory is unknown. Note that since
457 // AliasAttrs is transitive with respect to dereference, we only
458 // need to specify it for the first-level memory.
459 Graph
.addNode(InstantiatedValue
{V
, 1}, getAttrUnknown());
463 if (Call
.getType()->isPointerTy()) {
464 auto *Fn
= Call
.getCalledFunction();
465 if (Fn
== nullptr || !Fn
->returnDoesNotAlias())
466 // No need to call addNode() since we've added Inst at the
467 // beginning of this function and we know it is not a global.
468 Graph
.addAttr(InstantiatedValue
{&Call
, 0}, getAttrUnknown());
472 /// Because vectors/aggregates are immutable and unaddressable, there's
473 /// nothing we can do to coax a value out of them, other than calling
474 /// Extract{Element,Value}. We can effectively treat them as pointers to
475 /// arbitrary memory locations we can store in and load from.
476 void visitExtractElementInst(ExtractElementInst
&Inst
) {
477 auto *Ptr
= Inst
.getVectorOperand();
479 addLoadEdge(Ptr
, Val
);
482 void visitInsertElementInst(InsertElementInst
&Inst
) {
483 auto *Vec
= Inst
.getOperand(0);
484 auto *Val
= Inst
.getOperand(1);
485 addAssignEdge(Vec
, &Inst
);
486 addStoreEdge(Val
, &Inst
);
489 void visitLandingPadInst(LandingPadInst
&Inst
) {
490 // Exceptions come from "nowhere", from our analysis' perspective.
491 // So we place the instruction its own group, noting that said group may
493 if (Inst
.getType()->isPointerTy())
494 addNode(&Inst
, getAttrUnknown());
497 void visitInsertValueInst(InsertValueInst
&Inst
) {
498 auto *Agg
= Inst
.getOperand(0);
499 auto *Val
= Inst
.getOperand(1);
500 addAssignEdge(Agg
, &Inst
);
501 addStoreEdge(Val
, &Inst
);
504 void visitExtractValueInst(ExtractValueInst
&Inst
) {
505 auto *Ptr
= Inst
.getAggregateOperand();
506 addLoadEdge(Ptr
, &Inst
);
509 void visitShuffleVectorInst(ShuffleVectorInst
&Inst
) {
510 auto *From1
= Inst
.getOperand(0);
511 auto *From2
= Inst
.getOperand(1);
512 addAssignEdge(From1
, &Inst
);
513 addAssignEdge(From2
, &Inst
);
516 void visitConstantExpr(ConstantExpr
*CE
) {
517 switch (CE
->getOpcode()) {
518 case Instruction::GetElementPtr
: {
519 auto GEPOp
= cast
<GEPOperator
>(CE
);
524 case Instruction::PtrToInt
: {
525 addNode(CE
->getOperand(0), getAttrEscaped());
529 case Instruction::IntToPtr
: {
530 addNode(CE
, getAttrUnknown());
534 case Instruction::BitCast
:
535 case Instruction::AddrSpaceCast
:
536 case Instruction::Trunc
:
537 case Instruction::ZExt
:
538 case Instruction::SExt
:
539 case Instruction::FPExt
:
540 case Instruction::FPTrunc
:
541 case Instruction::UIToFP
:
542 case Instruction::SIToFP
:
543 case Instruction::FPToUI
:
544 case Instruction::FPToSI
: {
545 addAssignEdge(CE
->getOperand(0), CE
);
549 case Instruction::Select
: {
550 addAssignEdge(CE
->getOperand(1), CE
);
551 addAssignEdge(CE
->getOperand(2), CE
);
555 case Instruction::InsertElement
:
556 case Instruction::InsertValue
: {
557 addAssignEdge(CE
->getOperand(0), CE
);
558 addStoreEdge(CE
->getOperand(1), CE
);
562 case Instruction::ExtractElement
:
563 case Instruction::ExtractValue
: {
564 addLoadEdge(CE
->getOperand(0), CE
);
568 case Instruction::Add
:
569 case Instruction::FAdd
:
570 case Instruction::Sub
:
571 case Instruction::FSub
:
572 case Instruction::Mul
:
573 case Instruction::FMul
:
574 case Instruction::UDiv
:
575 case Instruction::SDiv
:
576 case Instruction::FDiv
:
577 case Instruction::URem
:
578 case Instruction::SRem
:
579 case Instruction::FRem
:
580 case Instruction::And
:
581 case Instruction::Or
:
582 case Instruction::Xor
:
583 case Instruction::Shl
:
584 case Instruction::LShr
:
585 case Instruction::AShr
:
586 case Instruction::ICmp
:
587 case Instruction::FCmp
:
588 case Instruction::ShuffleVector
: {
589 addAssignEdge(CE
->getOperand(0), CE
);
590 addAssignEdge(CE
->getOperand(1), CE
);
594 case Instruction::FNeg
: {
595 addAssignEdge(CE
->getOperand(0), CE
);
600 llvm_unreachable("Unknown instruction type encountered!");
607 // Determines whether or not we an instruction is useless to us (e.g.
609 static bool hasUsefulEdges(Instruction
*Inst
) {
610 bool IsNonInvokeRetTerminator
= Inst
->isTerminator() &&
611 !isa
<InvokeInst
>(Inst
) &&
612 !isa
<ReturnInst
>(Inst
);
613 return !isa
<CmpInst
>(Inst
) && !isa
<FenceInst
>(Inst
) &&
614 !IsNonInvokeRetTerminator
;
617 void addArgumentToGraph(Argument
&Arg
) {
618 if (Arg
.getType()->isPointerTy()) {
619 Graph
.addNode(InstantiatedValue
{&Arg
, 0},
620 getGlobalOrArgAttrFromValue(Arg
));
621 // Pointees of a formal parameter is known to the caller
622 Graph
.addNode(InstantiatedValue
{&Arg
, 1}, getAttrCaller());
626 // Given an Instruction, this will add it to the graph, along with any
627 // Instructions that are potentially only available from said Instruction
628 // For example, given the following line:
629 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
630 // addInstructionToGraph would add both the `load` and `getelementptr`
631 // instructions to the graph appropriately.
632 void addInstructionToGraph(GetEdgesVisitor
&Visitor
, Instruction
&Inst
) {
633 if (!hasUsefulEdges(&Inst
))
639 // Builds the graph needed for constructing the StratifiedSets for the given
641 void buildGraphFrom(Function
&Fn
) {
642 GetEdgesVisitor
Visitor(*this, Fn
.getParent()->getDataLayout());
644 for (auto &Bb
: Fn
.getBasicBlockList())
645 for (auto &Inst
: Bb
.getInstList())
646 addInstructionToGraph(Visitor
, Inst
);
648 for (auto &Arg
: Fn
.args())
649 addArgumentToGraph(Arg
);
653 CFLGraphBuilder(CFLAA
&Analysis
, const TargetLibraryInfo
&TLI
, Function
&Fn
)
654 : Analysis(Analysis
), TLI(TLI
) {
658 const CFLGraph
&getCFLGraph() const { return Graph
; }
659 const SmallVector
<Value
*, 4> &getReturnValues() const {
660 return ReturnedValues
;
664 } // end namespace cflaa
665 } // end namespace llvm
667 #endif // LLVM_LIB_ANALYSIS_CFLGRAPH_H