1 //===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_ANALYSIS_CFLGRAPH_H
16 #define LLVM_LIB_ANALYSIS_CFLGRAPH_H
18 #include "AliasAnalysisSummary.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Analysis/MemoryBuiltins.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/IR/Argument.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/InstVisitor.h"
32 #include "llvm/IR/InstrTypes.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/Operator.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/ErrorHandling.h"
47 /// The Program Expression Graph (PEG) of CFL analysis
48 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
49 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
50 /// the main purpose of this graph is to abstract away unrelated facts and
51 /// translate the rest into a form that can be easily digested by CFL analyses.
52 /// Each Node in the graph is an InstantiatedValue, and each edge represent a
53 /// pointer assignment between InstantiatedValue. Pointer
54 /// references/dereferences are not explicitly stored in the graph: we
55 /// implicitly assume that for each node (X, I) it has a dereference edge to (X,
56 /// I+1) and a reference edge to (X, I-1).
59 using Node
= InstantiatedValue
;
66 using EdgeList
= std::vector
<Edge
>;
69 EdgeList Edges
, ReverseEdges
;
74 std::vector
<NodeInfo
> Levels
;
77 bool addNodeToLevel(unsigned Level
) {
78 auto NumLevels
= Levels
.size();
79 if (NumLevels
> Level
)
81 Levels
.resize(Level
+ 1);
85 NodeInfo
&getNodeInfoAtLevel(unsigned Level
) {
86 assert(Level
< Levels
.size());
89 const NodeInfo
&getNodeInfoAtLevel(unsigned Level
) const {
90 assert(Level
< Levels
.size());
94 unsigned getNumLevels() const { return Levels
.size(); }
98 using ValueMap
= DenseMap
<Value
*, ValueInfo
>;
102 NodeInfo
*getNode(Node N
) {
103 auto Itr
= ValueImpls
.find(N
.Val
);
104 if (Itr
== ValueImpls
.end() || Itr
->second
.getNumLevels() <= N
.DerefLevel
)
106 return &Itr
->second
.getNodeInfoAtLevel(N
.DerefLevel
);
110 using const_value_iterator
= ValueMap::const_iterator
;
112 bool addNode(Node N
, AliasAttrs Attr
= AliasAttrs()) {
113 assert(N
.Val
!= nullptr);
114 auto &ValInfo
= ValueImpls
[N
.Val
];
115 auto Changed
= ValInfo
.addNodeToLevel(N
.DerefLevel
);
116 ValInfo
.getNodeInfoAtLevel(N
.DerefLevel
).Attr
|= Attr
;
120 void addAttr(Node N
, AliasAttrs Attr
) {
121 auto *Info
= getNode(N
);
122 assert(Info
!= nullptr);
126 void addEdge(Node From
, Node To
, int64_t Offset
= 0) {
127 auto *FromInfo
= getNode(From
);
128 assert(FromInfo
!= nullptr);
129 auto *ToInfo
= getNode(To
);
130 assert(ToInfo
!= nullptr);
132 FromInfo
->Edges
.push_back(Edge
{To
, Offset
});
133 ToInfo
->ReverseEdges
.push_back(Edge
{From
, Offset
});
136 const NodeInfo
*getNode(Node N
) const {
137 auto Itr
= ValueImpls
.find(N
.Val
);
138 if (Itr
== ValueImpls
.end() || Itr
->second
.getNumLevels() <= N
.DerefLevel
)
140 return &Itr
->second
.getNodeInfoAtLevel(N
.DerefLevel
);
143 AliasAttrs
attrFor(Node N
) const {
144 auto *Info
= getNode(N
);
145 assert(Info
!= nullptr);
149 iterator_range
<const_value_iterator
> value_mappings() const {
150 return make_range
<const_value_iterator
>(ValueImpls
.begin(),
155 ///A builder class used to create CFLGraph instance from a given function
156 /// The CFL-AA that uses this builder must provide its own type as a template
157 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
158 /// needs a way of obtaining the summary of other functions when callinsts are
160 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
161 /// member function that takes a Function& and returns the corresponding summary
162 /// as a const AliasSummary*.
163 template <typename CFLAA
> class CFLGraphBuilder
{
164 // Input of the builder
166 const TargetLibraryInfo
&TLI
;
168 // Output of the builder
170 SmallVector
<Value
*, 4> ReturnedValues
;
173 /// Gets the edges our graph should have, based on an Instruction*
174 class GetEdgesVisitor
: public InstVisitor
<GetEdgesVisitor
, void> {
176 const DataLayout
&DL
;
177 const TargetLibraryInfo
&TLI
;
180 SmallVectorImpl
<Value
*> &ReturnValues
;
182 static bool hasUsefulEdges(ConstantExpr
*CE
) {
183 // ConstantExpr doesn't have terminators, invokes, or fences, so only
185 // to check for compares.
186 return CE
->getOpcode() != Instruction::ICmp
&&
187 CE
->getOpcode() != Instruction::FCmp
;
190 // Returns possible functions called by CS into the given SmallVectorImpl.
191 // Returns true if targets found, false otherwise.
192 static bool getPossibleTargets(CallBase
&Call
,
193 SmallVectorImpl
<Function
*> &Output
) {
194 if (auto *Fn
= Call
.getCalledFunction()) {
195 Output
.push_back(Fn
);
199 // TODO: If the call is indirect, we might be able to enumerate all
201 // targets of the call and return them, rather than just failing.
205 void addNode(Value
*Val
, AliasAttrs Attr
= AliasAttrs()) {
206 assert(Val
!= nullptr && Val
->getType()->isPointerTy());
207 if (auto GVal
= dyn_cast
<GlobalValue
>(Val
)) {
208 if (Graph
.addNode(InstantiatedValue
{GVal
, 0},
209 getGlobalOrArgAttrFromValue(*GVal
)))
210 Graph
.addNode(InstantiatedValue
{GVal
, 1}, getAttrUnknown());
211 } else if (auto CExpr
= dyn_cast
<ConstantExpr
>(Val
)) {
212 if (hasUsefulEdges(CExpr
)) {
213 if (Graph
.addNode(InstantiatedValue
{CExpr
, 0}))
214 visitConstantExpr(CExpr
);
217 Graph
.addNode(InstantiatedValue
{Val
, 0}, Attr
);
220 void addAssignEdge(Value
*From
, Value
*To
, int64_t Offset
= 0) {
221 assert(From
!= nullptr && To
!= nullptr);
222 if (!From
->getType()->isPointerTy() || !To
->getType()->isPointerTy())
227 Graph
.addEdge(InstantiatedValue
{From
, 0}, InstantiatedValue
{To
, 0},
232 void addDerefEdge(Value
*From
, Value
*To
, bool IsRead
) {
233 assert(From
!= nullptr && To
!= nullptr);
234 // FIXME: This is subtly broken, due to how we model some instructions
235 // (e.g. extractvalue, extractelement) as loads. Since those take
236 // non-pointer operands, we'll entirely skip adding edges for those.
238 // addAssignEdge seems to have a similar issue with insertvalue, etc.
239 if (!From
->getType()->isPointerTy() || !To
->getType()->isPointerTy())
244 Graph
.addNode(InstantiatedValue
{From
, 1});
245 Graph
.addEdge(InstantiatedValue
{From
, 1}, InstantiatedValue
{To
, 0});
247 Graph
.addNode(InstantiatedValue
{To
, 1});
248 Graph
.addEdge(InstantiatedValue
{From
, 0}, InstantiatedValue
{To
, 1});
252 void addLoadEdge(Value
*From
, Value
*To
) { addDerefEdge(From
, To
, true); }
253 void addStoreEdge(Value
*From
, Value
*To
) { addDerefEdge(From
, To
, false); }
256 GetEdgesVisitor(CFLGraphBuilder
&Builder
, const DataLayout
&DL
)
257 : AA(Builder
.Analysis
), DL(DL
), TLI(Builder
.TLI
), Graph(Builder
.Graph
),
258 ReturnValues(Builder
.ReturnedValues
) {}
260 void visitInstruction(Instruction
&) {
261 llvm_unreachable("Unsupported instruction encountered");
264 void visitReturnInst(ReturnInst
&Inst
) {
265 if (auto RetVal
= Inst
.getReturnValue()) {
266 if (RetVal
->getType()->isPointerTy()) {
268 ReturnValues
.push_back(RetVal
);
273 void visitPtrToIntInst(PtrToIntInst
&Inst
) {
274 auto *Ptr
= Inst
.getOperand(0);
275 addNode(Ptr
, getAttrEscaped());
278 void visitIntToPtrInst(IntToPtrInst
&Inst
) {
280 addNode(Ptr
, getAttrUnknown());
283 void visitCastInst(CastInst
&Inst
) {
284 auto *Src
= Inst
.getOperand(0);
285 addAssignEdge(Src
, &Inst
);
288 void visitBinaryOperator(BinaryOperator
&Inst
) {
289 auto *Op1
= Inst
.getOperand(0);
290 auto *Op2
= Inst
.getOperand(1);
291 addAssignEdge(Op1
, &Inst
);
292 addAssignEdge(Op2
, &Inst
);
295 void visitAtomicCmpXchgInst(AtomicCmpXchgInst
&Inst
) {
296 auto *Ptr
= Inst
.getPointerOperand();
297 auto *Val
= Inst
.getNewValOperand();
298 addStoreEdge(Val
, Ptr
);
301 void visitAtomicRMWInst(AtomicRMWInst
&Inst
) {
302 auto *Ptr
= Inst
.getPointerOperand();
303 auto *Val
= Inst
.getValOperand();
304 addStoreEdge(Val
, Ptr
);
307 void visitPHINode(PHINode
&Inst
) {
308 for (Value
*Val
: Inst
.incoming_values())
309 addAssignEdge(Val
, &Inst
);
312 void visitGEP(GEPOperator
&GEPOp
) {
313 uint64_t Offset
= UnknownOffset
;
314 APInt
APOffset(DL
.getPointerSizeInBits(GEPOp
.getPointerAddressSpace()),
316 if (GEPOp
.accumulateConstantOffset(DL
, APOffset
))
317 Offset
= APOffset
.getSExtValue();
319 auto *Op
= GEPOp
.getPointerOperand();
320 addAssignEdge(Op
, &GEPOp
, Offset
);
323 void visitGetElementPtrInst(GetElementPtrInst
&Inst
) {
324 auto *GEPOp
= cast
<GEPOperator
>(&Inst
);
328 void visitSelectInst(SelectInst
&Inst
) {
329 // Condition is not processed here (The actual statement producing
330 // the condition result is processed elsewhere). For select, the
331 // condition is evaluated, but not loaded, stored, or assigned
332 // simply as a result of being the condition of a select.
334 auto *TrueVal
= Inst
.getTrueValue();
335 auto *FalseVal
= Inst
.getFalseValue();
336 addAssignEdge(TrueVal
, &Inst
);
337 addAssignEdge(FalseVal
, &Inst
);
340 void visitAllocaInst(AllocaInst
&Inst
) { addNode(&Inst
); }
342 void visitLoadInst(LoadInst
&Inst
) {
343 auto *Ptr
= Inst
.getPointerOperand();
345 addLoadEdge(Ptr
, Val
);
348 void visitStoreInst(StoreInst
&Inst
) {
349 auto *Ptr
= Inst
.getPointerOperand();
350 auto *Val
= Inst
.getValueOperand();
351 addStoreEdge(Val
, Ptr
);
354 void visitVAArgInst(VAArgInst
&Inst
) {
355 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
358 // 1. Loads a value from *((T*)*Ptr).
359 // 2. Increments (stores to) *Ptr by some target-specific amount.
360 // For now, we'll handle this like a landingpad instruction (by placing
362 // result in its own group, and having that group alias externals).
363 if (Inst
.getType()->isPointerTy())
364 addNode(&Inst
, getAttrUnknown());
367 static bool isFunctionExternal(Function
*Fn
) {
368 return !Fn
->hasExactDefinition();
371 bool tryInterproceduralAnalysis(CallBase
&Call
,
372 const SmallVectorImpl
<Function
*> &Fns
) {
373 assert(Fns
.size() > 0);
375 if (Call
.arg_size() > MaxSupportedArgsInSummary
)
378 // Exit early if we'll fail anyway
379 for (auto *Fn
: Fns
) {
380 if (isFunctionExternal(Fn
) || Fn
->isVarArg())
382 // Fail if the caller does not provide enough arguments
383 assert(Fn
->arg_size() <= Call
.arg_size());
384 if (!AA
.getAliasSummary(*Fn
))
388 for (auto *Fn
: Fns
) {
389 auto Summary
= AA
.getAliasSummary(*Fn
);
390 assert(Summary
!= nullptr);
392 auto &RetParamRelations
= Summary
->RetParamRelations
;
393 for (auto &Relation
: RetParamRelations
) {
394 auto IRelation
= instantiateExternalRelation(Relation
, Call
);
395 if (IRelation
.hasValue()) {
396 Graph
.addNode(IRelation
->From
);
397 Graph
.addNode(IRelation
->To
);
398 Graph
.addEdge(IRelation
->From
, IRelation
->To
);
402 auto &RetParamAttributes
= Summary
->RetParamAttributes
;
403 for (auto &Attribute
: RetParamAttributes
) {
404 auto IAttr
= instantiateExternalAttribute(Attribute
, Call
);
405 if (IAttr
.hasValue())
406 Graph
.addNode(IAttr
->IValue
, IAttr
->Attr
);
413 void visitCallBase(CallBase
&Call
) {
414 // Make sure all arguments and return value are added to the graph first
415 for (Value
*V
: Call
.args())
416 if (V
->getType()->isPointerTy())
418 if (Call
.getType()->isPointerTy())
421 // Check if Inst is a call to a library function that
422 // allocates/deallocates on the heap. Those kinds of functions do not
423 // introduce any aliases.
424 // TODO: address other common library functions such as realloc(),
426 if (isMallocOrCallocLikeFn(&Call
, &TLI
) || isFreeCall(&Call
, &TLI
))
429 // TODO: Add support for noalias args/all the other fun function
430 // attributes that we can tack on.
431 SmallVector
<Function
*, 4> Targets
;
432 if (getPossibleTargets(Call
, Targets
))
433 if (tryInterproceduralAnalysis(Call
, Targets
))
436 // Because the function is opaque, we need to note that anything
437 // could have happened to the arguments (unless the function is marked
438 // readonly or readnone), and that the result could alias just about
439 // anything, too (unless the result is marked noalias).
440 if (!Call
.onlyReadsMemory())
441 for (Value
*V
: Call
.args()) {
442 if (V
->getType()->isPointerTy()) {
443 // The argument itself escapes.
444 Graph
.addAttr(InstantiatedValue
{V
, 0}, getAttrEscaped());
445 // The fate of argument memory is unknown. Note that since
446 // AliasAttrs is transitive with respect to dereference, we only
447 // need to specify it for the first-level memory.
448 Graph
.addNode(InstantiatedValue
{V
, 1}, getAttrUnknown());
452 if (Call
.getType()->isPointerTy()) {
453 auto *Fn
= Call
.getCalledFunction();
454 if (Fn
== nullptr || !Fn
->returnDoesNotAlias())
455 // No need to call addNode() since we've added Inst at the
456 // beginning of this function and we know it is not a global.
457 Graph
.addAttr(InstantiatedValue
{&Call
, 0}, getAttrUnknown());
461 /// Because vectors/aggregates are immutable and unaddressable, there's
462 /// nothing we can do to coax a value out of them, other than calling
463 /// Extract{Element,Value}. We can effectively treat them as pointers to
464 /// arbitrary memory locations we can store in and load from.
465 void visitExtractElementInst(ExtractElementInst
&Inst
) {
466 auto *Ptr
= Inst
.getVectorOperand();
468 addLoadEdge(Ptr
, Val
);
471 void visitInsertElementInst(InsertElementInst
&Inst
) {
472 auto *Vec
= Inst
.getOperand(0);
473 auto *Val
= Inst
.getOperand(1);
474 addAssignEdge(Vec
, &Inst
);
475 addStoreEdge(Val
, &Inst
);
478 void visitLandingPadInst(LandingPadInst
&Inst
) {
479 // Exceptions come from "nowhere", from our analysis' perspective.
480 // So we place the instruction its own group, noting that said group may
482 if (Inst
.getType()->isPointerTy())
483 addNode(&Inst
, getAttrUnknown());
486 void visitInsertValueInst(InsertValueInst
&Inst
) {
487 auto *Agg
= Inst
.getOperand(0);
488 auto *Val
= Inst
.getOperand(1);
489 addAssignEdge(Agg
, &Inst
);
490 addStoreEdge(Val
, &Inst
);
493 void visitExtractValueInst(ExtractValueInst
&Inst
) {
494 auto *Ptr
= Inst
.getAggregateOperand();
495 addLoadEdge(Ptr
, &Inst
);
498 void visitShuffleVectorInst(ShuffleVectorInst
&Inst
) {
499 auto *From1
= Inst
.getOperand(0);
500 auto *From2
= Inst
.getOperand(1);
501 addAssignEdge(From1
, &Inst
);
502 addAssignEdge(From2
, &Inst
);
505 void visitConstantExpr(ConstantExpr
*CE
) {
506 switch (CE
->getOpcode()) {
507 case Instruction::GetElementPtr
: {
508 auto GEPOp
= cast
<GEPOperator
>(CE
);
513 case Instruction::PtrToInt
: {
514 addNode(CE
->getOperand(0), getAttrEscaped());
518 case Instruction::IntToPtr
: {
519 addNode(CE
, getAttrUnknown());
523 case Instruction::BitCast
:
524 case Instruction::AddrSpaceCast
:
525 case Instruction::Trunc
:
526 case Instruction::ZExt
:
527 case Instruction::SExt
:
528 case Instruction::FPExt
:
529 case Instruction::FPTrunc
:
530 case Instruction::UIToFP
:
531 case Instruction::SIToFP
:
532 case Instruction::FPToUI
:
533 case Instruction::FPToSI
: {
534 addAssignEdge(CE
->getOperand(0), CE
);
538 case Instruction::Select
: {
539 addAssignEdge(CE
->getOperand(1), CE
);
540 addAssignEdge(CE
->getOperand(2), CE
);
544 case Instruction::InsertElement
:
545 case Instruction::InsertValue
: {
546 addAssignEdge(CE
->getOperand(0), CE
);
547 addStoreEdge(CE
->getOperand(1), CE
);
551 case Instruction::ExtractElement
:
552 case Instruction::ExtractValue
: {
553 addLoadEdge(CE
->getOperand(0), CE
);
557 case Instruction::Add
:
558 case Instruction::Sub
:
559 case Instruction::FSub
:
560 case Instruction::Mul
:
561 case Instruction::FMul
:
562 case Instruction::UDiv
:
563 case Instruction::SDiv
:
564 case Instruction::FDiv
:
565 case Instruction::URem
:
566 case Instruction::SRem
:
567 case Instruction::FRem
:
568 case Instruction::And
:
569 case Instruction::Or
:
570 case Instruction::Xor
:
571 case Instruction::Shl
:
572 case Instruction::LShr
:
573 case Instruction::AShr
:
574 case Instruction::ICmp
:
575 case Instruction::FCmp
:
576 case Instruction::ShuffleVector
: {
577 addAssignEdge(CE
->getOperand(0), CE
);
578 addAssignEdge(CE
->getOperand(1), CE
);
583 llvm_unreachable("Unknown instruction type encountered!");
590 // Determines whether or not we an instruction is useless to us (e.g.
592 static bool hasUsefulEdges(Instruction
*Inst
) {
593 bool IsNonInvokeRetTerminator
= Inst
->isTerminator() &&
594 !isa
<InvokeInst
>(Inst
) &&
595 !isa
<ReturnInst
>(Inst
);
596 return !isa
<CmpInst
>(Inst
) && !isa
<FenceInst
>(Inst
) &&
597 !IsNonInvokeRetTerminator
;
600 void addArgumentToGraph(Argument
&Arg
) {
601 if (Arg
.getType()->isPointerTy()) {
602 Graph
.addNode(InstantiatedValue
{&Arg
, 0},
603 getGlobalOrArgAttrFromValue(Arg
));
604 // Pointees of a formal parameter is known to the caller
605 Graph
.addNode(InstantiatedValue
{&Arg
, 1}, getAttrCaller());
609 // Given an Instruction, this will add it to the graph, along with any
610 // Instructions that are potentially only available from said Instruction
611 // For example, given the following line:
612 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
613 // addInstructionToGraph would add both the `load` and `getelementptr`
614 // instructions to the graph appropriately.
615 void addInstructionToGraph(GetEdgesVisitor
&Visitor
, Instruction
&Inst
) {
616 if (!hasUsefulEdges(&Inst
))
622 // Builds the graph needed for constructing the StratifiedSets for the given
624 void buildGraphFrom(Function
&Fn
) {
625 GetEdgesVisitor
Visitor(*this, Fn
.getParent()->getDataLayout());
627 for (auto &Bb
: Fn
.getBasicBlockList())
628 for (auto &Inst
: Bb
.getInstList())
629 addInstructionToGraph(Visitor
, Inst
);
631 for (auto &Arg
: Fn
.args())
632 addArgumentToGraph(Arg
);
636 CFLGraphBuilder(CFLAA
&Analysis
, const TargetLibraryInfo
&TLI
, Function
&Fn
)
637 : Analysis(Analysis
), TLI(TLI
) {
641 const CFLGraph
&getCFLGraph() const { return Graph
; }
642 const SmallVector
<Value
*, 4> &getReturnValues() const {
643 return ReturnedValues
;
647 } // end namespace cflaa
648 } // end namespace llvm
650 #endif // LLVM_LIB_ANALYSIS_CFLGRAPH_H