[NFC] Update memcpy tests
[llvm-complete.git] / lib / Analysis / CFLGraph.h
blob5783c5bc9bdd5ca9e4a7e8c6c90788706baa42a1
1 //===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based
11 /// alias analysis.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_ANALYSIS_CFLGRAPH_H
16 #define LLVM_LIB_ANALYSIS_CFLGRAPH_H
18 #include "AliasAnalysisSummary.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Analysis/MemoryBuiltins.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/IR/Argument.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/InstVisitor.h"
32 #include "llvm/IR/InstrTypes.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/Operator.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/ErrorHandling.h"
40 #include <cassert>
41 #include <cstdint>
42 #include <vector>
44 namespace llvm {
45 namespace cflaa {
47 /// The Program Expression Graph (PEG) of CFL analysis
48 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
49 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
50 /// the main purpose of this graph is to abstract away unrelated facts and
51 /// translate the rest into a form that can be easily digested by CFL analyses.
52 /// Each Node in the graph is an InstantiatedValue, and each edge represent a
53 /// pointer assignment between InstantiatedValue. Pointer
54 /// references/dereferences are not explicitly stored in the graph: we
55 /// implicitly assume that for each node (X, I) it has a dereference edge to (X,
56 /// I+1) and a reference edge to (X, I-1).
57 class CFLGraph {
58 public:
59 using Node = InstantiatedValue;
61 struct Edge {
62 Node Other;
63 int64_t Offset;
66 using EdgeList = std::vector<Edge>;
68 struct NodeInfo {
69 EdgeList Edges, ReverseEdges;
70 AliasAttrs Attr;
73 class ValueInfo {
74 std::vector<NodeInfo> Levels;
76 public:
77 bool addNodeToLevel(unsigned Level) {
78 auto NumLevels = Levels.size();
79 if (NumLevels > Level)
80 return false;
81 Levels.resize(Level + 1);
82 return true;
85 NodeInfo &getNodeInfoAtLevel(unsigned Level) {
86 assert(Level < Levels.size());
87 return Levels[Level];
89 const NodeInfo &getNodeInfoAtLevel(unsigned Level) const {
90 assert(Level < Levels.size());
91 return Levels[Level];
94 unsigned getNumLevels() const { return Levels.size(); }
97 private:
98 using ValueMap = DenseMap<Value *, ValueInfo>;
100 ValueMap ValueImpls;
102 NodeInfo *getNode(Node N) {
103 auto Itr = ValueImpls.find(N.Val);
104 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
105 return nullptr;
106 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
109 public:
110 using const_value_iterator = ValueMap::const_iterator;
112 bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) {
113 assert(N.Val != nullptr);
114 auto &ValInfo = ValueImpls[N.Val];
115 auto Changed = ValInfo.addNodeToLevel(N.DerefLevel);
116 ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr;
117 return Changed;
120 void addAttr(Node N, AliasAttrs Attr) {
121 auto *Info = getNode(N);
122 assert(Info != nullptr);
123 Info->Attr |= Attr;
126 void addEdge(Node From, Node To, int64_t Offset = 0) {
127 auto *FromInfo = getNode(From);
128 assert(FromInfo != nullptr);
129 auto *ToInfo = getNode(To);
130 assert(ToInfo != nullptr);
132 FromInfo->Edges.push_back(Edge{To, Offset});
133 ToInfo->ReverseEdges.push_back(Edge{From, Offset});
136 const NodeInfo *getNode(Node N) const {
137 auto Itr = ValueImpls.find(N.Val);
138 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
139 return nullptr;
140 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
143 AliasAttrs attrFor(Node N) const {
144 auto *Info = getNode(N);
145 assert(Info != nullptr);
146 return Info->Attr;
149 iterator_range<const_value_iterator> value_mappings() const {
150 return make_range<const_value_iterator>(ValueImpls.begin(),
151 ValueImpls.end());
155 ///A builder class used to create CFLGraph instance from a given function
156 /// The CFL-AA that uses this builder must provide its own type as a template
157 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
158 /// needs a way of obtaining the summary of other functions when callinsts are
159 /// encountered.
160 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
161 /// member function that takes a Function& and returns the corresponding summary
162 /// as a const AliasSummary*.
163 template <typename CFLAA> class CFLGraphBuilder {
164 // Input of the builder
165 CFLAA &Analysis;
166 const TargetLibraryInfo &TLI;
168 // Output of the builder
169 CFLGraph Graph;
170 SmallVector<Value *, 4> ReturnedValues;
172 // Helper class
173 /// Gets the edges our graph should have, based on an Instruction*
174 class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
175 CFLAA &AA;
176 const DataLayout &DL;
177 const TargetLibraryInfo &TLI;
179 CFLGraph &Graph;
180 SmallVectorImpl<Value *> &ReturnValues;
182 static bool hasUsefulEdges(ConstantExpr *CE) {
183 // ConstantExpr doesn't have terminators, invokes, or fences, so only
184 // needs
185 // to check for compares.
186 return CE->getOpcode() != Instruction::ICmp &&
187 CE->getOpcode() != Instruction::FCmp;
190 // Returns possible functions called by CS into the given SmallVectorImpl.
191 // Returns true if targets found, false otherwise.
192 static bool getPossibleTargets(CallBase &Call,
193 SmallVectorImpl<Function *> &Output) {
194 if (auto *Fn = Call.getCalledFunction()) {
195 Output.push_back(Fn);
196 return true;
199 // TODO: If the call is indirect, we might be able to enumerate all
200 // potential
201 // targets of the call and return them, rather than just failing.
202 return false;
205 void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) {
206 assert(Val != nullptr && Val->getType()->isPointerTy());
207 if (auto GVal = dyn_cast<GlobalValue>(Val)) {
208 if (Graph.addNode(InstantiatedValue{GVal, 0},
209 getGlobalOrArgAttrFromValue(*GVal)))
210 Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown());
211 } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) {
212 if (hasUsefulEdges(CExpr)) {
213 if (Graph.addNode(InstantiatedValue{CExpr, 0}))
214 visitConstantExpr(CExpr);
216 } else
217 Graph.addNode(InstantiatedValue{Val, 0}, Attr);
220 void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) {
221 assert(From != nullptr && To != nullptr);
222 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
223 return;
224 addNode(From);
225 if (To != From) {
226 addNode(To);
227 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0},
228 Offset);
232 void addDerefEdge(Value *From, Value *To, bool IsRead) {
233 assert(From != nullptr && To != nullptr);
234 // FIXME: This is subtly broken, due to how we model some instructions
235 // (e.g. extractvalue, extractelement) as loads. Since those take
236 // non-pointer operands, we'll entirely skip adding edges for those.
238 // addAssignEdge seems to have a similar issue with insertvalue, etc.
239 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
240 return;
241 addNode(From);
242 addNode(To);
243 if (IsRead) {
244 Graph.addNode(InstantiatedValue{From, 1});
245 Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0});
246 } else {
247 Graph.addNode(InstantiatedValue{To, 1});
248 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 1});
252 void addLoadEdge(Value *From, Value *To) { addDerefEdge(From, To, true); }
253 void addStoreEdge(Value *From, Value *To) { addDerefEdge(From, To, false); }
255 public:
256 GetEdgesVisitor(CFLGraphBuilder &Builder, const DataLayout &DL)
257 : AA(Builder.Analysis), DL(DL), TLI(Builder.TLI), Graph(Builder.Graph),
258 ReturnValues(Builder.ReturnedValues) {}
260 void visitInstruction(Instruction &) {
261 llvm_unreachable("Unsupported instruction encountered");
264 void visitReturnInst(ReturnInst &Inst) {
265 if (auto RetVal = Inst.getReturnValue()) {
266 if (RetVal->getType()->isPointerTy()) {
267 addNode(RetVal);
268 ReturnValues.push_back(RetVal);
273 void visitPtrToIntInst(PtrToIntInst &Inst) {
274 auto *Ptr = Inst.getOperand(0);
275 addNode(Ptr, getAttrEscaped());
278 void visitIntToPtrInst(IntToPtrInst &Inst) {
279 auto *Ptr = &Inst;
280 addNode(Ptr, getAttrUnknown());
283 void visitCastInst(CastInst &Inst) {
284 auto *Src = Inst.getOperand(0);
285 addAssignEdge(Src, &Inst);
288 void visitBinaryOperator(BinaryOperator &Inst) {
289 auto *Op1 = Inst.getOperand(0);
290 auto *Op2 = Inst.getOperand(1);
291 addAssignEdge(Op1, &Inst);
292 addAssignEdge(Op2, &Inst);
295 void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
296 auto *Ptr = Inst.getPointerOperand();
297 auto *Val = Inst.getNewValOperand();
298 addStoreEdge(Val, Ptr);
301 void visitAtomicRMWInst(AtomicRMWInst &Inst) {
302 auto *Ptr = Inst.getPointerOperand();
303 auto *Val = Inst.getValOperand();
304 addStoreEdge(Val, Ptr);
307 void visitPHINode(PHINode &Inst) {
308 for (Value *Val : Inst.incoming_values())
309 addAssignEdge(Val, &Inst);
312 void visitGEP(GEPOperator &GEPOp) {
313 uint64_t Offset = UnknownOffset;
314 APInt APOffset(DL.getPointerSizeInBits(GEPOp.getPointerAddressSpace()),
316 if (GEPOp.accumulateConstantOffset(DL, APOffset))
317 Offset = APOffset.getSExtValue();
319 auto *Op = GEPOp.getPointerOperand();
320 addAssignEdge(Op, &GEPOp, Offset);
323 void visitGetElementPtrInst(GetElementPtrInst &Inst) {
324 auto *GEPOp = cast<GEPOperator>(&Inst);
325 visitGEP(*GEPOp);
328 void visitSelectInst(SelectInst &Inst) {
329 // Condition is not processed here (The actual statement producing
330 // the condition result is processed elsewhere). For select, the
331 // condition is evaluated, but not loaded, stored, or assigned
332 // simply as a result of being the condition of a select.
334 auto *TrueVal = Inst.getTrueValue();
335 auto *FalseVal = Inst.getFalseValue();
336 addAssignEdge(TrueVal, &Inst);
337 addAssignEdge(FalseVal, &Inst);
340 void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); }
342 void visitLoadInst(LoadInst &Inst) {
343 auto *Ptr = Inst.getPointerOperand();
344 auto *Val = &Inst;
345 addLoadEdge(Ptr, Val);
348 void visitStoreInst(StoreInst &Inst) {
349 auto *Ptr = Inst.getPointerOperand();
350 auto *Val = Inst.getValueOperand();
351 addStoreEdge(Val, Ptr);
354 void visitVAArgInst(VAArgInst &Inst) {
355 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
356 // does
357 // two things:
358 // 1. Loads a value from *((T*)*Ptr).
359 // 2. Increments (stores to) *Ptr by some target-specific amount.
360 // For now, we'll handle this like a landingpad instruction (by placing
361 // the
362 // result in its own group, and having that group alias externals).
363 if (Inst.getType()->isPointerTy())
364 addNode(&Inst, getAttrUnknown());
367 static bool isFunctionExternal(Function *Fn) {
368 return !Fn->hasExactDefinition();
371 bool tryInterproceduralAnalysis(CallBase &Call,
372 const SmallVectorImpl<Function *> &Fns) {
373 assert(Fns.size() > 0);
375 if (Call.arg_size() > MaxSupportedArgsInSummary)
376 return false;
378 // Exit early if we'll fail anyway
379 for (auto *Fn : Fns) {
380 if (isFunctionExternal(Fn) || Fn->isVarArg())
381 return false;
382 // Fail if the caller does not provide enough arguments
383 assert(Fn->arg_size() <= Call.arg_size());
384 if (!AA.getAliasSummary(*Fn))
385 return false;
388 for (auto *Fn : Fns) {
389 auto Summary = AA.getAliasSummary(*Fn);
390 assert(Summary != nullptr);
392 auto &RetParamRelations = Summary->RetParamRelations;
393 for (auto &Relation : RetParamRelations) {
394 auto IRelation = instantiateExternalRelation(Relation, Call);
395 if (IRelation.hasValue()) {
396 Graph.addNode(IRelation->From);
397 Graph.addNode(IRelation->To);
398 Graph.addEdge(IRelation->From, IRelation->To);
402 auto &RetParamAttributes = Summary->RetParamAttributes;
403 for (auto &Attribute : RetParamAttributes) {
404 auto IAttr = instantiateExternalAttribute(Attribute, Call);
405 if (IAttr.hasValue())
406 Graph.addNode(IAttr->IValue, IAttr->Attr);
410 return true;
413 void visitCallBase(CallBase &Call) {
414 // Make sure all arguments and return value are added to the graph first
415 for (Value *V : Call.args())
416 if (V->getType()->isPointerTy())
417 addNode(V);
418 if (Call.getType()->isPointerTy())
419 addNode(&Call);
421 // Check if Inst is a call to a library function that
422 // allocates/deallocates on the heap. Those kinds of functions do not
423 // introduce any aliases.
424 // TODO: address other common library functions such as realloc(),
425 // strdup(), etc.
426 if (isMallocOrCallocLikeFn(&Call, &TLI) || isFreeCall(&Call, &TLI))
427 return;
429 // TODO: Add support for noalias args/all the other fun function
430 // attributes that we can tack on.
431 SmallVector<Function *, 4> Targets;
432 if (getPossibleTargets(Call, Targets))
433 if (tryInterproceduralAnalysis(Call, Targets))
434 return;
436 // Because the function is opaque, we need to note that anything
437 // could have happened to the arguments (unless the function is marked
438 // readonly or readnone), and that the result could alias just about
439 // anything, too (unless the result is marked noalias).
440 if (!Call.onlyReadsMemory())
441 for (Value *V : Call.args()) {
442 if (V->getType()->isPointerTy()) {
443 // The argument itself escapes.
444 Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
445 // The fate of argument memory is unknown. Note that since
446 // AliasAttrs is transitive with respect to dereference, we only
447 // need to specify it for the first-level memory.
448 Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown());
452 if (Call.getType()->isPointerTy()) {
453 auto *Fn = Call.getCalledFunction();
454 if (Fn == nullptr || !Fn->returnDoesNotAlias())
455 // No need to call addNode() since we've added Inst at the
456 // beginning of this function and we know it is not a global.
457 Graph.addAttr(InstantiatedValue{&Call, 0}, getAttrUnknown());
461 /// Because vectors/aggregates are immutable and unaddressable, there's
462 /// nothing we can do to coax a value out of them, other than calling
463 /// Extract{Element,Value}. We can effectively treat them as pointers to
464 /// arbitrary memory locations we can store in and load from.
465 void visitExtractElementInst(ExtractElementInst &Inst) {
466 auto *Ptr = Inst.getVectorOperand();
467 auto *Val = &Inst;
468 addLoadEdge(Ptr, Val);
471 void visitInsertElementInst(InsertElementInst &Inst) {
472 auto *Vec = Inst.getOperand(0);
473 auto *Val = Inst.getOperand(1);
474 addAssignEdge(Vec, &Inst);
475 addStoreEdge(Val, &Inst);
478 void visitLandingPadInst(LandingPadInst &Inst) {
479 // Exceptions come from "nowhere", from our analysis' perspective.
480 // So we place the instruction its own group, noting that said group may
481 // alias externals
482 if (Inst.getType()->isPointerTy())
483 addNode(&Inst, getAttrUnknown());
486 void visitInsertValueInst(InsertValueInst &Inst) {
487 auto *Agg = Inst.getOperand(0);
488 auto *Val = Inst.getOperand(1);
489 addAssignEdge(Agg, &Inst);
490 addStoreEdge(Val, &Inst);
493 void visitExtractValueInst(ExtractValueInst &Inst) {
494 auto *Ptr = Inst.getAggregateOperand();
495 addLoadEdge(Ptr, &Inst);
498 void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
499 auto *From1 = Inst.getOperand(0);
500 auto *From2 = Inst.getOperand(1);
501 addAssignEdge(From1, &Inst);
502 addAssignEdge(From2, &Inst);
505 void visitConstantExpr(ConstantExpr *CE) {
506 switch (CE->getOpcode()) {
507 case Instruction::GetElementPtr: {
508 auto GEPOp = cast<GEPOperator>(CE);
509 visitGEP(*GEPOp);
510 break;
513 case Instruction::PtrToInt: {
514 addNode(CE->getOperand(0), getAttrEscaped());
515 break;
518 case Instruction::IntToPtr: {
519 addNode(CE, getAttrUnknown());
520 break;
523 case Instruction::BitCast:
524 case Instruction::AddrSpaceCast:
525 case Instruction::Trunc:
526 case Instruction::ZExt:
527 case Instruction::SExt:
528 case Instruction::FPExt:
529 case Instruction::FPTrunc:
530 case Instruction::UIToFP:
531 case Instruction::SIToFP:
532 case Instruction::FPToUI:
533 case Instruction::FPToSI: {
534 addAssignEdge(CE->getOperand(0), CE);
535 break;
538 case Instruction::Select: {
539 addAssignEdge(CE->getOperand(1), CE);
540 addAssignEdge(CE->getOperand(2), CE);
541 break;
544 case Instruction::InsertElement:
545 case Instruction::InsertValue: {
546 addAssignEdge(CE->getOperand(0), CE);
547 addStoreEdge(CE->getOperand(1), CE);
548 break;
551 case Instruction::ExtractElement:
552 case Instruction::ExtractValue: {
553 addLoadEdge(CE->getOperand(0), CE);
554 break;
557 case Instruction::Add:
558 case Instruction::Sub:
559 case Instruction::FSub:
560 case Instruction::Mul:
561 case Instruction::FMul:
562 case Instruction::UDiv:
563 case Instruction::SDiv:
564 case Instruction::FDiv:
565 case Instruction::URem:
566 case Instruction::SRem:
567 case Instruction::FRem:
568 case Instruction::And:
569 case Instruction::Or:
570 case Instruction::Xor:
571 case Instruction::Shl:
572 case Instruction::LShr:
573 case Instruction::AShr:
574 case Instruction::ICmp:
575 case Instruction::FCmp:
576 case Instruction::ShuffleVector: {
577 addAssignEdge(CE->getOperand(0), CE);
578 addAssignEdge(CE->getOperand(1), CE);
579 break;
582 default:
583 llvm_unreachable("Unknown instruction type encountered!");
588 // Helper functions
590 // Determines whether or not we an instruction is useless to us (e.g.
591 // FenceInst)
592 static bool hasUsefulEdges(Instruction *Inst) {
593 bool IsNonInvokeRetTerminator = Inst->isTerminator() &&
594 !isa<InvokeInst>(Inst) &&
595 !isa<ReturnInst>(Inst);
596 return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
597 !IsNonInvokeRetTerminator;
600 void addArgumentToGraph(Argument &Arg) {
601 if (Arg.getType()->isPointerTy()) {
602 Graph.addNode(InstantiatedValue{&Arg, 0},
603 getGlobalOrArgAttrFromValue(Arg));
604 // Pointees of a formal parameter is known to the caller
605 Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller());
609 // Given an Instruction, this will add it to the graph, along with any
610 // Instructions that are potentially only available from said Instruction
611 // For example, given the following line:
612 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
613 // addInstructionToGraph would add both the `load` and `getelementptr`
614 // instructions to the graph appropriately.
615 void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) {
616 if (!hasUsefulEdges(&Inst))
617 return;
619 Visitor.visit(Inst);
622 // Builds the graph needed for constructing the StratifiedSets for the given
623 // function
624 void buildGraphFrom(Function &Fn) {
625 GetEdgesVisitor Visitor(*this, Fn.getParent()->getDataLayout());
627 for (auto &Bb : Fn.getBasicBlockList())
628 for (auto &Inst : Bb.getInstList())
629 addInstructionToGraph(Visitor, Inst);
631 for (auto &Arg : Fn.args())
632 addArgumentToGraph(Arg);
635 public:
636 CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn)
637 : Analysis(Analysis), TLI(TLI) {
638 buildGraphFrom(Fn);
641 const CFLGraph &getCFLGraph() const { return Graph; }
642 const SmallVector<Value *, 4> &getReturnValues() const {
643 return ReturnedValues;
647 } // end namespace cflaa
648 } // end namespace llvm
650 #endif // LLVM_LIB_ANALYSIS_CFLGRAPH_H