[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / Analysis / CFLGraph.h
blob02a13d673f40bee2b61e2b4f138784305ad5fba6
1 //===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based
11 /// alias analysis.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_ANALYSIS_CFLGRAPH_H
16 #define LLVM_LIB_ANALYSIS_CFLGRAPH_H
18 #include "AliasAnalysisSummary.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Analysis/MemoryBuiltins.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/IR/Argument.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/InstVisitor.h"
32 #include "llvm/IR/InstrTypes.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/Operator.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/ErrorHandling.h"
40 #include <cassert>
41 #include <cstdint>
42 #include <vector>
44 namespace llvm {
45 namespace cflaa {
47 /// The Program Expression Graph (PEG) of CFL analysis
48 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
49 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
50 /// the main purpose of this graph is to abstract away unrelated facts and
51 /// translate the rest into a form that can be easily digested by CFL analyses.
52 /// Each Node in the graph is an InstantiatedValue, and each edge represent a
53 /// pointer assignment between InstantiatedValue. Pointer
54 /// references/dereferences are not explicitly stored in the graph: we
55 /// implicitly assume that for each node (X, I) it has a dereference edge to (X,
56 /// I+1) and a reference edge to (X, I-1).
57 class CFLGraph {
58 public:
59 using Node = InstantiatedValue;
61 struct Edge {
62 Node Other;
63 int64_t Offset;
66 using EdgeList = std::vector<Edge>;
68 struct NodeInfo {
69 EdgeList Edges, ReverseEdges;
70 AliasAttrs Attr;
73 class ValueInfo {
74 std::vector<NodeInfo> Levels;
76 public:
77 bool addNodeToLevel(unsigned Level) {
78 auto NumLevels = Levels.size();
79 if (NumLevels > Level)
80 return false;
81 Levels.resize(Level + 1);
82 return true;
85 NodeInfo &getNodeInfoAtLevel(unsigned Level) {
86 assert(Level < Levels.size());
87 return Levels[Level];
89 const NodeInfo &getNodeInfoAtLevel(unsigned Level) const {
90 assert(Level < Levels.size());
91 return Levels[Level];
94 unsigned getNumLevels() const { return Levels.size(); }
97 private:
98 using ValueMap = DenseMap<Value *, ValueInfo>;
100 ValueMap ValueImpls;
102 NodeInfo *getNode(Node N) {
103 auto Itr = ValueImpls.find(N.Val);
104 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
105 return nullptr;
106 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
109 public:
110 using const_value_iterator = ValueMap::const_iterator;
112 bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) {
113 assert(N.Val != nullptr);
114 auto &ValInfo = ValueImpls[N.Val];
115 auto Changed = ValInfo.addNodeToLevel(N.DerefLevel);
116 ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr;
117 return Changed;
120 void addAttr(Node N, AliasAttrs Attr) {
121 auto *Info = getNode(N);
122 assert(Info != nullptr);
123 Info->Attr |= Attr;
126 void addEdge(Node From, Node To, int64_t Offset = 0) {
127 auto *FromInfo = getNode(From);
128 assert(FromInfo != nullptr);
129 auto *ToInfo = getNode(To);
130 assert(ToInfo != nullptr);
132 FromInfo->Edges.push_back(Edge{To, Offset});
133 ToInfo->ReverseEdges.push_back(Edge{From, Offset});
136 const NodeInfo *getNode(Node N) const {
137 auto Itr = ValueImpls.find(N.Val);
138 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
139 return nullptr;
140 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
143 AliasAttrs attrFor(Node N) const {
144 auto *Info = getNode(N);
145 assert(Info != nullptr);
146 return Info->Attr;
149 iterator_range<const_value_iterator> value_mappings() const {
150 return make_range<const_value_iterator>(ValueImpls.begin(),
151 ValueImpls.end());
155 /// A builder class used to create CFLGraph instance from a given function
156 /// The CFL-AA that uses this builder must provide its own type as a template
157 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
158 /// needs a way of obtaining the summary of other functions when callinsts are
159 /// encountered.
160 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
161 /// member function that takes a Function& and returns the corresponding summary
162 /// as a const AliasSummary*.
163 template <typename CFLAA> class CFLGraphBuilder {
164 // Input of the builder
165 CFLAA &Analysis;
166 const TargetLibraryInfo &TLI;
168 // Output of the builder
169 CFLGraph Graph;
170 SmallVector<Value *, 4> ReturnedValues;
172 // Helper class
173 /// Gets the edges our graph should have, based on an Instruction*
174 class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
175 CFLAA &AA;
176 const DataLayout &DL;
177 const TargetLibraryInfo &TLI;
179 CFLGraph &Graph;
180 SmallVectorImpl<Value *> &ReturnValues;
182 static bool hasUsefulEdges(ConstantExpr *CE) {
183 // ConstantExpr doesn't have terminators, invokes, or fences, so only
184 // needs to check for compares.
185 return CE->getOpcode() != Instruction::ICmp &&
186 CE->getOpcode() != Instruction::FCmp;
189 // Returns possible functions called by CS into the given SmallVectorImpl.
190 // Returns true if targets found, false otherwise.
191 static bool getPossibleTargets(CallBase &Call,
192 SmallVectorImpl<Function *> &Output) {
193 if (auto *Fn = Call.getCalledFunction()) {
194 Output.push_back(Fn);
195 return true;
198 // TODO: If the call is indirect, we might be able to enumerate all
199 // potential targets of the call and return them, rather than just
200 // failing.
201 return false;
204 void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) {
205 assert(Val != nullptr && Val->getType()->isPointerTy());
206 if (auto GVal = dyn_cast<GlobalValue>(Val)) {
207 if (Graph.addNode(InstantiatedValue{GVal, 0},
208 getGlobalOrArgAttrFromValue(*GVal)))
209 Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown());
210 } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) {
211 if (hasUsefulEdges(CExpr)) {
212 if (Graph.addNode(InstantiatedValue{CExpr, 0}))
213 visitConstantExpr(CExpr);
215 } else
216 Graph.addNode(InstantiatedValue{Val, 0}, Attr);
219 void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) {
220 assert(From != nullptr && To != nullptr);
221 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
222 return;
223 addNode(From);
224 if (To != From) {
225 addNode(To);
226 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0},
227 Offset);
231 void addDerefEdge(Value *From, Value *To, bool IsRead) {
232 assert(From != nullptr && To != nullptr);
233 // FIXME: This is subtly broken, due to how we model some instructions
234 // (e.g. extractvalue, extractelement) as loads. Since those take
235 // non-pointer operands, we'll entirely skip adding edges for those.
237 // addAssignEdge seems to have a similar issue with insertvalue, etc.
238 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
239 return;
240 addNode(From);
241 addNode(To);
242 if (IsRead) {
243 Graph.addNode(InstantiatedValue{From, 1});
244 Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0});
245 } else {
246 Graph.addNode(InstantiatedValue{To, 1});
247 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 1});
251 void addLoadEdge(Value *From, Value *To) { addDerefEdge(From, To, true); }
252 void addStoreEdge(Value *From, Value *To) { addDerefEdge(From, To, false); }
254 public:
255 GetEdgesVisitor(CFLGraphBuilder &Builder, const DataLayout &DL)
256 : AA(Builder.Analysis), DL(DL), TLI(Builder.TLI), Graph(Builder.Graph),
257 ReturnValues(Builder.ReturnedValues) {}
259 void visitInstruction(Instruction &) {
260 llvm_unreachable("Unsupported instruction encountered");
263 void visitReturnInst(ReturnInst &Inst) {
264 if (auto RetVal = Inst.getReturnValue()) {
265 if (RetVal->getType()->isPointerTy()) {
266 addNode(RetVal);
267 ReturnValues.push_back(RetVal);
272 void visitPtrToIntInst(PtrToIntInst &Inst) {
273 auto *Ptr = Inst.getOperand(0);
274 addNode(Ptr, getAttrEscaped());
277 void visitIntToPtrInst(IntToPtrInst &Inst) {
278 auto *Ptr = &Inst;
279 addNode(Ptr, getAttrUnknown());
282 void visitCastInst(CastInst &Inst) {
283 auto *Src = Inst.getOperand(0);
284 addAssignEdge(Src, &Inst);
287 void visitFreezeInst(FreezeInst &Inst) {
288 // Accessing freeze(ptr) is equivalent to accessing ptr.
289 // The former raises UB iff latter raises UB.
290 auto *Src = Inst.getOperand(0);
291 addAssignEdge(Src, &Inst);
294 void visitBinaryOperator(BinaryOperator &Inst) {
295 auto *Op1 = Inst.getOperand(0);
296 auto *Op2 = Inst.getOperand(1);
297 addAssignEdge(Op1, &Inst);
298 addAssignEdge(Op2, &Inst);
301 void visitUnaryOperator(UnaryOperator &Inst) {
302 auto *Src = Inst.getOperand(0);
303 addAssignEdge(Src, &Inst);
306 void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
307 auto *Ptr = Inst.getPointerOperand();
308 auto *Val = Inst.getNewValOperand();
309 addStoreEdge(Val, Ptr);
312 void visitAtomicRMWInst(AtomicRMWInst &Inst) {
313 auto *Ptr = Inst.getPointerOperand();
314 auto *Val = Inst.getValOperand();
315 addStoreEdge(Val, Ptr);
318 void visitPHINode(PHINode &Inst) {
319 for (Value *Val : Inst.incoming_values())
320 addAssignEdge(Val, &Inst);
323 void visitGEP(GEPOperator &GEPOp) {
324 uint64_t Offset = UnknownOffset;
325 APInt APOffset(DL.getPointerSizeInBits(GEPOp.getPointerAddressSpace()),
327 if (GEPOp.accumulateConstantOffset(DL, APOffset))
328 Offset = APOffset.getSExtValue();
330 auto *Op = GEPOp.getPointerOperand();
331 addAssignEdge(Op, &GEPOp, Offset);
334 void visitGetElementPtrInst(GetElementPtrInst &Inst) {
335 auto *GEPOp = cast<GEPOperator>(&Inst);
336 visitGEP(*GEPOp);
339 void visitSelectInst(SelectInst &Inst) {
340 // Condition is not processed here (The actual statement producing
341 // the condition result is processed elsewhere). For select, the
342 // condition is evaluated, but not loaded, stored, or assigned
343 // simply as a result of being the condition of a select.
345 auto *TrueVal = Inst.getTrueValue();
346 auto *FalseVal = Inst.getFalseValue();
347 addAssignEdge(TrueVal, &Inst);
348 addAssignEdge(FalseVal, &Inst);
351 void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); }
353 void visitLoadInst(LoadInst &Inst) {
354 auto *Ptr = Inst.getPointerOperand();
355 auto *Val = &Inst;
356 addLoadEdge(Ptr, Val);
359 void visitStoreInst(StoreInst &Inst) {
360 auto *Ptr = Inst.getPointerOperand();
361 auto *Val = Inst.getValueOperand();
362 addStoreEdge(Val, Ptr);
365 void visitVAArgInst(VAArgInst &Inst) {
366 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
367 // does
368 // two things:
369 // 1. Loads a value from *((T*)*Ptr).
370 // 2. Increments (stores to) *Ptr by some target-specific amount.
371 // For now, we'll handle this like a landingpad instruction (by placing
372 // the
373 // result in its own group, and having that group alias externals).
374 if (Inst.getType()->isPointerTy())
375 addNode(&Inst, getAttrUnknown());
378 static bool isFunctionExternal(Function *Fn) {
379 return !Fn->hasExactDefinition();
382 bool tryInterproceduralAnalysis(CallBase &Call,
383 const SmallVectorImpl<Function *> &Fns) {
384 assert(Fns.size() > 0);
386 if (Call.arg_size() > MaxSupportedArgsInSummary)
387 return false;
389 // Exit early if we'll fail anyway
390 for (auto *Fn : Fns) {
391 if (isFunctionExternal(Fn) || Fn->isVarArg())
392 return false;
393 // Fail if the caller does not provide enough arguments
394 assert(Fn->arg_size() <= Call.arg_size());
395 if (!AA.getAliasSummary(*Fn))
396 return false;
399 for (auto *Fn : Fns) {
400 auto Summary = AA.getAliasSummary(*Fn);
401 assert(Summary != nullptr);
403 auto &RetParamRelations = Summary->RetParamRelations;
404 for (auto &Relation : RetParamRelations) {
405 auto IRelation = instantiateExternalRelation(Relation, Call);
406 if (IRelation.hasValue()) {
407 Graph.addNode(IRelation->From);
408 Graph.addNode(IRelation->To);
409 Graph.addEdge(IRelation->From, IRelation->To);
413 auto &RetParamAttributes = Summary->RetParamAttributes;
414 for (auto &Attribute : RetParamAttributes) {
415 auto IAttr = instantiateExternalAttribute(Attribute, Call);
416 if (IAttr.hasValue())
417 Graph.addNode(IAttr->IValue, IAttr->Attr);
421 return true;
424 void visitCallBase(CallBase &Call) {
425 // Make sure all arguments and return value are added to the graph first
426 for (Value *V : Call.args())
427 if (V->getType()->isPointerTy())
428 addNode(V);
429 if (Call.getType()->isPointerTy())
430 addNode(&Call);
432 // Check if Inst is a call to a library function that
433 // allocates/deallocates on the heap. Those kinds of functions do not
434 // introduce any aliases.
435 // TODO: address other common library functions such as realloc(),
436 // strdup(), etc.
437 if (isMallocOrCallocLikeFn(&Call, &TLI) || isFreeCall(&Call, &TLI))
438 return;
440 // TODO: Add support for noalias args/all the other fun function
441 // attributes that we can tack on.
442 SmallVector<Function *, 4> Targets;
443 if (getPossibleTargets(Call, Targets))
444 if (tryInterproceduralAnalysis(Call, Targets))
445 return;
447 // Because the function is opaque, we need to note that anything
448 // could have happened to the arguments (unless the function is marked
449 // readonly or readnone), and that the result could alias just about
450 // anything, too (unless the result is marked noalias).
451 if (!Call.onlyReadsMemory())
452 for (Value *V : Call.args()) {
453 if (V->getType()->isPointerTy()) {
454 // The argument itself escapes.
455 Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
456 // The fate of argument memory is unknown. Note that since
457 // AliasAttrs is transitive with respect to dereference, we only
458 // need to specify it for the first-level memory.
459 Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown());
463 if (Call.getType()->isPointerTy()) {
464 auto *Fn = Call.getCalledFunction();
465 if (Fn == nullptr || !Fn->returnDoesNotAlias())
466 // No need to call addNode() since we've added Inst at the
467 // beginning of this function and we know it is not a global.
468 Graph.addAttr(InstantiatedValue{&Call, 0}, getAttrUnknown());
472 /// Because vectors/aggregates are immutable and unaddressable, there's
473 /// nothing we can do to coax a value out of them, other than calling
474 /// Extract{Element,Value}. We can effectively treat them as pointers to
475 /// arbitrary memory locations we can store in and load from.
476 void visitExtractElementInst(ExtractElementInst &Inst) {
477 auto *Ptr = Inst.getVectorOperand();
478 auto *Val = &Inst;
479 addLoadEdge(Ptr, Val);
482 void visitInsertElementInst(InsertElementInst &Inst) {
483 auto *Vec = Inst.getOperand(0);
484 auto *Val = Inst.getOperand(1);
485 addAssignEdge(Vec, &Inst);
486 addStoreEdge(Val, &Inst);
489 void visitLandingPadInst(LandingPadInst &Inst) {
490 // Exceptions come from "nowhere", from our analysis' perspective.
491 // So we place the instruction its own group, noting that said group may
492 // alias externals
493 if (Inst.getType()->isPointerTy())
494 addNode(&Inst, getAttrUnknown());
497 void visitInsertValueInst(InsertValueInst &Inst) {
498 auto *Agg = Inst.getOperand(0);
499 auto *Val = Inst.getOperand(1);
500 addAssignEdge(Agg, &Inst);
501 addStoreEdge(Val, &Inst);
504 void visitExtractValueInst(ExtractValueInst &Inst) {
505 auto *Ptr = Inst.getAggregateOperand();
506 addLoadEdge(Ptr, &Inst);
509 void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
510 auto *From1 = Inst.getOperand(0);
511 auto *From2 = Inst.getOperand(1);
512 addAssignEdge(From1, &Inst);
513 addAssignEdge(From2, &Inst);
516 void visitConstantExpr(ConstantExpr *CE) {
517 switch (CE->getOpcode()) {
518 case Instruction::GetElementPtr: {
519 auto GEPOp = cast<GEPOperator>(CE);
520 visitGEP(*GEPOp);
521 break;
524 case Instruction::PtrToInt: {
525 addNode(CE->getOperand(0), getAttrEscaped());
526 break;
529 case Instruction::IntToPtr: {
530 addNode(CE, getAttrUnknown());
531 break;
534 case Instruction::BitCast:
535 case Instruction::AddrSpaceCast:
536 case Instruction::Trunc:
537 case Instruction::ZExt:
538 case Instruction::SExt:
539 case Instruction::FPExt:
540 case Instruction::FPTrunc:
541 case Instruction::UIToFP:
542 case Instruction::SIToFP:
543 case Instruction::FPToUI:
544 case Instruction::FPToSI: {
545 addAssignEdge(CE->getOperand(0), CE);
546 break;
549 case Instruction::Select: {
550 addAssignEdge(CE->getOperand(1), CE);
551 addAssignEdge(CE->getOperand(2), CE);
552 break;
555 case Instruction::InsertElement:
556 case Instruction::InsertValue: {
557 addAssignEdge(CE->getOperand(0), CE);
558 addStoreEdge(CE->getOperand(1), CE);
559 break;
562 case Instruction::ExtractElement:
563 case Instruction::ExtractValue: {
564 addLoadEdge(CE->getOperand(0), CE);
565 break;
568 case Instruction::Add:
569 case Instruction::FAdd:
570 case Instruction::Sub:
571 case Instruction::FSub:
572 case Instruction::Mul:
573 case Instruction::FMul:
574 case Instruction::UDiv:
575 case Instruction::SDiv:
576 case Instruction::FDiv:
577 case Instruction::URem:
578 case Instruction::SRem:
579 case Instruction::FRem:
580 case Instruction::And:
581 case Instruction::Or:
582 case Instruction::Xor:
583 case Instruction::Shl:
584 case Instruction::LShr:
585 case Instruction::AShr:
586 case Instruction::ICmp:
587 case Instruction::FCmp:
588 case Instruction::ShuffleVector: {
589 addAssignEdge(CE->getOperand(0), CE);
590 addAssignEdge(CE->getOperand(1), CE);
591 break;
594 case Instruction::FNeg: {
595 addAssignEdge(CE->getOperand(0), CE);
596 break;
599 default:
600 llvm_unreachable("Unknown instruction type encountered!");
605 // Helper functions
607 // Determines whether or not we an instruction is useless to us (e.g.
608 // FenceInst)
609 static bool hasUsefulEdges(Instruction *Inst) {
610 bool IsNonInvokeRetTerminator = Inst->isTerminator() &&
611 !isa<InvokeInst>(Inst) &&
612 !isa<ReturnInst>(Inst);
613 return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
614 !IsNonInvokeRetTerminator;
617 void addArgumentToGraph(Argument &Arg) {
618 if (Arg.getType()->isPointerTy()) {
619 Graph.addNode(InstantiatedValue{&Arg, 0},
620 getGlobalOrArgAttrFromValue(Arg));
621 // Pointees of a formal parameter is known to the caller
622 Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller());
626 // Given an Instruction, this will add it to the graph, along with any
627 // Instructions that are potentially only available from said Instruction
628 // For example, given the following line:
629 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
630 // addInstructionToGraph would add both the `load` and `getelementptr`
631 // instructions to the graph appropriately.
632 void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) {
633 if (!hasUsefulEdges(&Inst))
634 return;
636 Visitor.visit(Inst);
639 // Builds the graph needed for constructing the StratifiedSets for the given
640 // function
641 void buildGraphFrom(Function &Fn) {
642 GetEdgesVisitor Visitor(*this, Fn.getParent()->getDataLayout());
644 for (auto &Bb : Fn.getBasicBlockList())
645 for (auto &Inst : Bb.getInstList())
646 addInstructionToGraph(Visitor, Inst);
648 for (auto &Arg : Fn.args())
649 addArgumentToGraph(Arg);
652 public:
653 CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn)
654 : Analysis(Analysis), TLI(TLI) {
655 buildGraphFrom(Fn);
658 const CFLGraph &getCFLGraph() const { return Graph; }
659 const SmallVector<Value *, 4> &getReturnValues() const {
660 return ReturnedValues;
664 } // end namespace cflaa
665 } // end namespace llvm
667 #endif // LLVM_LIB_ANALYSIS_CFLGRAPH_H