1 //===- DependenceGraphBuilder.cpp ------------------------------------------==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // This file implements common steps of the build algorithm for construction
9 // of dependence graphs such as DDG and PDG.
10 //===----------------------------------------------------------------------===//
12 #include "llvm/Analysis/DependenceGraphBuilder.h"
13 #include "llvm/ADT/DepthFirstIterator.h"
14 #include "llvm/ADT/EnumeratedArray.h"
15 #include "llvm/ADT/PostOrderIterator.h"
16 #include "llvm/ADT/SCCIterator.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/DDG.h"
22 #define DEBUG_TYPE "dgb"
24 STATISTIC(TotalGraphs
, "Number of dependence graphs created.");
25 STATISTIC(TotalDefUseEdges
, "Number of def-use edges created.");
26 STATISTIC(TotalMemoryEdges
, "Number of memory dependence edges created.");
27 STATISTIC(TotalFineGrainedNodes
, "Number of fine-grained nodes created.");
28 STATISTIC(TotalPiBlockNodes
, "Number of pi-block nodes created.");
29 STATISTIC(TotalConfusedEdges
,
30 "Number of confused memory dependencies between two nodes.");
31 STATISTIC(TotalEdgeReversals
,
32 "Number of times the source and sink of dependence was reversed to "
33 "expose cycles in the graph.");
35 using InstructionListType
= SmallVector
<Instruction
*, 2>;
37 //===--------------------------------------------------------------------===//
38 // AbstractDependenceGraphBuilder implementation
39 //===--------------------------------------------------------------------===//
42 void AbstractDependenceGraphBuilder
<G
>::computeInstructionOrdinals() {
43 // The BBList is expected to be in program order.
44 size_t NextOrdinal
= 1;
45 for (auto *BB
: BBList
)
47 InstOrdinalMap
.insert(std::make_pair(&I
, NextOrdinal
++));
51 void AbstractDependenceGraphBuilder
<G
>::createFineGrainedNodes() {
53 assert(IMap
.empty() && "Expected empty instruction map at start");
54 for (BasicBlock
*BB
: BBList
)
55 for (Instruction
&I
: *BB
) {
56 auto &NewNode
= createFineGrainedNode(I
);
57 IMap
.insert(std::make_pair(&I
, &NewNode
));
58 NodeOrdinalMap
.insert(std::make_pair(&NewNode
, getOrdinal(I
)));
59 ++TotalFineGrainedNodes
;
64 void AbstractDependenceGraphBuilder
<G
>::createAndConnectRootNode() {
65 // Create a root node that connects to every connected component of the graph.
66 // This is done to allow graph iterators to visit all the disjoint components
67 // of the graph, in a single walk.
69 // This algorithm works by going through each node of the graph and for each
70 // node N, do a DFS starting from N. A rooted edge is established between the
71 // root node and N (if N is not yet visited). All the nodes reachable from N
72 // are marked as visited and are skipped in the DFS of subsequent nodes.
74 // Note: This algorithm tries to limit the number of edges out of the root
75 // node to some extent, but there may be redundant edges created depending on
76 // the iteration order. For example for a graph {A -> B}, an edge from the
77 // root node is added to both nodes if B is visited before A. While it does
78 // not result in minimal number of edges, this approach saves compile-time
79 // while keeping the number of edges in check.
80 auto &RootNode
= createRootNode();
81 df_iterator_default_set
<const NodeType
*, 4> Visited
;
82 for (auto *N
: Graph
) {
85 for (auto I
: depth_first_ext(N
, Visited
))
87 createRootedEdge(RootNode
, *N
);
91 template <class G
> void AbstractDependenceGraphBuilder
<G
>::createPiBlocks() {
92 if (!shouldCreatePiBlocks())
95 LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
97 // The overall algorithm is as follows:
98 // 1. Identify SCCs and for each SCC create a pi-block node containing all
99 // the nodes in that SCC.
100 // 2. Identify incoming edges incident to the nodes inside of the SCC and
101 // reconnect them to the pi-block node.
102 // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
103 // outside of it and reconnect them so that the edges are coming out of the
106 // Adding nodes as we iterate through the SCCs cause the SCC
107 // iterators to get invalidated. To prevent this invalidation, we first
108 // collect a list of nodes that are part of an SCC, and then iterate over
109 // those lists to create the pi-block nodes. Each element of the list is a
110 // list of nodes in an SCC. Note: trivial SCCs containing a single node are
112 SmallVector
<NodeListType
, 4> ListOfSCCs
;
113 for (auto &SCC
: make_range(scc_begin(&Graph
), scc_end(&Graph
))) {
115 ListOfSCCs
.emplace_back(SCC
.begin(), SCC
.end());
118 for (NodeListType
&NL
: ListOfSCCs
) {
119 LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL
.size()
120 << " nodes in it.\n");
122 // SCC iterator may put the nodes in an order that's different from the
123 // program order. To preserve original program order, we sort the list of
124 // nodes based on ordinal numbers computed earlier.
125 llvm::sort(NL
, [&](NodeType
*LHS
, NodeType
*RHS
) {
126 return getOrdinal(*LHS
) < getOrdinal(*RHS
);
129 NodeType
&PiNode
= createPiBlock(NL
);
132 // Build a set to speed up the lookup for edges whose targets
133 // are inside the SCC.
134 SmallPtrSet
<NodeType
*, 4> NodesInSCC(NL
.begin(), NL
.end());
136 // We have the set of nodes in the SCC. We go through the set of nodes
137 // that are outside of the SCC and look for edges that cross the two sets.
138 for (NodeType
*N
: Graph
) {
140 // Skip the SCC node and all the nodes inside of it.
141 if (*N
== PiNode
|| NodesInSCC
.count(N
))
145 Incoming
, // Incoming edges to the SCC
146 Outgoing
, // Edges going ot of the SCC
147 DirectionCount
// To make the enum usable as an array index.
150 // Use these flags to help us avoid creating redundant edges. If there
151 // are more than one edges from an outside node to inside nodes, we only
152 // keep one edge from that node to the pi-block node. Similarly, if
153 // there are more than one edges from inside nodes to an outside node,
154 // we only keep one edge from the pi-block node to the outside node.
155 // There is a flag defined for each direction (incoming vs outgoing) and
156 // for each type of edge supported, using a two-dimensional boolean
158 using EdgeKind
= typename
EdgeType::EdgeKind
;
159 EnumeratedArray
<bool, EdgeKind
> EdgeAlreadyCreated
[DirectionCount
]{false,
162 auto createEdgeOfKind
= [this](NodeType
&Src
, NodeType
&Dst
,
165 case EdgeKind::RegisterDefUse
:
166 createDefUseEdge(Src
, Dst
);
168 case EdgeKind::MemoryDependence
:
169 createMemoryEdge(Src
, Dst
);
171 case EdgeKind::Rooted
:
172 createRootedEdge(Src
, Dst
);
175 llvm_unreachable("Unsupported type of edge.");
179 auto reconnectEdges
= [&](NodeType
*Src
, NodeType
*Dst
, NodeType
*New
,
180 const Direction Dir
) {
181 if (!Src
->hasEdgeTo(*Dst
))
184 dbgs() << "reconnecting("
185 << (Dir
== Direction::Incoming
? "incoming)" : "outgoing)")
186 << ":\nSrc:" << *Src
<< "\nDst:" << *Dst
<< "\nNew:" << *New
188 assert((Dir
== Direction::Incoming
|| Dir
== Direction::Outgoing
) &&
189 "Invalid direction.");
191 SmallVector
<EdgeType
*, 10> EL
;
192 Src
->findEdgesTo(*Dst
, EL
);
193 for (EdgeType
*OldEdge
: EL
) {
194 EdgeKind Kind
= OldEdge
->getKind();
195 if (!EdgeAlreadyCreated
[Dir
][Kind
]) {
196 if (Dir
== Direction::Incoming
) {
197 createEdgeOfKind(*Src
, *New
, Kind
);
198 LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
199 } else if (Dir
== Direction::Outgoing
) {
200 createEdgeOfKind(*New
, *Dst
, Kind
);
201 LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
203 EdgeAlreadyCreated
[Dir
][Kind
] = true;
205 Src
->removeEdge(*OldEdge
);
206 destroyEdge(*OldEdge
);
207 LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
211 for (NodeType
*SCCNode
: NL
) {
212 // Process incoming edges incident to the pi-block node.
213 reconnectEdges(N
, SCCNode
, &PiNode
, Direction::Incoming
);
215 // Process edges that are coming out of the pi-block node.
216 reconnectEdges(SCCNode
, N
, &PiNode
, Direction::Outgoing
);
221 // Ordinal maps are no longer needed.
222 InstOrdinalMap
.clear();
223 NodeOrdinalMap
.clear();
225 LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
228 template <class G
> void AbstractDependenceGraphBuilder
<G
>::createDefUseEdges() {
229 for (NodeType
*N
: Graph
) {
230 InstructionListType SrcIList
;
231 N
->collectInstructions([](const Instruction
*I
) { return true; }, SrcIList
);
233 // Use a set to mark the targets that we link to N, so we don't add
234 // duplicate def-use edges when more than one instruction in a target node
235 // use results of instructions that are contained in N.
236 SmallPtrSet
<NodeType
*, 4> VisitedTargets
;
238 for (Instruction
*II
: SrcIList
) {
239 for (User
*U
: II
->users()) {
240 Instruction
*UI
= dyn_cast
<Instruction
>(U
);
243 NodeType
*DstNode
= nullptr;
244 if (IMap
.find(UI
) != IMap
.end())
245 DstNode
= IMap
.find(UI
)->second
;
247 // In the case of loops, the scope of the subgraph is all the
248 // basic blocks (and instructions within them) belonging to the loop. We
249 // simply ignore all the edges coming from (or going into) instructions
250 // or basic blocks outside of this range.
254 << "skipped def-use edge since the sink" << *UI
255 << " is outside the range of instructions being considered.\n");
259 // Self dependencies are ignored because they are redundant and
263 << "skipped def-use edge since the sink and the source ("
264 << N
<< ") are the same.\n");
268 if (VisitedTargets
.insert(DstNode
).second
) {
269 createDefUseEdge(*N
, *DstNode
);
278 void AbstractDependenceGraphBuilder
<G
>::createMemoryDependencyEdges() {
279 using DGIterator
= typename
G::iterator
;
280 auto isMemoryAccess
= [](const Instruction
*I
) {
281 return I
->mayReadOrWriteMemory();
283 for (DGIterator SrcIt
= Graph
.begin(), E
= Graph
.end(); SrcIt
!= E
; ++SrcIt
) {
284 InstructionListType SrcIList
;
285 (*SrcIt
)->collectInstructions(isMemoryAccess
, SrcIList
);
286 if (SrcIList
.empty())
289 for (DGIterator DstIt
= SrcIt
; DstIt
!= E
; ++DstIt
) {
290 if (**SrcIt
== **DstIt
)
292 InstructionListType DstIList
;
293 (*DstIt
)->collectInstructions(isMemoryAccess
, DstIList
);
294 if (DstIList
.empty())
296 bool ForwardEdgeCreated
= false;
297 bool BackwardEdgeCreated
= false;
298 for (Instruction
*ISrc
: SrcIList
) {
299 for (Instruction
*IDst
: DstIList
) {
300 auto D
= DI
.depends(ISrc
, IDst
, true);
304 // If we have a dependence with its left-most non-'=' direction
305 // being '>' we need to reverse the direction of the edge, because
306 // the source of the dependence cannot occur after the sink. For
307 // confused dependencies, we will create edges in both directions to
308 // represent the possibility of a cycle.
310 auto createConfusedEdges
= [&](NodeType
&Src
, NodeType
&Dst
) {
311 if (!ForwardEdgeCreated
) {
312 createMemoryEdge(Src
, Dst
);
315 if (!BackwardEdgeCreated
) {
316 createMemoryEdge(Dst
, Src
);
319 ForwardEdgeCreated
= BackwardEdgeCreated
= true;
320 ++TotalConfusedEdges
;
323 auto createForwardEdge
= [&](NodeType
&Src
, NodeType
&Dst
) {
324 if (!ForwardEdgeCreated
) {
325 createMemoryEdge(Src
, Dst
);
328 ForwardEdgeCreated
= true;
331 auto createBackwardEdge
= [&](NodeType
&Src
, NodeType
&Dst
) {
332 if (!BackwardEdgeCreated
) {
333 createMemoryEdge(Dst
, Src
);
336 BackwardEdgeCreated
= true;
340 createConfusedEdges(**SrcIt
, **DstIt
);
341 else if (D
->isOrdered() && !D
->isLoopIndependent()) {
342 bool ReversedEdge
= false;
343 for (unsigned Level
= 1; Level
<= D
->getLevels(); ++Level
) {
344 if (D
->getDirection(Level
) == Dependence::DVEntry::EQ
)
346 else if (D
->getDirection(Level
) == Dependence::DVEntry::GT
) {
347 createBackwardEdge(**SrcIt
, **DstIt
);
349 ++TotalEdgeReversals
;
351 } else if (D
->getDirection(Level
) == Dependence::DVEntry::LT
)
354 createConfusedEdges(**SrcIt
, **DstIt
);
359 createForwardEdge(**SrcIt
, **DstIt
);
361 createForwardEdge(**SrcIt
, **DstIt
);
363 // Avoid creating duplicate edges.
364 if (ForwardEdgeCreated
&& BackwardEdgeCreated
)
368 // If we've created edges in both directions, there is no more
369 // unique edge that we can create between these two nodes, so we
371 if (ForwardEdgeCreated
&& BackwardEdgeCreated
)
378 template <class G
> void AbstractDependenceGraphBuilder
<G
>::simplify() {
379 if (!shouldSimplify())
381 LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
383 // This algorithm works by first collecting a set of candidate nodes that have
384 // an out-degree of one (in terms of def-use edges), and then ignoring those
385 // whose targets have an in-degree more than one. Each node in the resulting
386 // set can then be merged with its corresponding target and put back into the
387 // worklist until no further merge candidates are available.
388 SmallPtrSet
<NodeType
*, 32> CandidateSourceNodes
;
390 // A mapping between nodes and their in-degree. To save space, this map
391 // only contains nodes that are targets of nodes in the CandidateSourceNodes.
392 DenseMap
<NodeType
*, unsigned> TargetInDegreeMap
;
394 for (NodeType
*N
: Graph
) {
395 if (N
->getEdges().size() != 1)
397 EdgeType
&Edge
= N
->back();
398 if (!Edge
.isDefUse())
400 CandidateSourceNodes
.insert(N
);
402 // Insert an element into the in-degree map and initialize to zero. The
403 // count will get updated in the next step.
404 TargetInDegreeMap
.insert({&Edge
.getTargetNode(), 0});
408 dbgs() << "Size of candidate src node list:" << CandidateSourceNodes
.size()
409 << "\nNode with single outgoing def-use edge:\n";
410 for (NodeType
*N
: CandidateSourceNodes
) {
415 for (NodeType
*N
: Graph
) {
416 for (EdgeType
*E
: *N
) {
417 NodeType
*Tgt
= &E
->getTargetNode();
418 auto TgtIT
= TargetInDegreeMap
.find(Tgt
);
419 if (TgtIT
!= TargetInDegreeMap
.end())
425 dbgs() << "Size of target in-degree map:" << TargetInDegreeMap
.size()
426 << "\nContent of in-degree map:\n";
427 for (auto &I
: TargetInDegreeMap
) {
428 dbgs() << I
.first
<< " --> " << I
.second
<< "\n";
432 SmallVector
<NodeType
*, 32> Worklist(CandidateSourceNodes
.begin(),
433 CandidateSourceNodes
.end());
434 while (!Worklist
.empty()) {
435 NodeType
&Src
= *Worklist
.pop_back_val();
436 // As nodes get merged, we need to skip any node that has been removed from
437 // the candidate set (see below).
438 if (!CandidateSourceNodes
.erase(&Src
))
441 assert(Src
.getEdges().size() == 1 &&
442 "Expected a single edge from the candidate src node.");
443 NodeType
&Tgt
= Src
.back().getTargetNode();
444 assert(TargetInDegreeMap
.find(&Tgt
) != TargetInDegreeMap
.end() &&
445 "Expected target to be in the in-degree map.");
447 if (TargetInDegreeMap
[&Tgt
] != 1)
450 if (!areNodesMergeable(Src
, Tgt
))
453 // Do not merge if there is also an edge from target to src (immediate
455 if (Tgt
.hasEdgeTo(Src
))
458 LLVM_DEBUG(dbgs() << "Merging:" << Src
<< "\nWith:" << Tgt
<< "\n");
460 mergeNodes(Src
, Tgt
);
462 // If the target node is in the candidate set itself, we need to put the
463 // src node back into the worklist again so it gives the target a chance
464 // to get merged into it. For example if we have:
465 // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a},
466 // then after merging (a) and (b) together, we need to put (a,b) back in
467 // the worklist so that (c) can get merged in as well resulting in
469 // We also need to remove the old target (b), from the worklist. We first
470 // remove it from the candidate set here, and skip any item from the
471 // worklist that is not in the set.
472 if (CandidateSourceNodes
.erase(&Tgt
)) {
473 Worklist
.push_back(&Src
);
474 CandidateSourceNodes
.insert(&Src
);
475 LLVM_DEBUG(dbgs() << "Putting " << &Src
<< " back in the worklist.\n");
478 LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
482 void AbstractDependenceGraphBuilder
<G
>::sortNodesTopologically() {
484 // If we don't create pi-blocks, then we may not have a DAG.
485 if (!shouldCreatePiBlocks())
488 SmallVector
<NodeType
*, 64> NodesInPO
;
489 using NodeKind
= typename
NodeType::NodeKind
;
490 for (NodeType
*N
: post_order(&Graph
)) {
491 if (N
->getKind() == NodeKind::PiBlock
) {
492 // Put members of the pi-block right after the pi-block itself, for
494 const NodeListType
&PiBlockMembers
= getNodesInPiBlock(*N
);
495 llvm::append_range(NodesInPO
, PiBlockMembers
);
497 NodesInPO
.push_back(N
);
500 size_t OldSize
= Graph
.Nodes
.size();
502 append_range(Graph
.Nodes
, reverse(NodesInPO
));
503 if (Graph
.Nodes
.size() != OldSize
)
505 "Expected the number of nodes to stay the same after the sort");
508 template class llvm::AbstractDependenceGraphBuilder
<DataDependenceGraph
>;
509 template class llvm::DependenceGraphInfo
<DDGNode
>;