1 //===- DependenceGraphBuilder.cpp ------------------------------------------==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // This file implements common steps of the build algorithm for construction
9 // of dependence graphs such as DDG and PDG.
10 //===----------------------------------------------------------------------===//
12 #include "llvm/Analysis/DependenceGraphBuilder.h"
13 #include "llvm/ADT/DepthFirstIterator.h"
14 #include "llvm/ADT/EnumeratedArray.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/DDG.h"
21 #define DEBUG_TYPE "dgb"
23 STATISTIC(TotalGraphs
, "Number of dependence graphs created.");
24 STATISTIC(TotalDefUseEdges
, "Number of def-use edges created.");
25 STATISTIC(TotalMemoryEdges
, "Number of memory dependence edges created.");
26 STATISTIC(TotalFineGrainedNodes
, "Number of fine-grained nodes created.");
27 STATISTIC(TotalPiBlockNodes
, "Number of pi-block nodes created.");
28 STATISTIC(TotalConfusedEdges
,
29 "Number of confused memory dependencies between two nodes.");
30 STATISTIC(TotalEdgeReversals
,
31 "Number of times the source and sink of dependence was reversed to "
32 "expose cycles in the graph.");
34 using InstructionListType
= SmallVector
<Instruction
*, 2>;
36 //===--------------------------------------------------------------------===//
37 // AbstractDependenceGraphBuilder implementation
38 //===--------------------------------------------------------------------===//
41 void AbstractDependenceGraphBuilder
<G
>::computeInstructionOrdinals() {
42 // The BBList is expected to be in program order.
43 size_t NextOrdinal
= 1;
44 for (auto *BB
: BBList
)
46 InstOrdinalMap
.insert(std::make_pair(&I
, NextOrdinal
++));
50 void AbstractDependenceGraphBuilder
<G
>::createFineGrainedNodes() {
52 assert(IMap
.empty() && "Expected empty instruction map at start");
53 for (BasicBlock
*BB
: BBList
)
54 for (Instruction
&I
: *BB
) {
55 auto &NewNode
= createFineGrainedNode(I
);
56 IMap
.insert(std::make_pair(&I
, &NewNode
));
57 NodeOrdinalMap
.insert(std::make_pair(&NewNode
, getOrdinal(I
)));
58 ++TotalFineGrainedNodes
;
63 void AbstractDependenceGraphBuilder
<G
>::createAndConnectRootNode() {
64 // Create a root node that connects to every connected component of the graph.
65 // This is done to allow graph iterators to visit all the disjoint components
66 // of the graph, in a single walk.
68 // This algorithm works by going through each node of the graph and for each
69 // node N, do a DFS starting from N. A rooted edge is established between the
70 // root node and N (if N is not yet visited). All the nodes reachable from N
71 // are marked as visited and are skipped in the DFS of subsequent nodes.
73 // Note: This algorithm tries to limit the number of edges out of the root
74 // node to some extent, but there may be redundant edges created depending on
75 // the iteration order. For example for a graph {A -> B}, an edge from the
76 // root node is added to both nodes if B is visited before A. While it does
77 // not result in minimal number of edges, this approach saves compile-time
78 // while keeping the number of edges in check.
79 auto &RootNode
= createRootNode();
80 df_iterator_default_set
<const NodeType
*, 4> Visited
;
81 for (auto *N
: Graph
) {
84 for (auto I
: depth_first_ext(N
, Visited
))
86 createRootedEdge(RootNode
, *N
);
90 template <class G
> void AbstractDependenceGraphBuilder
<G
>::createPiBlocks() {
91 if (!shouldCreatePiBlocks())
94 LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
96 // The overall algorithm is as follows:
97 // 1. Identify SCCs and for each SCC create a pi-block node containing all
98 // the nodes in that SCC.
99 // 2. Identify incoming edges incident to the nodes inside of the SCC and
100 // reconnect them to the pi-block node.
101 // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
102 // outside of it and reconnect them so that the edges are coming out of the
105 // Adding nodes as we iterate through the SCCs cause the SCC
106 // iterators to get invalidated. To prevent this invalidation, we first
107 // collect a list of nodes that are part of an SCC, and then iterate over
108 // those lists to create the pi-block nodes. Each element of the list is a
109 // list of nodes in an SCC. Note: trivial SCCs containing a single node are
111 SmallVector
<NodeListType
, 4> ListOfSCCs
;
112 for (auto &SCC
: make_range(scc_begin(&Graph
), scc_end(&Graph
))) {
114 ListOfSCCs
.emplace_back(SCC
.begin(), SCC
.end());
117 for (NodeListType
&NL
: ListOfSCCs
) {
118 LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL
.size()
119 << " nodes in it.\n");
121 // SCC iterator may put the nodes in an order that's different from the
122 // program order. To preserve original program order, we sort the list of
123 // nodes based on ordinal numbers computed earlier.
124 llvm::sort(NL
, [&](NodeType
*LHS
, NodeType
*RHS
) {
125 return getOrdinal(*LHS
) < getOrdinal(*RHS
);
128 NodeType
&PiNode
= createPiBlock(NL
);
131 // Build a set to speed up the lookup for edges whose targets
132 // are inside the SCC.
133 SmallPtrSet
<NodeType
*, 4> NodesInSCC(NL
.begin(), NL
.end());
135 // We have the set of nodes in the SCC. We go through the set of nodes
136 // that are outside of the SCC and look for edges that cross the two sets.
137 for (NodeType
*N
: Graph
) {
139 // Skip the SCC node and all the nodes inside of it.
140 if (*N
== PiNode
|| NodesInSCC
.count(N
))
144 Incoming
, // Incoming edges to the SCC
145 Outgoing
, // Edges going ot of the SCC
146 DirectionCount
// To make the enum usable as an array index.
149 // Use these flags to help us avoid creating redundant edges. If there
150 // are more than one edges from an outside node to inside nodes, we only
151 // keep one edge from that node to the pi-block node. Similarly, if
152 // there are more than one edges from inside nodes to an outside node,
153 // we only keep one edge from the pi-block node to the outside node.
154 // There is a flag defined for each direction (incoming vs outgoing) and
155 // for each type of edge supported, using a two-dimensional boolean
157 using EdgeKind
= typename
EdgeType::EdgeKind
;
158 EnumeratedArray
<bool, EdgeKind
> EdgeAlreadyCreated
[DirectionCount
]{false,
161 auto createEdgeOfKind
= [this](NodeType
&Src
, NodeType
&Dst
,
164 case EdgeKind::RegisterDefUse
:
165 createDefUseEdge(Src
, Dst
);
167 case EdgeKind::MemoryDependence
:
168 createMemoryEdge(Src
, Dst
);
170 case EdgeKind::Rooted
:
171 createRootedEdge(Src
, Dst
);
174 llvm_unreachable("Unsupported type of edge.");
178 auto reconnectEdges
= [&](NodeType
*Src
, NodeType
*Dst
, NodeType
*New
,
179 const Direction Dir
) {
180 if (!Src
->hasEdgeTo(*Dst
))
183 dbgs() << "reconnecting("
184 << (Dir
== Direction::Incoming
? "incoming)" : "outgoing)")
185 << ":\nSrc:" << *Src
<< "\nDst:" << *Dst
<< "\nNew:" << *New
187 assert((Dir
== Direction::Incoming
|| Dir
== Direction::Outgoing
) &&
188 "Invalid direction.");
190 SmallVector
<EdgeType
*, 10> EL
;
191 Src
->findEdgesTo(*Dst
, EL
);
192 for (EdgeType
*OldEdge
: EL
) {
193 EdgeKind Kind
= OldEdge
->getKind();
194 if (!EdgeAlreadyCreated
[Dir
][Kind
]) {
195 if (Dir
== Direction::Incoming
) {
196 createEdgeOfKind(*Src
, *New
, Kind
);
197 LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
198 } else if (Dir
== Direction::Outgoing
) {
199 createEdgeOfKind(*New
, *Dst
, Kind
);
200 LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
202 EdgeAlreadyCreated
[Dir
][Kind
] = true;
204 Src
->removeEdge(*OldEdge
);
205 destroyEdge(*OldEdge
);
206 LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
210 for (NodeType
*SCCNode
: NL
) {
211 // Process incoming edges incident to the pi-block node.
212 reconnectEdges(N
, SCCNode
, &PiNode
, Direction::Incoming
);
214 // Process edges that are coming out of the pi-block node.
215 reconnectEdges(SCCNode
, N
, &PiNode
, Direction::Outgoing
);
220 // Ordinal maps are no longer needed.
221 InstOrdinalMap
.clear();
222 NodeOrdinalMap
.clear();
224 LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
227 template <class G
> void AbstractDependenceGraphBuilder
<G
>::createDefUseEdges() {
228 for (NodeType
*N
: Graph
) {
229 InstructionListType SrcIList
;
230 N
->collectInstructions([](const Instruction
*I
) { return true; }, SrcIList
);
232 // Use a set to mark the targets that we link to N, so we don't add
233 // duplicate def-use edges when more than one instruction in a target node
234 // use results of instructions that are contained in N.
235 SmallPtrSet
<NodeType
*, 4> VisitedTargets
;
237 for (Instruction
*II
: SrcIList
) {
238 for (User
*U
: II
->users()) {
239 Instruction
*UI
= dyn_cast
<Instruction
>(U
);
242 NodeType
*DstNode
= nullptr;
243 if (IMap
.find(UI
) != IMap
.end())
244 DstNode
= IMap
.find(UI
)->second
;
246 // In the case of loops, the scope of the subgraph is all the
247 // basic blocks (and instructions within them) belonging to the loop. We
248 // simply ignore all the edges coming from (or going into) instructions
249 // or basic blocks outside of this range.
253 << "skipped def-use edge since the sink" << *UI
254 << " is outside the range of instructions being considered.\n");
258 // Self dependencies are ignored because they are redundant and
262 << "skipped def-use edge since the sink and the source ("
263 << N
<< ") are the same.\n");
267 if (VisitedTargets
.insert(DstNode
).second
) {
268 createDefUseEdge(*N
, *DstNode
);
277 void AbstractDependenceGraphBuilder
<G
>::createMemoryDependencyEdges() {
278 using DGIterator
= typename
G::iterator
;
279 auto isMemoryAccess
= [](const Instruction
*I
) {
280 return I
->mayReadOrWriteMemory();
282 for (DGIterator SrcIt
= Graph
.begin(), E
= Graph
.end(); SrcIt
!= E
; ++SrcIt
) {
283 InstructionListType SrcIList
;
284 (*SrcIt
)->collectInstructions(isMemoryAccess
, SrcIList
);
285 if (SrcIList
.empty())
288 for (DGIterator DstIt
= SrcIt
; DstIt
!= E
; ++DstIt
) {
289 if (**SrcIt
== **DstIt
)
291 InstructionListType DstIList
;
292 (*DstIt
)->collectInstructions(isMemoryAccess
, DstIList
);
293 if (DstIList
.empty())
295 bool ForwardEdgeCreated
= false;
296 bool BackwardEdgeCreated
= false;
297 for (Instruction
*ISrc
: SrcIList
) {
298 for (Instruction
*IDst
: DstIList
) {
299 auto D
= DI
.depends(ISrc
, IDst
, true);
303 // If we have a dependence with its left-most non-'=' direction
304 // being '>' we need to reverse the direction of the edge, because
305 // the source of the dependence cannot occur after the sink. For
306 // confused dependencies, we will create edges in both directions to
307 // represent the possibility of a cycle.
309 auto createConfusedEdges
= [&](NodeType
&Src
, NodeType
&Dst
) {
310 if (!ForwardEdgeCreated
) {
311 createMemoryEdge(Src
, Dst
);
314 if (!BackwardEdgeCreated
) {
315 createMemoryEdge(Dst
, Src
);
318 ForwardEdgeCreated
= BackwardEdgeCreated
= true;
319 ++TotalConfusedEdges
;
322 auto createForwardEdge
= [&](NodeType
&Src
, NodeType
&Dst
) {
323 if (!ForwardEdgeCreated
) {
324 createMemoryEdge(Src
, Dst
);
327 ForwardEdgeCreated
= true;
330 auto createBackwardEdge
= [&](NodeType
&Src
, NodeType
&Dst
) {
331 if (!BackwardEdgeCreated
) {
332 createMemoryEdge(Dst
, Src
);
335 BackwardEdgeCreated
= true;
339 createConfusedEdges(**SrcIt
, **DstIt
);
340 else if (D
->isOrdered() && !D
->isLoopIndependent()) {
341 bool ReversedEdge
= false;
342 for (unsigned Level
= 1; Level
<= D
->getLevels(); ++Level
) {
343 if (D
->getDirection(Level
) == Dependence::DVEntry::EQ
)
345 else if (D
->getDirection(Level
) == Dependence::DVEntry::GT
) {
346 createBackwardEdge(**SrcIt
, **DstIt
);
348 ++TotalEdgeReversals
;
350 } else if (D
->getDirection(Level
) == Dependence::DVEntry::LT
)
353 createConfusedEdges(**SrcIt
, **DstIt
);
358 createForwardEdge(**SrcIt
, **DstIt
);
360 createForwardEdge(**SrcIt
, **DstIt
);
362 // Avoid creating duplicate edges.
363 if (ForwardEdgeCreated
&& BackwardEdgeCreated
)
367 // If we've created edges in both directions, there is no more
368 // unique edge that we can create between these two nodes, so we
370 if (ForwardEdgeCreated
&& BackwardEdgeCreated
)
377 template <class G
> void AbstractDependenceGraphBuilder
<G
>::simplify() {
378 if (!shouldSimplify())
380 LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
382 // This algorithm works by first collecting a set of candidate nodes that have
383 // an out-degree of one (in terms of def-use edges), and then ignoring those
384 // whose targets have an in-degree more than one. Each node in the resulting
385 // set can then be merged with its corresponding target and put back into the
386 // worklist until no further merge candidates are available.
387 SmallPtrSet
<NodeType
*, 32> CandidateSourceNodes
;
389 // A mapping between nodes and their in-degree. To save space, this map
390 // only contains nodes that are targets of nodes in the CandidateSourceNodes.
391 DenseMap
<NodeType
*, unsigned> TargetInDegreeMap
;
393 for (NodeType
*N
: Graph
) {
394 if (N
->getEdges().size() != 1)
396 EdgeType
&Edge
= N
->back();
397 if (!Edge
.isDefUse())
399 CandidateSourceNodes
.insert(N
);
401 // Insert an element into the in-degree map and initialize to zero. The
402 // count will get updated in the next step.
403 TargetInDegreeMap
.insert({&Edge
.getTargetNode(), 0});
407 dbgs() << "Size of candidate src node list:" << CandidateSourceNodes
.size()
408 << "\nNode with single outgoing def-use edge:\n";
409 for (NodeType
*N
: CandidateSourceNodes
) {
414 for (NodeType
*N
: Graph
) {
415 for (EdgeType
*E
: *N
) {
416 NodeType
*Tgt
= &E
->getTargetNode();
417 auto TgtIT
= TargetInDegreeMap
.find(Tgt
);
418 if (TgtIT
!= TargetInDegreeMap
.end())
424 dbgs() << "Size of target in-degree map:" << TargetInDegreeMap
.size()
425 << "\nContent of in-degree map:\n";
426 for (auto &I
: TargetInDegreeMap
) {
427 dbgs() << I
.first
<< " --> " << I
.second
<< "\n";
431 SmallVector
<NodeType
*, 32> Worklist(CandidateSourceNodes
.begin(),
432 CandidateSourceNodes
.end());
433 while (!Worklist
.empty()) {
434 NodeType
&Src
= *Worklist
.pop_back_val();
435 // As nodes get merged, we need to skip any node that has been removed from
436 // the candidate set (see below).
437 if (!CandidateSourceNodes
.erase(&Src
))
440 assert(Src
.getEdges().size() == 1 &&
441 "Expected a single edge from the candidate src node.");
442 NodeType
&Tgt
= Src
.back().getTargetNode();
443 assert(TargetInDegreeMap
.find(&Tgt
) != TargetInDegreeMap
.end() &&
444 "Expected target to be in the in-degree map.");
446 if (TargetInDegreeMap
[&Tgt
] != 1)
449 if (!areNodesMergeable(Src
, Tgt
))
452 // Do not merge if there is also an edge from target to src (immediate
454 if (Tgt
.hasEdgeTo(Src
))
457 LLVM_DEBUG(dbgs() << "Merging:" << Src
<< "\nWith:" << Tgt
<< "\n");
459 mergeNodes(Src
, Tgt
);
461 // If the target node is in the candidate set itself, we need to put the
462 // src node back into the worklist again so it gives the target a chance
463 // to get merged into it. For example if we have:
464 // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a},
465 // then after merging (a) and (b) together, we need to put (a,b) back in
466 // the worklist so that (c) can get merged in as well resulting in
468 // We also need to remove the old target (b), from the worklist. We first
469 // remove it from the candidate set here, and skip any item from the
470 // worklist that is not in the set.
471 if (CandidateSourceNodes
.erase(&Tgt
)) {
472 Worklist
.push_back(&Src
);
473 CandidateSourceNodes
.insert(&Src
);
474 LLVM_DEBUG(dbgs() << "Putting " << &Src
<< " back in the worklist.\n");
477 LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
481 void AbstractDependenceGraphBuilder
<G
>::sortNodesTopologically() {
483 // If we don't create pi-blocks, then we may not have a DAG.
484 if (!shouldCreatePiBlocks())
487 SmallVector
<NodeType
*, 64> NodesInPO
;
488 using NodeKind
= typename
NodeType::NodeKind
;
489 for (NodeType
*N
: post_order(&Graph
)) {
490 if (N
->getKind() == NodeKind::PiBlock
) {
491 // Put members of the pi-block right after the pi-block itself, for
493 const NodeListType
&PiBlockMembers
= getNodesInPiBlock(*N
);
494 llvm::append_range(NodesInPO
, PiBlockMembers
);
496 NodesInPO
.push_back(N
);
499 size_t OldSize
= Graph
.Nodes
.size();
501 append_range(Graph
.Nodes
, reverse(NodesInPO
));
502 if (Graph
.Nodes
.size() != OldSize
)
504 "Expected the number of nodes to stay the same after the sort");
507 template class llvm::AbstractDependenceGraphBuilder
<DataDependenceGraph
>;
508 template class llvm::DependenceGraphInfo
<DDGNode
>;