1 //===- HexagonCommonGEP.cpp -----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/ArrayRef.h"
10 #include "llvm/ADT/FoldingSet.h"
11 #include "llvm/ADT/GraphTraits.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/Analysis/PostDominators.h"
18 #include "llvm/IR/BasicBlock.h"
19 #include "llvm/IR/Constant.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/Dominators.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/Type.h"
27 #include "llvm/IR/Use.h"
28 #include "llvm/IR/User.h"
29 #include "llvm/IR/Value.h"
30 #include "llvm/IR/Verifier.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/Allocator.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Compiler.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Transforms/Utils/Local.h"
49 #define DEBUG_TYPE "commgep"
53 static cl::opt
<bool> OptSpeculate("commgep-speculate", cl::init(true),
56 static cl::opt
<bool> OptEnableInv("commgep-inv", cl::init(true), cl::Hidden
);
58 static cl::opt
<bool> OptEnableConst("commgep-const", cl::init(true),
63 void initializeHexagonCommonGEPPass(PassRegistry
&);
65 } // end namespace llvm
70 using NodeSet
= std::set
<GepNode
*>;
71 using NodeToValueMap
= std::map
<GepNode
*, Value
*>;
72 using NodeVect
= std::vector
<GepNode
*>;
73 using NodeChildrenMap
= std::map
<GepNode
*, NodeVect
>;
74 using UseSet
= SetVector
<Use
*>;
75 using NodeToUsesMap
= std::map
<GepNode
*, UseSet
>;
77 // Numbering map for gep nodes. Used to keep track of ordering for
80 NodeOrdering() = default;
82 void insert(const GepNode
*N
) { Map
.insert(std::make_pair(N
, ++LastNum
)); }
83 void clear() { Map
.clear(); }
85 bool operator()(const GepNode
*N1
, const GepNode
*N2
) const {
86 auto F1
= Map
.find(N1
), F2
= Map
.find(N2
);
87 assert(F1
!= Map
.end() && F2
!= Map
.end());
88 return F1
->second
< F2
->second
;
92 std::map
<const GepNode
*, unsigned> Map
;
96 class HexagonCommonGEP
: public FunctionPass
{
100 HexagonCommonGEP() : FunctionPass(ID
) {
101 initializeHexagonCommonGEPPass(*PassRegistry::getPassRegistry());
104 bool runOnFunction(Function
&F
) override
;
105 StringRef
getPassName() const override
{ return "Hexagon Common GEP"; }
107 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
108 AU
.addRequired
<DominatorTreeWrapperPass
>();
109 AU
.addPreserved
<DominatorTreeWrapperPass
>();
110 AU
.addRequired
<PostDominatorTreeWrapperPass
>();
111 AU
.addPreserved
<PostDominatorTreeWrapperPass
>();
112 AU
.addRequired
<LoopInfoWrapperPass
>();
113 AU
.addPreserved
<LoopInfoWrapperPass
>();
114 FunctionPass::getAnalysisUsage(AU
);
118 using ValueToNodeMap
= std::map
<Value
*, GepNode
*>;
119 using ValueVect
= std::vector
<Value
*>;
120 using NodeToValuesMap
= std::map
<GepNode
*, ValueVect
>;
122 void getBlockTraversalOrder(BasicBlock
*Root
, ValueVect
&Order
);
123 bool isHandledGepForm(GetElementPtrInst
*GepI
);
124 void processGepInst(GetElementPtrInst
*GepI
, ValueToNodeMap
&NM
);
128 BasicBlock
*recalculatePlacement(GepNode
*Node
, NodeChildrenMap
&NCM
,
129 NodeToValueMap
&Loc
);
130 BasicBlock
*recalculatePlacementRec(GepNode
*Node
, NodeChildrenMap
&NCM
,
131 NodeToValueMap
&Loc
);
132 bool isInvariantIn(Value
*Val
, Loop
*L
);
133 bool isInvariantIn(GepNode
*Node
, Loop
*L
);
134 bool isInMainPath(BasicBlock
*B
, Loop
*L
);
135 BasicBlock
*adjustForInvariance(GepNode
*Node
, NodeChildrenMap
&NCM
,
136 NodeToValueMap
&Loc
);
137 void separateChainForNode(GepNode
*Node
, Use
*U
, NodeToValueMap
&Loc
);
138 void separateConstantChains(GepNode
*Node
, NodeChildrenMap
&NCM
,
139 NodeToValueMap
&Loc
);
140 void computeNodePlacement(NodeToValueMap
&Loc
);
142 Value
*fabricateGEP(NodeVect
&NA
, BasicBlock::iterator At
,
144 void getAllUsersForNode(GepNode
*Node
, ValueVect
&Values
,
145 NodeChildrenMap
&NCM
);
146 void materialize(NodeToValueMap
&Loc
);
148 void removeDeadCode();
152 NodeOrdering NodeOrder
; // Node ordering, for deterministic behavior.
153 SpecificBumpPtrAllocator
<GepNode
> *Mem
;
157 PostDominatorTree
*PDT
;
161 } // end anonymous namespace
163 char HexagonCommonGEP::ID
= 0;
165 INITIALIZE_PASS_BEGIN(HexagonCommonGEP
, "hcommgep", "Hexagon Common GEP",
167 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
168 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass
)
169 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
170 INITIALIZE_PASS_END(HexagonCommonGEP
, "hcommgep", "Hexagon Common GEP",
182 Pointer
= 0x10, // See note below.
184 // Note: GEP indices generally traverse nested types, and so a GepNode
185 // (representing a single index) can be associated with some composite
186 // type. The exception is the GEP input, which is a pointer, and not
187 // a composite type (at least not in the sense of having sub-types).
188 // Also, the corresponding index plays a different role as well: it is
189 // simply added to the input pointer. Since pointer types are becoming
190 // opaque (i.e. are no longer going to include the pointee type), the
191 // two pieces of information (1) the fact that it's a pointer, and
192 // (2) the pointee type, need to be stored separately. The pointee type
193 // will be stored in the PTy member, while the fact that the node
194 // operates on a pointer will be reflected by the flag "Pointer".
201 Value
*Idx
= nullptr;
202 Type
*PTy
= nullptr; // Type indexed by this node. For pointer nodes
203 // this is the "pointee" type, and indexing a
204 // pointer does not change the type.
206 GepNode() : Parent(nullptr) {}
207 GepNode(const GepNode
*N
) : Flags(N
->Flags
), Idx(N
->Idx
), PTy(N
->PTy
) {
209 BaseVal
= N
->BaseVal
;
214 friend raw_ostream
&operator<< (raw_ostream
&OS
, const GepNode
&GN
);
217 raw_ostream
&operator<< (raw_ostream
&OS
, const GepNode
&GN
) {
220 if (GN
.Flags
& GepNode::Root
) {
224 if (GN
.Flags
& GepNode::Internal
) {
230 if (GN
.Flags
& GepNode::Used
) {
235 if (GN
.Flags
& GepNode::InBounds
) {
240 if (GN
.Flags
& GepNode::Pointer
) {
246 if (GN
.Flags
& GepNode::Root
)
247 OS
<< "BaseVal:" << GN
.BaseVal
->getName() << '(' << GN
.BaseVal
<< ')';
249 OS
<< "Parent:" << GN
.Parent
;
252 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(GN
.Idx
))
253 OS
<< CI
->getValue().getSExtValue();
254 else if (GN
.Idx
->hasName())
255 OS
<< GN
.Idx
->getName();
257 OS
<< "<anon> =" << *GN
.Idx
;
260 if (GN
.PTy
->isStructTy()) {
261 StructType
*STy
= cast
<StructType
>(GN
.PTy
);
262 if (!STy
->isLiteral())
263 OS
<< GN
.PTy
->getStructName();
265 OS
<< "<anon-struct>:" << *STy
;
273 template <typename NodeContainer
>
274 void dump_node_container(raw_ostream
&OS
, const NodeContainer
&S
) {
275 using const_iterator
= typename
NodeContainer::const_iterator
;
277 for (const_iterator I
= S
.begin(), E
= S
.end(); I
!= E
; ++I
)
278 OS
<< *I
<< ' ' << **I
<< '\n';
281 raw_ostream
&operator<< (raw_ostream
&OS
,
282 const NodeVect
&S
) LLVM_ATTRIBUTE_UNUSED
;
283 raw_ostream
&operator<< (raw_ostream
&OS
, const NodeVect
&S
) {
284 dump_node_container(OS
, S
);
288 raw_ostream
&operator<< (raw_ostream
&OS
,
289 const NodeToUsesMap
&M
) LLVM_ATTRIBUTE_UNUSED
;
290 raw_ostream
&operator<< (raw_ostream
&OS
, const NodeToUsesMap
&M
){
291 for (const auto &I
: M
) {
292 const UseSet
&Us
= I
.second
;
293 OS
<< I
.first
<< " -> #" << Us
.size() << '{';
294 for (const Use
*U
: Us
) {
295 User
*R
= U
->getUser();
297 OS
<< ' ' << R
->getName();
299 OS
<< " <?>(" << *R
<< ')';
307 in_set(const NodeSet
&S
) : NS(S
) {}
309 bool operator() (GepNode
*N
) const {
310 return NS
.find(N
) != NS
.end();
317 } // end anonymous namespace
319 inline void *operator new(size_t, SpecificBumpPtrAllocator
<GepNode
> &A
) {
323 void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock
*Root
,
325 // Compute block ordering for a typical DT-based traversal of the flow
326 // graph: "before visiting a block, all of its dominators must have been
329 Order
.push_back(Root
);
330 for (auto *DTN
: children
<DomTreeNode
*>(DT
->getNode(Root
)))
331 getBlockTraversalOrder(DTN
->getBlock(), Order
);
334 bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst
*GepI
) {
336 if (!GepI
->getType()->isPointerTy())
338 // No GEPs without any indices. (Is this possible?)
339 if (GepI
->idx_begin() == GepI
->idx_end())
344 void HexagonCommonGEP::processGepInst(GetElementPtrInst
*GepI
,
345 ValueToNodeMap
&NM
) {
346 LLVM_DEBUG(dbgs() << "Visiting GEP: " << *GepI
<< '\n');
347 GepNode
*N
= new (*Mem
) GepNode
;
348 Value
*PtrOp
= GepI
->getPointerOperand();
349 uint32_t InBounds
= GepI
->isInBounds() ? GepNode::InBounds
: 0;
350 ValueToNodeMap::iterator F
= NM
.find(PtrOp
);
353 N
->Flags
|= GepNode::Root
| InBounds
;
355 // If PtrOp was a GEP instruction, it must have already been processed.
356 // The ValueToNodeMap entry for it is the last gep node in the generated
357 // chain. Link to it here.
358 N
->Parent
= F
->second
;
360 N
->PTy
= GepI
->getSourceElementType();
361 N
->Flags
|= GepNode::Pointer
;
362 N
->Idx
= *GepI
->idx_begin();
364 // Collect the list of users of this GEP instruction. Will add it to the
365 // last node created for it.
367 for (Value::user_iterator UI
= GepI
->user_begin(), UE
= GepI
->user_end();
369 // Check if this gep is used by anything other than other geps that
371 if (isa
<GetElementPtrInst
>(*UI
)) {
372 GetElementPtrInst
*UserG
= cast
<GetElementPtrInst
>(*UI
);
373 if (isHandledGepForm(UserG
))
376 Us
.insert(&UI
.getUse());
381 // Skip the first index operand, since it was already handled above. This
382 // dereferences the pointer operand.
384 Type
*PtrTy
= GepI
->getSourceElementType();
385 for (Use
&U
: llvm::drop_begin(GepI
->indices())) {
387 GepNode
*Nx
= new (*Mem
) GepNode
;
388 Nx
->Parent
= PN
; // Link Nx to the previous node.
389 Nx
->Flags
|= GepNode::Internal
| InBounds
;
393 NodeOrder
.insert(Nx
);
396 PtrTy
= GetElementPtrInst::getTypeAtIndex(PtrTy
, Op
);
399 // After last node has been created, update the use information.
401 PN
->Flags
|= GepNode::Used
;
402 Uses
[PN
].insert(Us
.begin(), Us
.end());
405 // Link the last node with the originating GEP instruction. This is to
406 // help with linking chained GEP instructions.
407 NM
.insert(std::make_pair(GepI
, PN
));
410 void HexagonCommonGEP::collect() {
411 // Establish depth-first traversal order of the dominator tree.
413 getBlockTraversalOrder(&Fn
->front(), BO
);
415 // The creation of gep nodes requires DT-traversal. When processing a GEP
416 // instruction that uses another GEP instruction as the base pointer, the
417 // gep node for the base pointer should already exist.
419 for (Value
*I
: BO
) {
420 BasicBlock
*B
= cast
<BasicBlock
>(I
);
421 for (Instruction
&J
: *B
)
422 if (auto *GepI
= dyn_cast
<GetElementPtrInst
>(&J
))
423 if (isHandledGepForm(GepI
))
424 processGepInst(GepI
, NM
);
427 LLVM_DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes
);
430 static void invert_find_roots(const NodeVect
&Nodes
, NodeChildrenMap
&NCM
,
432 for (GepNode
*N
: Nodes
) {
433 if (N
->Flags
& GepNode::Root
) {
437 GepNode
*PN
= N
->Parent
;
438 NCM
[PN
].push_back(N
);
442 static void nodes_for_root(GepNode
*Root
, NodeChildrenMap
&NCM
,
445 Work
.push_back(Root
);
448 while (!Work
.empty()) {
449 NodeVect::iterator First
= Work
.begin();
452 NodeChildrenMap::iterator CF
= NCM
.find(N
);
453 if (CF
!= NCM
.end()) {
454 llvm::append_range(Work
, CF
->second
);
455 Nodes
.insert(CF
->second
.begin(), CF
->second
.end());
462 using NodeSymRel
= std::set
<NodeSet
>;
463 using NodePair
= std::pair
<GepNode
*, GepNode
*>;
464 using NodePairSet
= std::set
<NodePair
>;
466 } // end anonymous namespace
468 static const NodeSet
*node_class(GepNode
*N
, NodeSymRel
&Rel
) {
469 for (const NodeSet
&S
: Rel
)
475 // Create an ordered pair of GepNode pointers. The pair will be used in
476 // determining equality. The only purpose of the ordering is to eliminate
477 // duplication due to the commutativity of equality/non-equality.
478 static NodePair
node_pair(GepNode
*N1
, GepNode
*N2
) {
479 uintptr_t P1
= reinterpret_cast<uintptr_t>(N1
);
480 uintptr_t P2
= reinterpret_cast<uintptr_t>(N2
);
482 return std::make_pair(N1
, N2
);
483 return std::make_pair(N2
, N1
);
486 static unsigned node_hash(GepNode
*N
) {
487 // Include everything except flags and parent.
489 ID
.AddPointer(N
->Idx
);
490 ID
.AddPointer(N
->PTy
);
491 return ID
.ComputeHash();
494 static bool node_eq(GepNode
*N1
, GepNode
*N2
, NodePairSet
&Eq
,
496 // Don't cache the result for nodes with different hashes. The hash
497 // comparison is fast enough.
498 if (node_hash(N1
) != node_hash(N2
))
501 NodePair NP
= node_pair(N1
, N2
);
502 NodePairSet::iterator FEq
= Eq
.find(NP
);
505 NodePairSet::iterator FNe
= Ne
.find(NP
);
508 // Not previously compared.
509 bool Root1
= N1
->Flags
& GepNode::Root
;
510 uint32_t CmpFlags
= GepNode::Root
| GepNode::Pointer
;
511 bool Different
= (N1
->Flags
& CmpFlags
) != (N2
->Flags
& CmpFlags
);
512 NodePair P
= node_pair(N1
, N2
);
513 // If the root/pointer flags have different values, the nodes are
515 // If both nodes are root nodes, but their base pointers differ,
516 // they are different.
517 if (Different
|| (Root1
&& N1
->BaseVal
!= N2
->BaseVal
)) {
521 // Here the root/pointer flags are identical, and for root nodes the
522 // base pointers are equal, so the root nodes are equal.
523 // For non-root nodes, compare their parent nodes.
524 if (Root1
|| node_eq(N1
->Parent
, N2
->Parent
, Eq
, Ne
)) {
531 void HexagonCommonGEP::common() {
532 // The essence of this commoning is finding gep nodes that are equal.
533 // To do this we need to compare all pairs of nodes. To save time,
534 // first, partition the set of all nodes into sets of potentially equal
535 // nodes, and then compare pairs from within each partition.
536 using NodeSetMap
= std::map
<unsigned, NodeSet
>;
539 for (GepNode
*N
: Nodes
) {
540 unsigned H
= node_hash(N
);
541 MaybeEq
[H
].insert(N
);
544 // Compute the equivalence relation for the gep nodes. Use two caches,
545 // one for equality and the other for non-equality.
546 NodeSymRel EqRel
; // Equality relation (as set of equivalence classes).
547 NodePairSet Eq
, Ne
; // Caches.
548 for (auto &I
: MaybeEq
) {
549 NodeSet
&S
= I
.second
;
550 for (NodeSet::iterator NI
= S
.begin(), NE
= S
.end(); NI
!= NE
; ++NI
) {
552 // If node already has a class, then the class must have been created
553 // in a prior iteration of this loop. Since equality is transitive,
554 // nothing more will be added to that class, so skip it.
555 if (node_class(N
, EqRel
))
558 // Create a new class candidate now.
560 for (NodeSet::iterator NJ
= std::next(NI
); NJ
!= NE
; ++NJ
)
561 if (node_eq(N
, *NJ
, Eq
, Ne
))
563 // If Tmp is empty, N would be the only element in it. Don't bother
564 // creating a class for it then.
566 C
.insert(N
); // Finalize the set before adding it to the relation.
567 std::pair
<NodeSymRel::iterator
, bool> Ins
= EqRel
.insert(C
);
569 assert(Ins
.second
&& "Cannot add a class");
575 dbgs() << "Gep node equality:\n";
576 for (NodePairSet::iterator I
= Eq
.begin(), E
= Eq
.end(); I
!= E
; ++I
)
577 dbgs() << "{ " << I
->first
<< ", " << I
->second
<< " }\n";
579 dbgs() << "Gep equivalence classes:\n";
580 for (const NodeSet
&S
: EqRel
) {
582 for (NodeSet::const_iterator J
= S
.begin(), F
= S
.end(); J
!= F
; ++J
) {
591 // Create a projection from a NodeSet to the minimal element in it.
592 using ProjMap
= std::map
<const NodeSet
*, GepNode
*>;
594 for (const NodeSet
&S
: EqRel
) {
595 GepNode
*Min
= *llvm::min_element(S
, NodeOrder
);
596 std::pair
<ProjMap::iterator
,bool> Ins
= PM
.insert(std::make_pair(&S
, Min
));
598 assert(Ins
.second
&& "Cannot add minimal element");
600 // Update the min element's flags, and user list.
602 UseSet
&MinUs
= Uses
[Min
];
603 for (GepNode
*N
: S
) {
604 uint32_t NF
= N
->Flags
;
605 // If N is used, append all original values of N to the list of
606 // original values of Min.
607 if (NF
& GepNode::Used
)
608 MinUs
.insert(Uses
[N
].begin(), Uses
[N
].end());
614 // The collected flags should include all the flags from the min element.
615 assert((Min
->Flags
& Flags
) == Min
->Flags
);
619 // Commoning: for each non-root gep node, replace "Parent" with the
620 // selected (minimum) node from the corresponding equivalence class.
621 // If a given parent does not have an equivalence class, leave it
622 // unchanged (it means that it's the only element in its class).
623 for (GepNode
*N
: Nodes
) {
624 if (N
->Flags
& GepNode::Root
)
626 const NodeSet
*PC
= node_class(N
->Parent
, EqRel
);
629 ProjMap::iterator F
= PM
.find(PC
);
632 // Found a replacement, use it.
633 GepNode
*Rep
= F
->second
;
637 LLVM_DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes
);
639 // Finally, erase the nodes that are no longer used.
641 for (GepNode
*N
: Nodes
) {
642 const NodeSet
*PC
= node_class(N
, EqRel
);
645 ProjMap::iterator F
= PM
.find(PC
);
653 erase_if(Nodes
, in_set(Erase
));
655 LLVM_DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes
);
658 template <typename T
>
659 static BasicBlock
*nearest_common_dominator(DominatorTree
*DT
, T
&Blocks
) {
661 dbgs() << "NCD of {";
662 for (typename
T::iterator I
= Blocks
.begin(), E
= Blocks
.end(); I
!= E
;
666 BasicBlock
*B
= cast
<BasicBlock
>(*I
);
667 dbgs() << ' ' << B
->getName();
672 // Allow null basic blocks in Blocks. In such cases, return nullptr.
673 typename
T::iterator I
= Blocks
.begin(), E
= Blocks
.end();
676 BasicBlock
*Dom
= cast
<BasicBlock
>(*I
);
678 BasicBlock
*B
= cast_or_null
<BasicBlock
>(*I
);
679 Dom
= B
? DT
->findNearestCommonDominator(Dom
, B
) : nullptr;
683 LLVM_DEBUG(dbgs() << "computed:" << Dom
->getName() << '\n');
687 template <typename T
>
688 static BasicBlock
*nearest_common_dominatee(DominatorTree
*DT
, T
&Blocks
) {
689 // If two blocks, A and B, dominate a block C, then A dominates B,
691 typename
T::iterator I
= Blocks
.begin(), E
= Blocks
.end();
692 // Find the first non-null block.
693 while (I
!= E
&& !*I
)
696 return DT
->getRoot();
697 BasicBlock
*DomB
= cast
<BasicBlock
>(*I
);
701 BasicBlock
*B
= cast
<BasicBlock
>(*I
);
702 if (DT
->dominates(B
, DomB
))
704 if (!DT
->dominates(DomB
, B
))
711 // Find the first use in B of any value from Values. If no such use,
713 template <typename T
>
714 static BasicBlock::iterator
first_use_of_in_block(T
&Values
, BasicBlock
*B
) {
715 BasicBlock::iterator FirstUse
= B
->end(), BEnd
= B
->end();
717 using iterator
= typename
T::iterator
;
719 for (iterator I
= Values
.begin(), E
= Values
.end(); I
!= E
; ++I
) {
721 // If V is used in a PHI node, the use belongs to the incoming block,
722 // not the block with the PHI node. In the incoming block, the use
723 // would be considered as being at the end of it, so it cannot
724 // influence the position of the first use (which is assumed to be
725 // at the end to start with).
728 if (!isa
<Instruction
>(V
))
730 Instruction
*In
= cast
<Instruction
>(V
);
731 if (In
->getParent() != B
)
733 BasicBlock::iterator It
= In
->getIterator();
734 if (std::distance(FirstUse
, BEnd
) < std::distance(It
, BEnd
))
740 static bool is_empty(const BasicBlock
*B
) {
741 return B
->empty() || (&*B
->begin() == B
->getTerminator());
744 BasicBlock
*HexagonCommonGEP::recalculatePlacement(GepNode
*Node
,
745 NodeChildrenMap
&NCM
, NodeToValueMap
&Loc
) {
746 LLVM_DEBUG(dbgs() << "Loc for node:" << Node
<< '\n');
747 // Recalculate the placement for Node, assuming that the locations of
748 // its children in Loc are valid.
749 // Return nullptr if there is no valid placement for Node (for example, it
750 // uses an index value that is not available at the location required
751 // to dominate all children, etc.).
753 // Find the nearest common dominator for:
754 // - all users, if the node is used, and
757 if (Node
->Flags
& GepNode::Used
) {
758 // Append all blocks with uses of the original values to the
760 NodeToUsesMap::iterator UF
= Uses
.find(Node
);
761 assert(UF
!= Uses
.end() && "Used node with no use information");
762 UseSet
&Us
= UF
->second
;
764 User
*R
= U
->getUser();
765 if (!isa
<Instruction
>(R
))
767 BasicBlock
*PB
= isa
<PHINode
>(R
)
768 ? cast
<PHINode
>(R
)->getIncomingBlock(*U
)
769 : cast
<Instruction
>(R
)->getParent();
773 // Append the location of each child.
774 NodeChildrenMap::iterator CF
= NCM
.find(Node
);
775 if (CF
!= NCM
.end()) {
776 NodeVect
&Cs
= CF
->second
;
777 for (GepNode
*CN
: Cs
) {
778 NodeToValueMap::iterator LF
= Loc
.find(CN
);
779 // If the child is only used in GEP instructions (i.e. is not used in
780 // non-GEP instructions), the nearest dominator computed for it may
781 // have been null. In such case it won't have a location available.
784 Bs
.push_back(LF
->second
);
788 BasicBlock
*DomB
= nearest_common_dominator(DT
, Bs
);
791 // Check if the index used by Node dominates the computed dominator.
792 Instruction
*IdxI
= dyn_cast
<Instruction
>(Node
->Idx
);
793 if (IdxI
&& !DT
->dominates(IdxI
->getParent(), DomB
))
796 // Avoid putting nodes into empty blocks.
797 while (is_empty(DomB
)) {
798 DomTreeNode
*N
= (*DT
)[DomB
]->getIDom();
801 DomB
= N
->getBlock();
804 // Otherwise, DomB is fine. Update the location map.
809 BasicBlock
*HexagonCommonGEP::recalculatePlacementRec(GepNode
*Node
,
810 NodeChildrenMap
&NCM
, NodeToValueMap
&Loc
) {
811 LLVM_DEBUG(dbgs() << "LocRec begin for node:" << Node
<< '\n');
812 // Recalculate the placement of Node, after recursively recalculating the
813 // placements of all its children.
814 NodeChildrenMap::iterator CF
= NCM
.find(Node
);
815 if (CF
!= NCM
.end()) {
816 NodeVect
&Cs
= CF
->second
;
817 for (GepNode
*C
: Cs
)
818 recalculatePlacementRec(C
, NCM
, Loc
);
820 BasicBlock
*LB
= recalculatePlacement(Node
, NCM
, Loc
);
821 LLVM_DEBUG(dbgs() << "LocRec end for node:" << Node
<< '\n');
825 bool HexagonCommonGEP::isInvariantIn(Value
*Val
, Loop
*L
) {
826 if (isa
<Constant
>(Val
) || isa
<Argument
>(Val
))
828 Instruction
*In
= dyn_cast
<Instruction
>(Val
);
831 BasicBlock
*HdrB
= L
->getHeader(), *DefB
= In
->getParent();
832 return DT
->properlyDominates(DefB
, HdrB
);
835 bool HexagonCommonGEP::isInvariantIn(GepNode
*Node
, Loop
*L
) {
836 if (Node
->Flags
& GepNode::Root
)
837 if (!isInvariantIn(Node
->BaseVal
, L
))
839 return isInvariantIn(Node
->Idx
, L
);
842 bool HexagonCommonGEP::isInMainPath(BasicBlock
*B
, Loop
*L
) {
843 BasicBlock
*HB
= L
->getHeader();
844 BasicBlock
*LB
= L
->getLoopLatch();
845 // B must post-dominate the loop header or dominate the loop latch.
846 if (PDT
->dominates(B
, HB
))
848 if (LB
&& DT
->dominates(B
, LB
))
853 static BasicBlock
*preheader(DominatorTree
*DT
, Loop
*L
) {
854 if (BasicBlock
*PH
= L
->getLoopPreheader())
858 DomTreeNode
*DN
= DT
->getNode(L
->getHeader());
861 return DN
->getIDom()->getBlock();
864 BasicBlock
*HexagonCommonGEP::adjustForInvariance(GepNode
*Node
,
865 NodeChildrenMap
&NCM
, NodeToValueMap
&Loc
) {
866 // Find the "topmost" location for Node: it must be dominated by both,
867 // its parent (or the BaseVal, if it's a root node), and by the index
870 if (Node
->Flags
& GepNode::Root
) {
871 if (Instruction
*PIn
= dyn_cast
<Instruction
>(Node
->BaseVal
))
872 Bs
.push_back(PIn
->getParent());
874 Bs
.push_back(Loc
[Node
->Parent
]);
876 if (Instruction
*IIn
= dyn_cast
<Instruction
>(Node
->Idx
))
877 Bs
.push_back(IIn
->getParent());
878 BasicBlock
*TopB
= nearest_common_dominatee(DT
, Bs
);
880 // Traverse the loop nest upwards until we find a loop in which Node
881 // is no longer invariant, or until we get to the upper limit of Node's
882 // placement. The traversal will also stop when a suitable "preheader"
883 // cannot be found for a given loop. The "preheader" may actually be
884 // a regular block outside of the loop (i.e. not guarded), in which case
885 // the Node will be speculated.
886 // For nodes that are not in the main path of the containing loop (i.e.
887 // are not executed in each iteration), do not move them out of the loop.
888 BasicBlock
*LocB
= cast_or_null
<BasicBlock
>(Loc
[Node
]);
890 Loop
*Lp
= LI
->getLoopFor(LocB
);
892 if (!isInvariantIn(Node
, Lp
) || !isInMainPath(LocB
, Lp
))
894 BasicBlock
*NewLoc
= preheader(DT
, Lp
);
895 if (!NewLoc
|| !DT
->dominates(TopB
, NewLoc
))
897 Lp
= Lp
->getParentLoop();
903 // Recursively compute the locations of all children nodes.
904 NodeChildrenMap::iterator CF
= NCM
.find(Node
);
905 if (CF
!= NCM
.end()) {
906 NodeVect
&Cs
= CF
->second
;
907 for (GepNode
*C
: Cs
)
908 adjustForInvariance(C
, NCM
, Loc
);
915 struct LocationAsBlock
{
916 LocationAsBlock(const NodeToValueMap
&L
) : Map(L
) {}
918 const NodeToValueMap
&Map
;
921 raw_ostream
&operator<< (raw_ostream
&OS
,
922 const LocationAsBlock
&Loc
) LLVM_ATTRIBUTE_UNUSED
;
923 raw_ostream
&operator<< (raw_ostream
&OS
, const LocationAsBlock
&Loc
) {
924 for (const auto &I
: Loc
.Map
) {
925 OS
<< I
.first
<< " -> ";
926 if (BasicBlock
*B
= cast_or_null
<BasicBlock
>(I
.second
))
927 OS
<< B
->getName() << '(' << B
<< ')';
929 OS
<< "<null-block>";
935 inline bool is_constant(GepNode
*N
) {
936 return isa
<ConstantInt
>(N
->Idx
);
939 } // end anonymous namespace
941 void HexagonCommonGEP::separateChainForNode(GepNode
*Node
, Use
*U
,
942 NodeToValueMap
&Loc
) {
943 User
*R
= U
->getUser();
944 LLVM_DEBUG(dbgs() << "Separating chain for node (" << Node
<< ") user: " << *R
946 BasicBlock
*PB
= cast
<Instruction
>(R
)->getParent();
949 GepNode
*C
= nullptr, *NewNode
= nullptr;
950 while (is_constant(N
) && !(N
->Flags
& GepNode::Root
)) {
951 // XXX if (single-use) dont-replicate;
952 GepNode
*NewN
= new (*Mem
) GepNode(N
);
953 Nodes
.push_back(NewN
);
958 NewN
->Flags
&= ~GepNode::Used
;
967 // Move over all uses that share the same user as U from Node to NewNode.
968 NodeToUsesMap::iterator UF
= Uses
.find(Node
);
969 assert(UF
!= Uses
.end());
970 UseSet
&Us
= UF
->second
;
973 if (U
->getUser() == R
)
977 Us
.remove(U
); // erase takes an iterator.
980 Node
->Flags
&= ~GepNode::Used
;
984 // Should at least have U in NewUs.
985 NewNode
->Flags
|= GepNode::Used
;
986 LLVM_DEBUG(dbgs() << "new node: " << NewNode
<< " " << *NewNode
<< '\n');
987 assert(!NewUs
.empty());
988 Uses
[NewNode
] = NewUs
;
991 void HexagonCommonGEP::separateConstantChains(GepNode
*Node
,
992 NodeChildrenMap
&NCM
, NodeToValueMap
&Loc
) {
993 // First approximation: extract all chains.
995 nodes_for_root(Node
, NCM
, Ns
);
997 LLVM_DEBUG(dbgs() << "Separating constant chains for node: " << Node
<< '\n');
998 // Collect all used nodes together with the uses from loads and stores,
999 // where the GEP node could be folded into the load/store instruction.
1000 NodeToUsesMap FNs
; // Foldable nodes.
1001 for (GepNode
*N
: Ns
) {
1002 if (!(N
->Flags
& GepNode::Used
))
1004 NodeToUsesMap::iterator UF
= Uses
.find(N
);
1005 assert(UF
!= Uses
.end());
1006 UseSet
&Us
= UF
->second
;
1007 // Loads/stores that use the node N.
1010 User
*R
= U
->getUser();
1011 // We're interested in uses that provide the address. It can happen
1012 // that the value may also be provided via GEP, but we won't handle
1013 // those cases here for now.
1014 if (LoadInst
*Ld
= dyn_cast
<LoadInst
>(R
)) {
1015 unsigned PtrX
= LoadInst::getPointerOperandIndex();
1016 if (&Ld
->getOperandUse(PtrX
) == U
)
1018 } else if (StoreInst
*St
= dyn_cast
<StoreInst
>(R
)) {
1019 unsigned PtrX
= StoreInst::getPointerOperandIndex();
1020 if (&St
->getOperandUse(PtrX
) == U
)
1024 // Even if the total use count is 1, separating the chain may still be
1025 // beneficial, since the constant chain may be longer than the GEP alone
1026 // would be (e.g. if the parent node has a constant index and also has
1029 FNs
.insert(std::make_pair(N
, LSs
));
1032 LLVM_DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs
);
1034 for (auto &FN
: FNs
) {
1035 GepNode
*N
= FN
.first
;
1036 UseSet
&Us
= FN
.second
;
1038 separateChainForNode(N
, U
, Loc
);
1042 void HexagonCommonGEP::computeNodePlacement(NodeToValueMap
&Loc
) {
1043 // Compute the inverse of the Node.Parent links. Also, collect the set
1045 NodeChildrenMap NCM
;
1047 invert_find_roots(Nodes
, NCM
, Roots
);
1049 // Compute the initial placement determined by the users' locations, and
1050 // the locations of the child nodes.
1051 for (GepNode
*Root
: Roots
)
1052 recalculatePlacementRec(Root
, NCM
, Loc
);
1054 LLVM_DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc
));
1057 for (GepNode
*Root
: Roots
)
1058 adjustForInvariance(Root
, NCM
, Loc
);
1060 LLVM_DEBUG(dbgs() << "Node placement after adjustment for invariance:\n"
1061 << LocationAsBlock(Loc
));
1063 if (OptEnableConst
) {
1064 for (GepNode
*Root
: Roots
)
1065 separateConstantChains(Root
, NCM
, Loc
);
1067 LLVM_DEBUG(dbgs() << "Node use information:\n" << Uses
);
1069 // At the moment, there is no further refinement of the initial placement.
1070 // Such a refinement could include splitting the nodes if they are placed
1071 // too far from some of its users.
1073 LLVM_DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc
));
1076 Value
*HexagonCommonGEP::fabricateGEP(NodeVect
&NA
, BasicBlock::iterator At
,
1078 LLVM_DEBUG(dbgs() << "Fabricating GEP in " << LocB
->getName()
1081 unsigned Num
= NA
.size();
1082 GepNode
*RN
= NA
[0];
1083 assert((RN
->Flags
& GepNode::Root
) && "Creating GEP for non-root");
1085 GetElementPtrInst
*NewInst
= nullptr;
1086 Value
*Input
= RN
->BaseVal
;
1087 Type
*InpTy
= RN
->PTy
;
1091 SmallVector
<Value
*, 4> IdxList
;
1092 // If the type of the input of the first node is not a pointer,
1093 // we need to add an artificial i32 0 to the indices (because the
1094 // actual input in the IR will be a pointer).
1095 if (!(NA
[Idx
]->Flags
& GepNode::Pointer
)) {
1096 Type
*Int32Ty
= Type::getInt32Ty(*Ctx
);
1097 IdxList
.push_back(ConstantInt::get(Int32Ty
, 0));
1100 // Keep adding indices from NA until we have to stop and generate
1101 // an "intermediate" GEP.
1102 while (++Idx
<= Num
) {
1103 GepNode
*N
= NA
[Idx
-1];
1104 IdxList
.push_back(N
->Idx
);
1106 // We have to stop if we reach a pointer.
1107 if (NA
[Idx
]->Flags
& GepNode::Pointer
)
1111 NewInst
= GetElementPtrInst::Create(InpTy
, Input
, IdxList
, "cgep", At
);
1112 NewInst
->setIsInBounds(RN
->Flags
& GepNode::InBounds
);
1113 LLVM_DEBUG(dbgs() << "new GEP: " << *NewInst
<< '\n');
1116 InpTy
= NA
[Idx
]->PTy
;
1118 } while (Idx
<= Num
);
1123 void HexagonCommonGEP::getAllUsersForNode(GepNode
*Node
, ValueVect
&Values
,
1124 NodeChildrenMap
&NCM
) {
1126 Work
.push_back(Node
);
1128 while (!Work
.empty()) {
1129 NodeVect::iterator First
= Work
.begin();
1130 GepNode
*N
= *First
;
1132 if (N
->Flags
& GepNode::Used
) {
1133 NodeToUsesMap::iterator UF
= Uses
.find(N
);
1134 assert(UF
!= Uses
.end() && "No use information for used node");
1135 UseSet
&Us
= UF
->second
;
1136 for (const auto &U
: Us
)
1137 Values
.push_back(U
->getUser());
1139 NodeChildrenMap::iterator CF
= NCM
.find(N
);
1140 if (CF
!= NCM
.end()) {
1141 NodeVect
&Cs
= CF
->second
;
1142 llvm::append_range(Work
, Cs
);
1147 void HexagonCommonGEP::materialize(NodeToValueMap
&Loc
) {
1148 LLVM_DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes
<< '\n');
1149 NodeChildrenMap NCM
;
1151 // Compute the inversion again, since computing placement could alter
1152 // "parent" relation between nodes.
1153 invert_find_roots(Nodes
, NCM
, Roots
);
1155 while (!Roots
.empty()) {
1156 NodeVect::iterator First
= Roots
.begin();
1157 GepNode
*Root
= *First
, *Last
= *First
;
1160 NodeVect NA
; // Nodes to assemble.
1161 // Append to NA all child nodes up to (and including) the first child
1163 // (1) has more than 1 child, or
1165 // (3) has a child located in a different block.
1166 bool LastUsed
= false;
1167 unsigned LastCN
= 0;
1168 // The location may be null if the computation failed (it can legitimately
1169 // happen for nodes created from dead GEPs).
1170 Value
*LocV
= Loc
[Last
];
1173 BasicBlock
*LastB
= cast
<BasicBlock
>(LocV
);
1176 LastUsed
= (Last
->Flags
& GepNode::Used
);
1179 NodeChildrenMap::iterator CF
= NCM
.find(Last
);
1180 LastCN
= (CF
!= NCM
.end()) ? CF
->second
.size() : 0;
1183 GepNode
*Child
= CF
->second
.front();
1184 BasicBlock
*ChildB
= cast_or_null
<BasicBlock
>(Loc
[Child
]);
1185 if (ChildB
!= nullptr && LastB
!= ChildB
)
1190 BasicBlock::iterator InsertAt
= LastB
->getTerminator()->getIterator();
1191 if (LastUsed
|| LastCN
> 0) {
1193 getAllUsersForNode(Root
, Urs
, NCM
);
1194 BasicBlock::iterator FirstUse
= first_use_of_in_block(Urs
, LastB
);
1195 if (FirstUse
!= LastB
->end())
1196 InsertAt
= FirstUse
;
1199 // Generate a new instruction for NA.
1200 Value
*NewInst
= fabricateGEP(NA
, InsertAt
, LastB
);
1202 // Convert all the children of Last node into roots, and append them
1203 // to the Roots list.
1205 NodeVect
&Cs
= NCM
[Last
];
1206 for (GepNode
*CN
: Cs
) {
1207 CN
->Flags
&= ~GepNode::Internal
;
1208 CN
->Flags
|= GepNode::Root
;
1209 CN
->BaseVal
= NewInst
;
1210 Roots
.push_back(CN
);
1214 // Lastly, if the Last node was used, replace all uses with the new GEP.
1215 // The uses reference the original GEP values.
1217 NodeToUsesMap::iterator UF
= Uses
.find(Last
);
1218 assert(UF
!= Uses
.end() && "No use information found");
1219 UseSet
&Us
= UF
->second
;
1226 void HexagonCommonGEP::removeDeadCode() {
1228 BO
.push_back(&Fn
->front());
1230 for (unsigned i
= 0; i
< BO
.size(); ++i
) {
1231 BasicBlock
*B
= cast
<BasicBlock
>(BO
[i
]);
1232 for (auto *DTN
: children
<DomTreeNode
*>(DT
->getNode(B
)))
1233 BO
.push_back(DTN
->getBlock());
1236 for (Value
*V
: llvm::reverse(BO
)) {
1237 BasicBlock
*B
= cast
<BasicBlock
>(V
);
1239 for (Instruction
&I
: llvm::reverse(*B
))
1241 for (Value
*I
: Ins
) {
1242 Instruction
*In
= cast
<Instruction
>(I
);
1243 if (isInstructionTriviallyDead(In
))
1244 In
->eraseFromParent();
1249 bool HexagonCommonGEP::runOnFunction(Function
&F
) {
1250 if (skipFunction(F
))
1253 // For now bail out on C++ exception handling.
1254 for (const BasicBlock
&BB
: F
)
1255 for (const Instruction
&I
: BB
)
1256 if (isa
<InvokeInst
>(I
) || isa
<LandingPadInst
>(I
))
1260 DT
= &getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
1261 PDT
= &getAnalysis
<PostDominatorTreeWrapperPass
>().getPostDomTree();
1262 LI
= &getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
1263 Ctx
= &F
.getContext();
1269 SpecificBumpPtrAllocator
<GepNode
> Allocator
;
1276 computeNodePlacement(Loc
);
1280 #ifdef EXPENSIVE_CHECKS
1281 // Run this only when expensive checks are enabled.
1282 if (verifyFunction(F
, &dbgs()))
1283 report_fatal_error("Broken function");
1290 FunctionPass
*createHexagonCommonGEP() {
1291 return new HexagonCommonGEP();
1294 } // end namespace llvm