1 //===- SplitModule.cpp - Split a module into partitions -------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the function llvm::SplitModule, which splits a module
10 // into multiple linkable partitions. It can be used to implement parallel code
11 // generation for link-time optimization.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Utils/SplitModule.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/EquivalenceClasses.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/IR/Comdat.h"
22 #include "llvm/IR/Constant.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/GlobalAlias.h"
26 #include "llvm/IR/GlobalObject.h"
27 #include "llvm/IR/GlobalIndirectSymbol.h"
28 #include "llvm/IR/GlobalValue.h"
29 #include "llvm/IR/GlobalVariable.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/User.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/MD5.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Transforms/Utils/Cloning.h"
40 #include "llvm/Transforms/Utils/ValueMapper.h"
51 #define DEBUG_TYPE "split-module"
55 using ClusterMapType
= EquivalenceClasses
<const GlobalValue
*>;
56 using ComdatMembersType
= DenseMap
<const Comdat
*, const GlobalValue
*>;
57 using ClusterIDMapType
= DenseMap
<const GlobalValue
*, unsigned>;
59 } // end anonymous namespace
61 static void addNonConstUser(ClusterMapType
&GVtoClusterMap
,
62 const GlobalValue
*GV
, const User
*U
) {
63 assert((!isa
<Constant
>(U
) || isa
<GlobalValue
>(U
)) && "Bad user");
65 if (const Instruction
*I
= dyn_cast
<Instruction
>(U
)) {
66 const GlobalValue
*F
= I
->getParent()->getParent();
67 GVtoClusterMap
.unionSets(GV
, F
);
68 } else if (isa
<GlobalIndirectSymbol
>(U
) || isa
<Function
>(U
) ||
69 isa
<GlobalVariable
>(U
)) {
70 GVtoClusterMap
.unionSets(GV
, cast
<GlobalValue
>(U
));
72 llvm_unreachable("Underimplemented use case");
76 // Adds all GlobalValue users of V to the same cluster as GV.
77 static void addAllGlobalValueUsers(ClusterMapType
&GVtoClusterMap
,
78 const GlobalValue
*GV
, const Value
*V
) {
79 for (auto *U
: V
->users()) {
80 SmallVector
<const User
*, 4> Worklist
;
81 Worklist
.push_back(U
);
82 while (!Worklist
.empty()) {
83 const User
*UU
= Worklist
.pop_back_val();
84 // For each constant that is not a GV (a pure const) recurse.
85 if (isa
<Constant
>(UU
) && !isa
<GlobalValue
>(UU
)) {
86 Worklist
.append(UU
->user_begin(), UU
->user_end());
89 addNonConstUser(GVtoClusterMap
, GV
, UU
);
94 // Find partitions for module in the way that no locals need to be
96 // Try to balance pack those partitions into N files since this roughly equals
97 // thread balancing for the backend codegen step.
98 static void findPartitions(Module
*M
, ClusterIDMapType
&ClusterIDMap
,
100 // At this point module should have the proper mix of globals and locals.
101 // As we attempt to partition this module, we must not change any
102 // locals to globals.
103 LLVM_DEBUG(dbgs() << "Partition module with (" << M
->size()
105 ClusterMapType GVtoClusterMap
;
106 ComdatMembersType ComdatMembers
;
108 auto recordGVSet
= [&GVtoClusterMap
, &ComdatMembers
](GlobalValue
&GV
) {
109 if (GV
.isDeclaration())
113 GV
.setName("__llvmsplit_unnamed");
115 // Comdat groups must not be partitioned. For comdat groups that contain
116 // locals, record all their members here so we can keep them together.
117 // Comdat groups that only contain external globals are already handled by
118 // the MD5-based partitioning.
119 if (const Comdat
*C
= GV
.getComdat()) {
120 auto &Member
= ComdatMembers
[C
];
122 GVtoClusterMap
.unionSets(Member
, &GV
);
127 // For aliases we should not separate them from their aliasees regardless
129 if (auto *GIS
= dyn_cast
<GlobalIndirectSymbol
>(&GV
)) {
130 if (const GlobalObject
*Base
= GIS
->getBaseObject())
131 GVtoClusterMap
.unionSets(&GV
, Base
);
134 if (const Function
*F
= dyn_cast
<Function
>(&GV
)) {
135 for (const BasicBlock
&BB
: *F
) {
136 BlockAddress
*BA
= BlockAddress::lookup(&BB
);
137 if (!BA
|| !BA
->isConstantUsed())
139 addAllGlobalValueUsers(GVtoClusterMap
, F
, BA
);
143 if (GV
.hasLocalLinkage())
144 addAllGlobalValueUsers(GVtoClusterMap
, &GV
, &GV
);
147 llvm::for_each(M
->functions(), recordGVSet
);
148 llvm::for_each(M
->globals(), recordGVSet
);
149 llvm::for_each(M
->aliases(), recordGVSet
);
151 // Assigned all GVs to merged clusters while balancing number of objects in
153 auto CompareClusters
= [](const std::pair
<unsigned, unsigned> &a
,
154 const std::pair
<unsigned, unsigned> &b
) {
155 if (a
.second
|| b
.second
)
156 return a
.second
> b
.second
;
158 return a
.first
> b
.first
;
161 std::priority_queue
<std::pair
<unsigned, unsigned>,
162 std::vector
<std::pair
<unsigned, unsigned>>,
163 decltype(CompareClusters
)>
164 BalancinQueue(CompareClusters
);
165 // Pre-populate priority queue with N slot blanks.
166 for (unsigned i
= 0; i
< N
; ++i
)
167 BalancinQueue
.push(std::make_pair(i
, 0));
169 using SortType
= std::pair
<unsigned, ClusterMapType::iterator
>;
171 SmallVector
<SortType
, 64> Sets
;
172 SmallPtrSet
<const GlobalValue
*, 32> Visited
;
174 // To guarantee determinism, we have to sort SCC according to size.
175 // When size is the same, use leader's name.
176 for (ClusterMapType::iterator I
= GVtoClusterMap
.begin(),
177 E
= GVtoClusterMap
.end(); I
!= E
; ++I
)
180 std::make_pair(std::distance(GVtoClusterMap
.member_begin(I
),
181 GVtoClusterMap
.member_end()), I
));
183 llvm::sort(Sets
, [](const SortType
&a
, const SortType
&b
) {
184 if (a
.first
== b
.first
)
185 return a
.second
->getData()->getName() > b
.second
->getData()->getName();
187 return a
.first
> b
.first
;
190 for (auto &I
: Sets
) {
191 unsigned CurrentClusterID
= BalancinQueue
.top().first
;
192 unsigned CurrentClusterSize
= BalancinQueue
.top().second
;
195 LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID
<< "] cluster_size("
196 << I
.first
<< ") ----> " << I
.second
->getData()->getName()
199 for (ClusterMapType::member_iterator MI
=
200 GVtoClusterMap
.findLeader(I
.second
);
201 MI
!= GVtoClusterMap
.member_end(); ++MI
) {
202 if (!Visited
.insert(*MI
).second
)
204 LLVM_DEBUG(dbgs() << "----> " << (*MI
)->getName()
205 << ((*MI
)->hasLocalLinkage() ? " l " : " e ") << "\n");
207 ClusterIDMap
[*MI
] = CurrentClusterID
;
208 CurrentClusterSize
++;
210 // Add this set size to the number of entries in this cluster.
211 BalancinQueue
.push(std::make_pair(CurrentClusterID
, CurrentClusterSize
));
215 static void externalize(GlobalValue
*GV
) {
216 if (GV
->hasLocalLinkage()) {
217 GV
->setLinkage(GlobalValue::ExternalLinkage
);
218 GV
->setVisibility(GlobalValue::HiddenVisibility
);
221 // Unnamed entities must be named consistently between modules. setName will
222 // give a distinct name to each such entity.
224 GV
->setName("__llvmsplit_unnamed");
227 // Returns whether GV should be in partition (0-based) I of N.
228 static bool isInPartition(const GlobalValue
*GV
, unsigned I
, unsigned N
) {
229 if (auto *GIS
= dyn_cast
<GlobalIndirectSymbol
>(GV
))
230 if (const GlobalObject
*Base
= GIS
->getBaseObject())
234 if (const Comdat
*C
= GV
->getComdat())
237 Name
= GV
->getName();
239 // Partition by MD5 hash. We only need a few bits for evenness as the number
240 // of partitions will generally be in the 1-2 figure range; the low 16 bits
246 return (R
[0] | (R
[1] << 8)) % N
== I
;
249 void llvm::SplitModule(
250 std::unique_ptr
<Module
> M
, unsigned N
,
251 function_ref
<void(std::unique_ptr
<Module
> MPart
)> ModuleCallback
,
252 bool PreserveLocals
) {
253 if (!PreserveLocals
) {
254 for (Function
&F
: *M
)
256 for (GlobalVariable
&GV
: M
->globals())
258 for (GlobalAlias
&GA
: M
->aliases())
260 for (GlobalIFunc
&GIF
: M
->ifuncs())
264 // This performs splitting without a need for externalization, which might not
265 // always be possible.
266 ClusterIDMapType ClusterIDMap
;
267 findPartitions(M
.get(), ClusterIDMap
, N
);
269 // FIXME: We should be able to reuse M as the last partition instead of
271 for (unsigned I
= 0; I
< N
; ++I
) {
272 ValueToValueMapTy VMap
;
273 std::unique_ptr
<Module
> MPart(
274 CloneModule(*M
, VMap
, [&](const GlobalValue
*GV
) {
275 if (ClusterIDMap
.count(GV
))
276 return (ClusterIDMap
[GV
] == I
);
278 return isInPartition(GV
, I
, N
);
281 MPart
->setModuleInlineAsm("");
282 ModuleCallback(std::move(MPart
));