1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for specific ABI,
12 /// yet will be correctly compiled if proper attributes are propagated from the
15 /// The pass analyzes call graph and propagates ABI target features through the
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
27 //===----------------------------------------------------------------------===//
30 #include "AMDGPUSubtarget.h"
31 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
32 #include "Utils/AMDGPUBaseInfo.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/Module.h"
37 #include "llvm/Target/TargetMachine.h"
38 #include "llvm/Transforms/Utils/Cloning.h"
41 #define DEBUG_TYPE "amdgpu-propagate-attributes"
46 extern const SubtargetFeatureKV AMDGPUFeatureKV
[AMDGPU::NumSubtargetFeatures
-1];
51 class AMDGPUPropagateAttributes
{
52 const FeatureBitset TargetFeatures
= {
53 AMDGPU::FeatureWavefrontSize16
,
54 AMDGPU::FeatureWavefrontSize32
,
55 AMDGPU::FeatureWavefrontSize64
// Record one clone: the feature mask it was created for, the original
// function it was cloned from, and the resulting new function.
60 Clone(FeatureBitset FeatureMask
, Function
*OrigF
, Function
*NewF
) :
61 FeatureMask(FeatureMask
), OrigF(OrigF
), NewF(NewF
) {}
63 FeatureBitset FeatureMask
;
68 const TargetMachine
*TM
;
70 // Clone functions as needed or just set attributes.
73 // Option propagation roots.
74 SmallSet
<Function
*, 32> Roots
;
76 // Clones of functions with their attributes.
77 SmallVector
<Clone
, 32> Clones
;
79 // Find a clone with required features.
80 Function
*findFunction(const FeatureBitset
&FeaturesNeeded
,
83 // Clone function F and set NewFeatures on the clone.
84 // Clone takes the name of original function.
85 Function
*cloneWithFeatures(Function
&F
,
86 const FeatureBitset
&NewFeatures
);
88 // Set new function's features in place.
89 void setFeatures(Function
&F
, const FeatureBitset
&NewFeatures
);
// Build a comma-separated feature string for Features. Every feature set in
// Features is emitted as "+name"; a feature that is not set but belongs to
// TargetFeatures is emitted as "-name". The trailing comma is removed.
91 std::string
getFeatureString(const FeatureBitset
&Features
) const;
93 // Propagate attributes from Roots.
// Construct the propagator, storing the target machine pointer and whether
// cloning of functions is allowed.
97 AMDGPUPropagateAttributes(const TargetMachine
*TM
, bool AllowClone
) :
98 TM(TM
), AllowClone(AllowClone
) {}
100 // Use F as a root and propagate its attributes.
101 bool process(Function
&F
);
103 // Propagate attributes starting from kernel functions.
104 bool process(Module
&M
);
107 // Allows to propagate attributes early, but no cloning is allowed as it must
108 // be a function pass to run before any optimizations.
109 // TODO: We shall only need one instance of module pass, but that needs to be
110 // in the linker pipeline which is currently not possible.
111 class AMDGPUPropagateAttributesEarly
: public FunctionPass
{
112 const TargetMachine
*TM
;
115 static char ID
; // Pass identification
117 AMDGPUPropagateAttributesEarly(const TargetMachine
*TM
= nullptr) :
118 FunctionPass(ID
), TM(TM
) {
119 initializeAMDGPUPropagateAttributesEarlyPass(
120 *PassRegistry::getPassRegistry());
123 bool runOnFunction(Function
&F
) override
;
126 // Allows to propagate attributes with cloning but does that late in the
128 class AMDGPUPropagateAttributesLate
: public ModulePass
{
129 const TargetMachine
*TM
;
132 static char ID
; // Pass identification
134 AMDGPUPropagateAttributesLate(const TargetMachine
*TM
= nullptr) :
135 ModulePass(ID
), TM(TM
) {
136 initializeAMDGPUPropagateAttributesLatePass(
137 *PassRegistry::getPassRegistry());
140 bool runOnModule(Module
&M
) override
;
143 } // end anonymous namespace.
// Definitions of the static pass identification members declared in the
// early (function) and late (module) pass classes.
145 char AMDGPUPropagateAttributesEarly::ID
= 0;
146 char AMDGPUPropagateAttributesLate::ID
= 0;
148 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly
,
149 "amdgpu-propagate-attributes-early",
150 "Early propagate attributes from kernels to functions",
152 INITIALIZE_PASS(AMDGPUPropagateAttributesLate
,
153 "amdgpu-propagate-attributes-late",
154 "Late propagate attributes from kernels to functions",
158 AMDGPUPropagateAttributes::findFunction(const FeatureBitset
&FeaturesNeeded
,
160 // TODO: search for clone's clones.
161 for (Clone
&C
: Clones
)
162 if (C
.OrigF
== OrigF
&& FeaturesNeeded
== C
.FeatureMask
)
168 bool AMDGPUPropagateAttributes::process(Module
&M
) {
169 for (auto &F
: M
.functions())
170 if (AMDGPU::isEntryFunctionCC(F
.getCallingConv()))
176 bool AMDGPUPropagateAttributes::process(Function
&F
) {
181 bool AMDGPUPropagateAttributes::process() {
182 bool Changed
= false;
183 SmallSet
<Function
*, 32> NewRoots
;
184 SmallSet
<Function
*, 32> Replaced
;
188 Module
&M
= *(*Roots
.begin())->getParent();
191 Roots
.insert(NewRoots
.begin(), NewRoots
.end());
194 for (auto &F
: M
.functions()) {
195 if (F
.isDeclaration() || Roots
.count(&F
) || Roots
.count(&F
))
198 const FeatureBitset
&CalleeBits
=
199 TM
->getSubtargetImpl(F
)->getFeatureBits();
200 SmallVector
<std::pair
<CallBase
*, Function
*>, 32> ToReplace
;
202 for (User
*U
: F
.users()) {
203 Instruction
*I
= dyn_cast
<Instruction
>(U
);
206 CallBase
*CI
= dyn_cast
<CallBase
>(I
);
209 Function
*Caller
= CI
->getCaller();
212 if (!Roots
.count(Caller
))
215 const FeatureBitset
&CallerBits
=
216 TM
->getSubtargetImpl(*Caller
)->getFeatureBits() & TargetFeatures
;
218 if (CallerBits
== (CalleeBits
& TargetFeatures
)) {
223 Function
*NewF
= findFunction(CallerBits
, &F
);
225 FeatureBitset
NewFeatures((CalleeBits
& ~TargetFeatures
) |
228 // This may set different features on different iterations if
229 // there is a contradiction in callers' attributes. In this case
230 // we rely on a second pass running on Module, which is allowed
232 setFeatures(F
, NewFeatures
);
238 NewF
= cloneWithFeatures(F
, NewFeatures
);
239 Clones
.push_back(Clone(CallerBits
, &F
, NewF
));
240 NewRoots
.insert(NewF
);
243 ToReplace
.push_back(std::make_pair(CI
, NewF
));
249 while (!ToReplace
.empty()) {
250 auto R
= ToReplace
.pop_back_val();
251 R
.first
->setCalledFunction(R
.second
);
254 } while (!NewRoots
.empty());
256 for (Function
*F
: Replaced
) {
258 F
->eraseFromParent();
265 AMDGPUPropagateAttributes::cloneWithFeatures(Function
&F
,
266 const FeatureBitset
&NewFeatures
) {
267 LLVM_DEBUG(dbgs() << "Cloning " << F
.getName() << '\n');
269 ValueToValueMapTy dummy
;
270 Function
*NewF
= CloneFunction(&F
, dummy
);
271 setFeatures(*NewF
, NewFeatures
);
273 // Swap names. If that is the only clone it will retain the name of now
276 std::string NewName
= NewF
->getName();
280 // Name has changed, it does not need an external symbol.
281 F
.setVisibility(GlobalValue::DefaultVisibility
);
282 F
.setLinkage(GlobalValue::InternalLinkage
);
288 void AMDGPUPropagateAttributes::setFeatures(Function
&F
,
289 const FeatureBitset
&NewFeatures
) {
290 std::string NewFeatureStr
= getFeatureString(NewFeatures
);
292 LLVM_DEBUG(dbgs() << "Set features "
293 << getFeatureString(NewFeatures
& TargetFeatures
)
294 << " on " << F
.getName() << '\n');
296 F
.removeFnAttr("target-features");
297 F
.addFnAttr("target-features", NewFeatureStr
);
301 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset
&Features
) const
304 for (const SubtargetFeatureKV
&KV
: AMDGPUFeatureKV
) {
305 if (Features
[KV
.Value
])
306 Ret
+= (StringRef("+") + KV
.Key
+ ",").str();
307 else if (TargetFeatures
[KV
.Value
])
308 Ret
+= (StringRef("-") + KV
.Key
+ ",").str();
310 Ret
.pop_back(); // Remove last comma.
314 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function
&F
) {
315 if (!TM
|| !AMDGPU::isEntryFunctionCC(F
.getCallingConv()))
318 return AMDGPUPropagateAttributes(TM
, false).process(F
);
321 bool AMDGPUPropagateAttributesLate::runOnModule(Module
&M
) {
325 return AMDGPUPropagateAttributes(TM
, true).process(M
);
329 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine
*TM
) {
330 return new AMDGPUPropagateAttributesEarly(TM
);
334 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine
*TM
) {
335 return new AMDGPUPropagateAttributesLate(TM
);