1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for specific ABI,
12 /// yet will be correctly compiled if proper attributes are propagated from the
15 /// The pass analyzes call graph and propagates ABI target features through the
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
27 //===----------------------------------------------------------------------===//
30 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
31 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/CodeGen/TargetPassConfig.h"
34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
35 #include "llvm/IR/InstrTypes.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/Transforms/Utils/Cloning.h"
39 #define DEBUG_TYPE "amdgpu-propagate-attributes"
// Subtarget feature key/value table, defined outside this file.
// getFeatureString() iterates over it to turn a FeatureBitset into a
// "+name,-name,..." string.
44 extern const SubtargetFeatureKV AMDGPUFeatureKV
[AMDGPU::NumSubtargetFeatures
-1];
49 // Target features to propagate: the wavefront-size features listed below.
// This set is used as a mask when comparing function properties and when
// adjusting a callee's features to those of its caller.
50 static constexpr const FeatureBitset TargetFeatures
= {
51 AMDGPU::FeatureWavefrontSize16
,
52 AMDGPU::FeatureWavefrontSize32
,
53 AMDGPU::FeatureWavefrontSize64
56 // Attributes to propagate, identified by their string names.
57 // TODO: Support conservative min/max merging instead of cloning.
58 static constexpr const char* AttributeNames
[] = {
59 "amdgpu-waves-per-eu",
60 "amdgpu-flat-work-group-size"
// Number of entries in AttributeNames; sizes the per-function attribute array
// and bounds every loop over the propagated attributes.
63 static constexpr unsigned NumAttr
=
64 sizeof(AttributeNames
) / sizeof(AttributeNames
[0]);
// Worker used by both the early (function) and late (module) passes. It is
// constructed with a TargetMachine and an AllowClone flag and then driven
// through one of the process() overloads.
// NOTE(review): the headers of the nested FnProperties and Clone classes and
// some member declarations are missing from this listing.
66 class AMDGPUPropagateAttributes
{
// Build properties directly from a feature bitset; attributes stay unset.
70 explicit FnProperties(const FeatureBitset
&&FB
) : Features(FB
) {}
// Capture the properties of F: its subtarget feature bits as reported by TM,
// plus each attribute named in AttributeNames that F actually carries.
73 explicit FnProperties(const TargetMachine
&TM
, const Function
&F
) {
74 Features
= TM
.getSubtargetImpl(F
)->getFeatureBits();
76 for (unsigned I
= 0; I
< NumAttr
; ++I
)
77 if (F
.hasFnAttribute(AttributeNames
[I
]))
78 Attributes
[I
] = F
.getFnAttribute(AttributeNames
[I
]);
// Equality looks only at the TargetFeatures subset of the feature bits and at
// the tracked attributes; other feature bits are ignored by the comparison.
81 bool operator == (const FnProperties
&Other
) const {
82 if ((Features
& TargetFeatures
) != (Other
.Features
& TargetFeatures
))
84 for (unsigned I
= 0; I
< NumAttr
; ++I
)
85 if (Attributes
[I
] != Other
.Attributes
[I
])
// Derive new properties for this function as seen from a caller: clear this
// function's TargetFeatures bits, OR in the caller's feature bits, and take
// the caller's tracked attributes.
90 FnProperties
adjustToCaller(const FnProperties
&CallerProps
) const {
91 FnProperties
New((Features
& ~TargetFeatures
) | CallerProps
.Features
);
92 for (unsigned I
= 0; I
< NumAttr
; ++I
)
93 New
.Attributes
[I
] = CallerProps
.Attributes
[I
];
// Feature bits and, per entry of AttributeNames, the attribute if present.
97 FeatureBitset Features
;
98 Optional
<Attribute
> Attributes
[NumAttr
];
// Record of one clone: the properties it was created for, the function it was
// cloned from and the resulting function.
103 Clone(const FnProperties
&Props
, Function
*OrigF
, Function
*NewF
) :
104 Properties(Props
), OrigF(OrigF
), NewF(NewF
) {}
106 FnProperties Properties
;
111 const TargetMachine
*TM
;
113 // Clone functions as needed or just set attributes.
116 // Option propagation roots.
117 SmallSet
<Function
*, 32> Roots
;
119 // Clones of functions with their attributes.
120 SmallVector
<Clone
, 32> Clones
;
122 // Find a clone with required features.
123 Function
*findFunction(const FnProperties
&PropsNeeded
,
126 // Clone function \p F and set \p NewProps on the clone.
127 // Clone takes the name of the original function.
128 Function
*cloneWithProperties(Function
&F
, const FnProperties
&NewProps
);
130 // Set new function's features in place.
131 void setFeatures(Function
&F
, const FeatureBitset
&NewFeatures
);
133 // Set new function's attributes in place.
134 void setAttributes(Function
&F
, const ArrayRef
<Optional
<Attribute
>> NewAttrs
);
// Render a feature bitset as the string stored in "target-features".
136 std::string
getFeatureString(const FeatureBitset
&Features
) const;
138 // Propagate attributes from Roots.
// \p AllowClone is false for the early pass and true for the late pass.
142 AMDGPUPropagateAttributes(const TargetMachine
*TM
, bool AllowClone
) :
143 TM(TM
), AllowClone(AllowClone
) {}
145 // Use F as a root and propagate its attributes.
146 bool process(Function
&F
);
148 // Propagate attributes starting from kernel functions.
149 bool process(Module
&M
);
152 // Allows to propagate attributes early, but no cloning is allowed as it must
153 // be a function pass to run before any optimizations.
154 // TODO: We shall only need one instance of module pass, but that needs to be
155 // in the linker pipeline which is currently not possible.
156 class AMDGPUPropagateAttributesEarly
: public FunctionPass
{
// Target machine handed to the propagation worker; may be null at
// construction (see the default argument below).
157 const TargetMachine
*TM
;
160 static char ID
; // Pass identification
// Constructing the pass also registers it with the global PassRegistry.
162 AMDGPUPropagateAttributesEarly(const TargetMachine
*TM
= nullptr) :
163 FunctionPass(ID
), TM(TM
) {
164 initializeAMDGPUPropagateAttributesEarlyPass(
165 *PassRegistry::getPassRegistry());
168 bool runOnFunction(Function
&F
) override
;
171 // Allows to propagate attributes with cloning but does that late in the
// NOTE(review): the continuation of the sentence above is missing from this
// listing (presumably "pipeline." -- confirm against the original file).
173 class AMDGPUPropagateAttributesLate
: public ModulePass
{
// Target machine handed to the propagation worker; may be null at
// construction (see the default argument below).
174 const TargetMachine
*TM
;
177 static char ID
; // Pass identification
// Constructing the pass also registers it with the global PassRegistry.
179 AMDGPUPropagateAttributesLate(const TargetMachine
*TM
= nullptr) :
180 ModulePass(ID
), TM(TM
) {
181 initializeAMDGPUPropagateAttributesLatePass(
182 *PassRegistry::getPassRegistry());
185 bool runOnModule(Module
&M
) override
;
188 } // end anonymous namespace.
// Definitions of the static pass IDs declared in the two pass classes.
190 char AMDGPUPropagateAttributesEarly::ID
= 0;
191 char AMDGPUPropagateAttributesLate::ID
= 0;
// Register the early pass with its argument string and description.
// NOTE(review): the trailing macro arguments are missing from this listing.
193 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly
,
194 "amdgpu-propagate-attributes-early",
195 "Early propagate attributes from kernels to functions",
// Register the late pass with its argument string and description.
// NOTE(review): the trailing macro arguments are missing from this listing.
197 INITIALIZE_PASS(AMDGPUPropagateAttributesLate
,
198 "amdgpu-propagate-attributes-late",
199 "Late propagate attributes from kernels to functions",
// Scans the recorded Clones for an entry that was made from OrigF and whose
// properties compare equal to PropsNeeded (equality as defined by
// FnProperties::operator==).
// NOTE(review): the return type, the second parameter and the return
// statements are missing from this listing; per the declaration the function
// returns a Function pointer.
203 AMDGPUPropagateAttributes::findFunction(const FnProperties
&PropsNeeded
,
205 // TODO: search for clone's clones.
206 for (Clone
&C
: Clones
)
207 if (C
.OrigF
== OrigF
&& PropsNeeded
== C
.Properties
)
// Module entry point: visits every function in M and tests whether its
// calling convention is an entry-function calling convention.
// NOTE(review): the action taken for matching functions and the return are
// missing from this listing; the class declaration describes this overload as
// propagating attributes starting from kernel functions.
213 bool AMDGPUPropagateAttributes::process(Module
&M
) {
214 for (auto &F
: M
.functions())
215 if (AMDGPU::isEntryFunctionCC(F
.getCallingConv()))
// Function entry point. The class declaration describes it as using F as a
// root and propagating its attributes.
// NOTE(review): the body is missing from this listing.
221 bool AMDGPUPropagateAttributes::process(Function
&F
) {
// Core propagation loop over the current Roots.
//
// For every function with a body in the module, each direct call site is
// examined. When the caller is a current or newly added root, the callee's
// properties are compared with the caller's. On a mismatch the callee is
// either updated in place (setFeatures / setAttributes) or cloned with the
// adjusted properties, with the clone recorded in Clones and added to
// NewRoots. Call sites are redirected afterwards, the outer loop repeats while
// NewRoots is non-empty, and finally the functions collected in Replaced are
// erased.
//
// NOTE(review): many control-flow lines (guards, continue statements, the
// AllowClone branch, the opening of the do-loop and the final return) are
// missing from this listing, so the exact conditions under which each step
// runs cannot be confirmed from here.
226 bool AMDGPUPropagateAttributes::process() {
227 bool Changed
= false;
228 SmallSet
<Function
*, 32> NewRoots
;
229 SmallSet
<Function
*, 32> Replaced
;
// All roots are taken to live in one module; it is read from the first root.
233 Module
&M
= *(*Roots
.begin())->getParent();
// Fold the roots discovered so far into Roots.
236 Roots
.insert(NewRoots
.begin(), NewRoots
.end());
239 for (auto &F
: M
.functions()) {
240 if (F
.isDeclaration())
// F is treated as the callee for the scan of its users below.
243 const FnProperties
CalleeProps(*TM
, F
);
// Call-site rewrites are queued here and applied after the user scan.
244 SmallVector
<std::pair
<CallBase
*, Function
*>, 32> ToReplace
;
// Call sites already handled for this callee.
245 SmallSet
<CallBase
*, 32> Visited
;
247 for (User
*U
: F
.users()) {
248 Instruction
*I
= dyn_cast
<Instruction
>(U
);
251 CallBase
*CI
= dyn_cast
<CallBase
>(I
);
252 // Only propagate attributes if F is the called function. Specifically,
253 // do not propagate attributes if F is passed as an argument.
254 // FIXME: handle bitcasted callee, e.g.
255 // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
256 if (!CI
|| CI
->getCalledOperand() != &F
)
258 Function
*Caller
= CI
->getCaller();
// Visited.insert(...).second is false when CI was already in the set.
259 if (!Caller
|| !Visited
.insert(CI
).second
)
// Tests whether the caller is neither an existing nor a newly found root.
261 if (!Roots
.count(Caller
) && !NewRoots
.count(Caller
))
264 const FnProperties
CallerProps(*TM
, *Caller
);
266 if (CalleeProps
== CallerProps
) {
267 if (!Roots
.count(&F
))
// Look for an existing clone of F made for these caller properties.
272 Function
*NewF
= findFunction(CallerProps
, &F
);
274 const FnProperties NewProps
= CalleeProps
.adjustToCaller(CallerProps
);
276 // This may set different features on different iterations if
277 // there is a contradiction in callers' attributes. In this case
278 // we rely on a second pass running on Module, which is allowed
// NOTE(review): the end of the sentence above is missing from this listing
// (presumably "to clone." -- confirm against the original file).
280 setFeatures(F
, NewProps
.Features
);
281 setAttributes(F
, NewProps
.Attributes
);
// Cloning path: create the clone, remember it keyed by the caller's
// properties, and treat it as a new root.
287 NewF
= cloneWithProperties(F
, NewProps
);
288 Clones
.push_back(Clone(CallerProps
, &F
, NewF
));
289 NewRoots
.insert(NewF
);
292 ToReplace
.push_back(std::make_pair(CI
, NewF
));
// Apply the queued call-site rewrites: point each call at its new callee.
298 while (!ToReplace
.empty()) {
299 auto R
= ToReplace
.pop_back_val();
300 R
.first
->setCalledFunction(R
.second
);
303 } while (!NewRoots
.empty());
// Remove the functions that were recorded in Replaced.
305 for (Function
*F
: Replaced
) {
307 F
->eraseFromParent();
// Clones F, applies the features and attributes in NewProps to the clone, and
// gives the clone default visibility and internal linkage.
// NOTE(review): the return type, the remainder of the name swap and the
// return statement are missing from this listing; per the declaration the
// function returns a Function pointer.
317 AMDGPUPropagateAttributes::cloneWithProperties(Function
&F
,
318 const FnProperties
&NewProps
) {
319 LLVM_DEBUG(dbgs() << "Cloning " << F
.getName() << '\n');
// The value map is passed to CloneFunction and not read again in the visible
// code.
321 ValueToValueMapTy dummy
;
322 Function
*NewF
= CloneFunction(&F
, dummy
);
323 setFeatures(*NewF
, NewProps
.Features
);
324 setAttributes(*NewF
, NewProps
.Attributes
);
325 NewF
->setVisibility(GlobalValue::DefaultVisibility
);
326 NewF
->setLinkage(GlobalValue::InternalLinkage
);
328 // Swap names. If that is the only clone it will retain the name of now
329 // dead value. Preserve original name for externally visible functions.
330 if (F
.hasName() && F
.hasLocalLinkage()) {
331 std::string NewName
= std::string(NewF
->getName());
// Replaces the "target-features" function attribute of F with the string form
// of NewFeatures, as produced by getFeatureString().
339 void AMDGPUPropagateAttributes::setFeatures(Function
&F
,
340 const FeatureBitset
&NewFeatures
) {
341 std::string NewFeatureStr
= getFeatureString(NewFeatures
);
// The debug output shows only the TargetFeatures subset, whereas the attribute
// written below is built from the full NewFeatures bitset.
343 LLVM_DEBUG(dbgs() << "Set features "
344 << getFeatureString(NewFeatures
& TargetFeatures
)
345 << " on " << F
.getName() << '\n');
// Remove the existing attribute before adding the new one.
347 F
.removeFnAttr("target-features");
348 F
.addFnAttr("target-features", NewFeatureStr
);
// Rewrites the propagated attributes of F: for each name in AttributeNames the
// existing attribute is removed from F and the corresponding entry of NewAttrs
// is added. NewAttrs is indexed in the same order as AttributeNames.
// NOTE(review): the guard that checks whether NewAttrs[I] holds a value
// before it is dereferenced is missing from this listing -- confirm it exists.
351 void AMDGPUPropagateAttributes::setAttributes(Function
&F
,
352 const ArrayRef
<Optional
<Attribute
>> NewAttrs
) {
353 LLVM_DEBUG(dbgs() << "Set attributes on " << F
.getName() << ":\n");
354 for (unsigned I
= 0; I
< NumAttr
; ++I
) {
355 F
.removeFnAttr(AttributeNames
[I
]);
357 LLVM_DEBUG(dbgs() << '\t' << NewAttrs
[I
]->getAsString() << '\n');
358 F
.addFnAttr(*NewAttrs
[I
]);
// Builds a comma-separated feature string by walking AMDGPUFeatureKV: a
// feature set in Features is emitted as "+<key>", and a feature that is not
// set but belongs to TargetFeatures is emitted as "-<key>". Other features are
// omitted.
// NOTE(review): the return type, the declaration of Ret and the return
// statement are missing from this listing; per the class declaration the
// function returns std::string.
364 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset
&Features
) const
367 for (const SubtargetFeatureKV
&KV
: AMDGPUFeatureKV
) {
368 if (Features
[KV
.Value
])
369 Ret
+= (StringRef("+") + KV
.Key
+ ",").str();
370 else if (TargetFeatures
[KV
.Value
])
371 Ret
+= (StringRef("-") + KV
.Key
+ ",").str();
// NOTE(review): this relies on at least one entry having been appended above.
373 Ret
.pop_back(); // Remove last comma.
// FunctionPass entry point. Looks up TargetPassConfig (if available) to obtain
// a TargetMachine, checks whether F has an entry-function calling convention,
// and runs the propagation worker on F with cloning disabled (AllowClone is
// false).
// NOTE(review): the guards around the TM lookup and the result of the
// calling-convention check are missing from this listing.
377 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function
&F
) {
379 auto *TPC
= getAnalysisIfAvailable
<TargetPassConfig
>();
383 TM
= &TPC
->getTM
<TargetMachine
>();
386 if (!AMDGPU::isEntryFunctionCC(F
.getCallingConv()))
389 return AMDGPUPropagateAttributes(TM
, false).process(F
);
// ModulePass entry point. Looks up TargetPassConfig (if available) to obtain a
// TargetMachine and runs the propagation worker on the whole module with
// cloning enabled (AllowClone is true).
// NOTE(review): the guards around the TM lookup are missing from this listing.
392 bool AMDGPUPropagateAttributesLate::runOnModule(Module
&M
) {
394 auto *TPC
= getAnalysisIfAvailable
<TargetPassConfig
>();
398 TM
= &TPC
->getTM
<TargetMachine
>();
401 return AMDGPUPropagateAttributes(TM
, true).process(M
);
// Factory for the early pass: allocates a new AMDGPUPropagateAttributesEarly
// constructed with TM and returns a pointer to it.
// NOTE(review): the return type line is missing from this listing.
405 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine
*TM
) {
406 return new AMDGPUPropagateAttributesEarly(TM
);
// Factory for the late pass: allocates a new AMDGPUPropagateAttributesLate
// constructed with TM and returns a pointer to it.
// NOTE(review): the return type line is missing from this listing.
410 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine
*TM
) {
411 return new AMDGPUPropagateAttributesLate(TM
);
// Analysis-manager based entry point for the early pass. Functions without an
// entry-function calling convention are left alone and all analyses are
// reported as preserved. Otherwise the worker runs on F with cloning disabled,
// and no analyses are reported as preserved if it returned true.
// AM is not used in the visible code.
// NOTE(review): the return type line is missing from this listing.
415 AMDGPUPropagateAttributesEarlyPass::run(Function
&F
,
416 FunctionAnalysisManager
&AM
) {
417 if (!AMDGPU::isEntryFunctionCC(F
.getCallingConv()))
418 return PreservedAnalyses::all();
420 return AMDGPUPropagateAttributes(&TM
, false).process(F
)
421 ? PreservedAnalyses::none()
422 : PreservedAnalyses::all();
// Analysis-manager based entry point for the late pass. Runs the worker on
// the whole module with cloning enabled. No analyses are reported as preserved
// if it returned true, and all are reported as preserved otherwise.
// AM is not used in the visible code.
// NOTE(review): the return type line is missing from this listing.
426 AMDGPUPropagateAttributesLatePass::run(Module
&M
, ModuleAnalysisManager
&AM
) {
427 return AMDGPUPropagateAttributes(&TM
, true).process(M
)
428 ? PreservedAnalyses::none()
429 : PreservedAnalyses::all();