//===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
11 #include "AMDGPUSubtarget.h"
12 #include "llvm/CodeGen/TargetPassConfig.h"
13 #include "llvm/Analysis/TargetTransformInfo.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/Instructions.h"
16 #include "llvm/IR/IntrinsicInst.h"
17 #include "llvm/IR/Module.h"
18 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
20 #define DEBUG_TYPE "amdgpu-lower-intrinsics"
/// Largest constant length operand for which a memory intrinsic call is left
/// untouched. shouldExpandOperationWithSize() requests expansion for constant
/// lengths strictly greater than this value and for all non-constant lengths.
const unsigned MaxStaticSize = 1024;
28 class AMDGPULowerIntrinsics
: public ModulePass
{
30 bool makeLIDRangeMetadata(Function
&F
) const;
35 AMDGPULowerIntrinsics() : ModulePass(ID
) {}
37 bool runOnModule(Module
&M
) override
;
38 bool expandMemIntrinsicUses(Function
&F
);
39 StringRef
getPassName() const override
{
40 return "AMDGPU Lower Intrinsics";
43 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
44 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
50 char AMDGPULowerIntrinsics::ID
= 0;
52 char &llvm::AMDGPULowerIntrinsicsID
= AMDGPULowerIntrinsics::ID
;
54 INITIALIZE_PASS(AMDGPULowerIntrinsics
, DEBUG_TYPE
, "Lower intrinsics", false,
57 // TODO: Should refine based on estimated number of accesses (e.g. does it
58 // require splitting based on alignment)
59 static bool shouldExpandOperationWithSize(Value
*Size
) {
60 ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Size
);
61 return !CI
|| (CI
->getZExtValue() > MaxStaticSize
);
64 bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function
&F
) {
65 Intrinsic::ID ID
= F
.getIntrinsicID();
68 for (auto I
= F
.user_begin(), E
= F
.user_end(); I
!= E
;) {
69 Instruction
*Inst
= cast
<Instruction
>(*I
);
73 case Intrinsic::memcpy
: {
74 auto *Memcpy
= cast
<MemCpyInst
>(Inst
);
75 if (shouldExpandOperationWithSize(Memcpy
->getLength())) {
76 Function
*ParentFunc
= Memcpy
->getParent()->getParent();
77 const TargetTransformInfo
&TTI
=
78 getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(*ParentFunc
);
79 expandMemCpyAsLoop(Memcpy
, TTI
);
81 Memcpy
->eraseFromParent();
86 case Intrinsic::memmove
: {
87 auto *Memmove
= cast
<MemMoveInst
>(Inst
);
88 if (shouldExpandOperationWithSize(Memmove
->getLength())) {
89 expandMemMoveAsLoop(Memmove
);
91 Memmove
->eraseFromParent();
96 case Intrinsic::memset
: {
97 auto *Memset
= cast
<MemSetInst
>(Inst
);
98 if (shouldExpandOperationWithSize(Memset
->getLength())) {
99 expandMemSetAsLoop(Memset
);
101 Memset
->eraseFromParent();
114 bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function
&F
) const {
115 auto *TPC
= getAnalysisIfAvailable
<TargetPassConfig
>();
119 const TargetMachine
&TM
= TPC
->getTM
<TargetMachine
>();
120 bool Changed
= false;
122 for (auto *U
: F
.users()) {
123 auto *CI
= dyn_cast
<CallInst
>(U
);
127 Changed
|= AMDGPUSubtarget::get(TM
, F
).makeLIDRangeMetadata(CI
);
132 bool AMDGPULowerIntrinsics::runOnModule(Module
&M
) {
133 bool Changed
= false;
135 for (Function
&F
: M
) {
136 if (!F
.isDeclaration())
139 switch (F
.getIntrinsicID()) {
140 case Intrinsic::memcpy
:
141 case Intrinsic::memmove
:
142 case Intrinsic::memset
:
143 if (expandMemIntrinsicUses(F
))
147 case Intrinsic::amdgcn_workitem_id_x
:
148 case Intrinsic::r600_read_tidig_x
:
149 case Intrinsic::amdgcn_workitem_id_y
:
150 case Intrinsic::r600_read_tidig_y
:
151 case Intrinsic::amdgcn_workitem_id_z
:
152 case Intrinsic::r600_read_tidig_z
:
153 case Intrinsic::r600_read_local_size_x
:
154 case Intrinsic::r600_read_local_size_y
:
155 case Intrinsic::r600_read_local_size_z
:
156 Changed
|= makeLIDRangeMetadata(F
);
167 ModulePass
*llvm::createAMDGPULowerIntrinsicsPass() {
168 return new AMDGPULowerIntrinsics();