[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / AMDGPU / AMDGPULowerIntrinsics.cpp
blob15032969890e2bfc3f8daf4d78a5108b32965e9f
1 //===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "AMDGPU.h"
10 #include "AMDGPUSubtarget.h"
11 #include "llvm/CodeGen/TargetPassConfig.h"
12 #include "llvm/Analysis/TargetTransformInfo.h"
13 #include "llvm/IR/Constants.h"
14 #include "llvm/IR/Instructions.h"
15 #include "llvm/IR/IntrinsicInst.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
19 #define DEBUG_TYPE "amdgpu-lower-intrinsics"
21 using namespace llvm;
23 namespace {
25 const unsigned MaxStaticSize = 1024;
27 class AMDGPULowerIntrinsics : public ModulePass {
28 private:
29 bool makeLIDRangeMetadata(Function &F) const;
31 public:
32 static char ID;
34 AMDGPULowerIntrinsics() : ModulePass(ID) {}
36 bool runOnModule(Module &M) override;
37 bool expandMemIntrinsicUses(Function &F);
38 StringRef getPassName() const override {
39 return "AMDGPU Lower Intrinsics";
42 void getAnalysisUsage(AnalysisUsage &AU) const override {
43 AU.addRequired<TargetTransformInfoWrapperPass>();
49 char AMDGPULowerIntrinsics::ID = 0;
51 char &llvm::AMDGPULowerIntrinsicsID = AMDGPULowerIntrinsics::ID;
53 INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false,
54 false)
56 // TODO: Should refine based on estimated number of accesses (e.g. does it
57 // require splitting based on alignment)
58 static bool shouldExpandOperationWithSize(Value *Size) {
59 ConstantInt *CI = dyn_cast<ConstantInt>(Size);
60 return !CI || (CI->getZExtValue() > MaxStaticSize);
63 bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) {
64 Intrinsic::ID ID = F.getIntrinsicID();
65 bool Changed = false;
67 for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
68 Instruction *Inst = cast<Instruction>(*I);
69 ++I;
71 switch (ID) {
72 case Intrinsic::memcpy: {
73 auto *Memcpy = cast<MemCpyInst>(Inst);
74 if (shouldExpandOperationWithSize(Memcpy->getLength())) {
75 Function *ParentFunc = Memcpy->getParent()->getParent();
76 const TargetTransformInfo &TTI =
77 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*ParentFunc);
78 expandMemCpyAsLoop(Memcpy, TTI);
79 Changed = true;
80 Memcpy->eraseFromParent();
83 break;
85 case Intrinsic::memmove: {
86 auto *Memmove = cast<MemMoveInst>(Inst);
87 if (shouldExpandOperationWithSize(Memmove->getLength())) {
88 expandMemMoveAsLoop(Memmove);
89 Changed = true;
90 Memmove->eraseFromParent();
93 break;
95 case Intrinsic::memset: {
96 auto *Memset = cast<MemSetInst>(Inst);
97 if (shouldExpandOperationWithSize(Memset->getLength())) {
98 expandMemSetAsLoop(Memset);
99 Changed = true;
100 Memset->eraseFromParent();
103 break;
105 default:
106 break;
110 return Changed;
113 bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
114 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
115 if (!TPC)
116 return false;
118 const TargetMachine &TM = TPC->getTM<TargetMachine>();
119 bool Changed = false;
121 for (auto *U : F.users()) {
122 auto *CI = dyn_cast<CallInst>(U);
123 if (!CI)
124 continue;
126 Changed |= AMDGPUSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
128 return Changed;
131 bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
132 bool Changed = false;
134 for (Function &F : M) {
135 if (!F.isDeclaration())
136 continue;
138 switch (F.getIntrinsicID()) {
139 case Intrinsic::memcpy:
140 case Intrinsic::memmove:
141 case Intrinsic::memset:
142 if (expandMemIntrinsicUses(F))
143 Changed = true;
144 break;
146 case Intrinsic::amdgcn_workitem_id_x:
147 case Intrinsic::r600_read_tidig_x:
148 case Intrinsic::amdgcn_workitem_id_y:
149 case Intrinsic::r600_read_tidig_y:
150 case Intrinsic::amdgcn_workitem_id_z:
151 case Intrinsic::r600_read_tidig_z:
152 case Intrinsic::r600_read_local_size_x:
153 case Intrinsic::r600_read_local_size_y:
154 case Intrinsic::r600_read_local_size_z:
155 Changed |= makeLIDRangeMetadata(F);
156 break;
158 default:
159 break;
163 return Changed;
166 ModulePass *llvm::createAMDGPULowerIntrinsicsPass() {
167 return new AMDGPULowerIntrinsics();