[SCFToGPU] Convert scf.parallel+scf.reduce to gpu.all_reduce (#122782)
[llvm-project.git] / clang / lib / CodeGen / Targets / SPIR.cpp
blob5c75e985e953de537c4c85f22bcfc0404be8df7f
//===- SPIR.cpp -----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 #include "ABIInfoImpl.h"
10 #include "TargetInfo.h"
12 using namespace clang;
13 using namespace clang::CodeGen;
//===----------------------------------------------------------------------===//
// Base ABI and target codegen info implementation common between SPIR and
// SPIR-V.
//===----------------------------------------------------------------------===//
20 namespace {
21 class CommonSPIRABIInfo : public DefaultABIInfo {
22 public:
23 CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
25 private:
26 void setCCs();
29 class SPIRVABIInfo : public CommonSPIRABIInfo {
30 public:
31 SPIRVABIInfo(CodeGenTypes &CGT) : CommonSPIRABIInfo(CGT) {}
32 void computeInfo(CGFunctionInfo &FI) const override;
34 private:
35 ABIArgInfo classifyReturnType(QualType RetTy) const;
36 ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
37 ABIArgInfo classifyArgumentType(QualType Ty) const;
39 } // end anonymous namespace
40 namespace {
41 class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
42 public:
43 CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
44 : TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(CGT)) {}
45 CommonSPIRTargetCodeGenInfo(std::unique_ptr<ABIInfo> ABIInfo)
46 : TargetCodeGenInfo(std::move(ABIInfo)) {}
48 LangAS getASTAllocaAddressSpace() const override {
49 return getLangASFromTargetAS(
50 getABIInfo().getDataLayout().getAllocaAddrSpace());
53 unsigned getOpenCLKernelCallingConv() const override;
54 llvm::Type *getOpenCLType(CodeGenModule &CGM, const Type *T) const override;
55 llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *Ty) const override;
56 llvm::Type *getSPIRVImageTypeFromHLSLResource(
57 const HLSLAttributedResourceType::Attributes &attributes,
58 llvm::Type *ElementType, llvm::LLVMContext &Ctx) const;
60 class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
61 public:
62 SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
63 : CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
64 void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
65 LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
66 const VarDecl *D) const override;
67 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
68 CodeGen::CodeGenModule &M) const override;
69 llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
70 SyncScope Scope,
71 llvm::AtomicOrdering Ordering,
72 llvm::LLVMContext &Ctx) const override;
75 inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
76 switch (Scope) {
77 case SyncScope::HIPSingleThread:
78 case SyncScope::SingleScope:
79 return "singlethread";
80 case SyncScope::HIPWavefront:
81 case SyncScope::OpenCLSubGroup:
82 case SyncScope::WavefrontScope:
83 return "subgroup";
84 case SyncScope::HIPWorkgroup:
85 case SyncScope::OpenCLWorkGroup:
86 case SyncScope::WorkgroupScope:
87 return "workgroup";
88 case SyncScope::HIPAgent:
89 case SyncScope::OpenCLDevice:
90 case SyncScope::DeviceScope:
91 return "device";
92 case SyncScope::SystemScope:
93 case SyncScope::HIPSystem:
94 case SyncScope::OpenCLAllSVMDevices:
95 return "";
97 return "";
99 } // End anonymous namespace.
101 void CommonSPIRABIInfo::setCCs() {
102 assert(getRuntimeCC() == llvm::CallingConv::C);
103 RuntimeCC = llvm::CallingConv::SPIR_FUNC;
106 ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
107 if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
108 return DefaultABIInfo::classifyReturnType(RetTy);
109 if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
110 return DefaultABIInfo::classifyReturnType(RetTy);
112 if (const RecordType *RT = RetTy->getAs<RecordType>()) {
113 const RecordDecl *RD = RT->getDecl();
114 if (RD->hasFlexibleArrayMember())
115 return DefaultABIInfo::classifyReturnType(RetTy);
118 // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
119 // avoid encoding various architecture specific bits here we return everything
120 // as direct to retain type info for things like aggregates, for later perusal
121 // when translating back to LLVM/lowering in the BE. This is also why we
122 // disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU.
123 // This will be revisited / optimised in the future.
124 return ABIArgInfo::getDirect(CGT.ConvertType(RetTy), 0u, nullptr, false);
127 ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
128 if (getContext().getLangOpts().CUDAIsDevice) {
129 // Coerce pointer arguments with default address space to CrossWorkGroup
130 // pointers for HIPSPV/CUDASPV. When the language mode is HIP/CUDA, the
131 // SPIRTargetInfo maps cuda_device to SPIR-V's CrossWorkGroup address space.
132 llvm::Type *LTy = CGT.ConvertType(Ty);
133 auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
134 auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
135 auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(LTy);
136 if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) {
137 LTy = llvm::PointerType::get(PtrTy->getContext(), GlobalAS);
138 return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
141 if (isAggregateTypeForABI(Ty)) {
142 if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
143 // TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
144 // currently expressible in SPIR-V; SPIR-V passes aggregates byval,
145 // which the AMDGPU kernel ABI does not allow. Passing aggregates as
146 // direct works around this impedance mismatch, as it retains type info
147 // and can be correctly handled, post reverse-translation, by the AMDGPU
148 // BE, which has to support this CC for legacy OpenCL purposes. It can
149 // be brittle and does lead to performance degradation in certain
150 // pathological cases. This will be revisited / optimised in the future,
151 // once a way to deal with the byref/byval impedance mismatch is
152 // identified.
153 return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
154 // Force copying aggregate type in kernel arguments by value when
155 // compiling CUDA targeting SPIR-V. This is required for the object
156 // copied to be valid on the device.
157 // This behavior follows the CUDA spec
158 // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
159 // and matches the NVPTX implementation.
160 return getNaturalAlignIndirect(Ty, /* byval */ true);
163 return classifyArgumentType(Ty);
166 ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
167 if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
168 return DefaultABIInfo::classifyArgumentType(Ty);
169 if (!isAggregateTypeForABI(Ty))
170 return DefaultABIInfo::classifyArgumentType(Ty);
172 // Records with non-trivial destructors/copy-constructors should not be
173 // passed by value.
174 if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
175 return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
177 if (const RecordType *RT = Ty->getAs<RecordType>()) {
178 const RecordDecl *RD = RT->getDecl();
179 if (RD->hasFlexibleArrayMember())
180 return DefaultABIInfo::classifyArgumentType(Ty);
183 return ABIArgInfo::getDirect(CGT.ConvertType(Ty), 0u, nullptr, false);
186 void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
187 // The logic is same as in DefaultABIInfo with an exception on the kernel
188 // arguments handling.
189 llvm::CallingConv::ID CC = FI.getCallingConvention();
191 if (!getCXXABI().classifyReturnType(FI))
192 FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
194 for (auto &I : FI.arguments()) {
195 if (CC == llvm::CallingConv::SPIR_KERNEL) {
196 I.info = classifyKernelArgumentType(I.type);
197 } else {
198 I.info = classifyArgumentType(I.type);
203 namespace clang {
204 namespace CodeGen {
205 void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
206 if (CGM.getTarget().getTriple().isSPIRV())
207 SPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
208 else
209 CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI);
214 unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
215 return llvm::CallingConv::SPIR_KERNEL;
218 void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
219 const FunctionType *&FT) const {
220 // Convert HIP kernels to SPIR-V kernels.
221 if (getABIInfo().getContext().getLangOpts().HIP) {
222 FT = getABIInfo().getContext().adjustFunctionType(
223 FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
224 return;
228 LangAS
229 SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
230 const VarDecl *D) const {
231 assert(!CGM.getLangOpts().OpenCL &&
232 !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
233 "Address space agnostic languages only");
234 // If we're here it means that we're using the SPIRDefIsGen ASMap, hence for
235 // the global AS we can rely on either cuda_device or sycl_global to be
236 // correct; however, since this is not a CUDA Device context, we use
237 // sycl_global to prevent confusion with the assertion.
238 LangAS DefaultGlobalAS = getLangASFromTargetAS(
239 CGM.getContext().getTargetAddressSpace(LangAS::sycl_global));
240 if (!D)
241 return DefaultGlobalAS;
243 LangAS AddrSpace = D->getType().getAddressSpace();
244 if (AddrSpace != LangAS::Default)
245 return AddrSpace;
247 return DefaultGlobalAS;
250 void SPIRVTargetCodeGenInfo::setTargetAttributes(
251 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
252 if (!M.getLangOpts().HIP ||
253 M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
254 return;
255 if (GV->isDeclaration())
256 return;
258 auto F = dyn_cast<llvm::Function>(GV);
259 if (!F)
260 return;
262 auto FD = dyn_cast_or_null<FunctionDecl>(D);
263 if (!FD)
264 return;
265 if (!FD->hasAttr<CUDAGlobalAttr>())
266 return;
268 unsigned N = M.getLangOpts().GPUMaxThreadsPerBlock;
269 if (auto FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>())
270 N = FlatWGS->getMax()->EvaluateKnownConstInt(M.getContext()).getExtValue();
272 // We encode the maximum flat WG size in the first component of the 3D
273 // max_work_group_size attribute, which will get reverse translated into the
274 // original AMDGPU attribute when targeting AMDGPU.
275 auto Int32Ty = llvm::IntegerType::getInt32Ty(M.getLLVMContext());
276 llvm::Metadata *AttrMDArgs[] = {
277 llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, N)),
278 llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1)),
279 llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1))};
281 F->setMetadata("max_work_group_size",
282 llvm::MDNode::get(M.getLLVMContext(), AttrMDArgs));
285 llvm::SyncScope::ID
286 SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope,
287 llvm::AtomicOrdering,
288 llvm::LLVMContext &Ctx) const {
289 return Ctx.getOrInsertSyncScopeID(mapClangSyncScopeToLLVM(Scope));
292 /// Construct a SPIR-V target extension type for the given OpenCL image type.
293 static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType,
294 StringRef OpenCLName,
295 unsigned AccessQualifier) {
296 // These parameters compare to the operands of OpTypeImage (see
297 // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpTypeImage
298 // for more details). The first 6 integer parameters all default to 0, and
299 // will be changed to 1 only for the image type(s) that set the parameter to
300 // one. The 7th integer parameter is the access qualifier, which is tacked on
301 // at the end.
302 SmallVector<unsigned, 7> IntParams = {0, 0, 0, 0, 0, 0};
304 // Choose the dimension of the image--this corresponds to the Dim enum in
305 // SPIR-V (first integer parameter of OpTypeImage).
306 if (OpenCLName.starts_with("image2d"))
307 IntParams[0] = 1; // 1D
308 else if (OpenCLName.starts_with("image3d"))
309 IntParams[0] = 2; // 2D
310 else if (OpenCLName == "image1d_buffer")
311 IntParams[0] = 5; // Buffer
312 else
313 assert(OpenCLName.starts_with("image1d") && "Unknown image type");
315 // Set the other integer parameters of OpTypeImage if necessary. Note that the
316 // OpenCL image types don't provide any information for the Sampled or
317 // Image Format parameters.
318 if (OpenCLName.contains("_depth"))
319 IntParams[1] = 1;
320 if (OpenCLName.contains("_array"))
321 IntParams[2] = 1;
322 if (OpenCLName.contains("_msaa"))
323 IntParams[3] = 1;
325 // Access qualifier
326 IntParams.push_back(AccessQualifier);
328 return llvm::TargetExtType::get(Ctx, BaseType, {llvm::Type::getVoidTy(Ctx)},
329 IntParams);
332 llvm::Type *CommonSPIRTargetCodeGenInfo::getOpenCLType(CodeGenModule &CGM,
333 const Type *Ty) const {
334 llvm::LLVMContext &Ctx = CGM.getLLVMContext();
335 if (auto *PipeTy = dyn_cast<PipeType>(Ty))
336 return llvm::TargetExtType::get(Ctx, "spirv.Pipe", {},
337 {!PipeTy->isReadOnly()});
338 if (auto *BuiltinTy = dyn_cast<BuiltinType>(Ty)) {
339 enum AccessQualifier : unsigned { AQ_ro = 0, AQ_wo = 1, AQ_rw = 2 };
340 switch (BuiltinTy->getKind()) {
341 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
342 case BuiltinType::Id: \
343 return getSPIRVImageType(Ctx, "spirv.Image", #ImgType, AQ_##Suffix);
344 #include "clang/Basic/OpenCLImageTypes.def"
345 case BuiltinType::OCLSampler:
346 return llvm::TargetExtType::get(Ctx, "spirv.Sampler");
347 case BuiltinType::OCLEvent:
348 return llvm::TargetExtType::get(Ctx, "spirv.Event");
349 case BuiltinType::OCLClkEvent:
350 return llvm::TargetExtType::get(Ctx, "spirv.DeviceEvent");
351 case BuiltinType::OCLQueue:
352 return llvm::TargetExtType::get(Ctx, "spirv.Queue");
353 case BuiltinType::OCLReserveID:
354 return llvm::TargetExtType::get(Ctx, "spirv.ReserveId");
355 #define INTEL_SUBGROUP_AVC_TYPE(Name, Id) \
356 case BuiltinType::OCLIntelSubgroupAVC##Id: \
357 return llvm::TargetExtType::get(Ctx, "spirv.Avc" #Id "INTEL");
358 #include "clang/Basic/OpenCLExtensionTypes.def"
359 default:
360 return nullptr;
364 return nullptr;
367 llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType(CodeGenModule &CGM,
368 const Type *Ty) const {
369 auto *ResType = dyn_cast<HLSLAttributedResourceType>(Ty);
370 if (!ResType)
371 return nullptr;
373 llvm::LLVMContext &Ctx = CGM.getLLVMContext();
374 const HLSLAttributedResourceType::Attributes &ResAttrs = ResType->getAttrs();
375 switch (ResAttrs.ResourceClass) {
376 case llvm::dxil::ResourceClass::UAV:
377 case llvm::dxil::ResourceClass::SRV: {
378 // TypedBuffer and RawBuffer both need element type
379 QualType ContainedTy = ResType->getContainedType();
380 if (ContainedTy.isNull())
381 return nullptr;
383 assert(!ResAttrs.RawBuffer &&
384 "Raw buffers handles are not implemented for SPIR-V yet");
385 assert(!ResAttrs.IsROV &&
386 "Rasterizer order views not implemented for SPIR-V yet");
388 // convert element type
389 llvm::Type *ElemType = CGM.getTypes().ConvertType(ContainedTy);
390 return getSPIRVImageTypeFromHLSLResource(ResAttrs, ElemType, Ctx);
392 case llvm::dxil::ResourceClass::CBuffer:
393 llvm_unreachable("CBuffer handles are not implemented for SPIR-V yet");
394 break;
395 case llvm::dxil::ResourceClass::Sampler:
396 return llvm::TargetExtType::get(Ctx, "spirv.Sampler");
398 return nullptr;
401 llvm::Type *CommonSPIRTargetCodeGenInfo::getSPIRVImageTypeFromHLSLResource(
402 const HLSLAttributedResourceType::Attributes &attributes,
403 llvm::Type *ElementType, llvm::LLVMContext &Ctx) const {
405 if (ElementType->isVectorTy())
406 ElementType = ElementType->getScalarType();
408 assert((ElementType->isIntegerTy() || ElementType->isFloatingPointTy()) &&
409 "The element type for a SPIR-V resource must be a scalar integer or "
410 "floating point type.");
412 // These parameters correspond to the operands to the OpTypeImage SPIR-V
413 // instruction. See
414 // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpTypeImage.
415 SmallVector<unsigned, 6> IntParams(6, 0);
417 // Dim
418 // For now we assume everything is a buffer.
419 IntParams[0] = 5;
421 // Depth
422 // HLSL does not indicate if it is a depth texture or not, so we use unknown.
423 IntParams[1] = 2;
425 // Arrayed
426 IntParams[2] = 0;
428 // MS
429 IntParams[3] = 0;
431 // Sampled
432 IntParams[4] =
433 attributes.ResourceClass == llvm::dxil::ResourceClass::UAV ? 2 : 1;
435 // Image format.
436 // Setting to unknown for now.
437 IntParams[5] = 0;
439 return llvm::TargetExtType::get(Ctx, "spirv.Image", {ElementType}, IntParams);
442 std::unique_ptr<TargetCodeGenInfo>
443 CodeGen::createCommonSPIRTargetCodeGenInfo(CodeGenModule &CGM) {
444 return std::make_unique<CommonSPIRTargetCodeGenInfo>(CGM.getTypes());
447 std::unique_ptr<TargetCodeGenInfo>
448 CodeGen::createSPIRVTargetCodeGenInfo(CodeGenModule &CGM) {
449 return std::make_unique<SPIRVTargetCodeGenInfo>(CGM.getTypes());