[llvm-shlib] Fix the version naming style of libLLVM for Windows (#85710)
[llvm-project.git] / openmp / libomptarget / plugins-nextgen / amdgpu / utils / UtilitiesRTL.h
blob58a3b5df00fac6672c18f91396d80ecce817501e
1 //===----RTLs/amdgpu/utils/UtilitiesRTL.h ------------------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // RTL Utilities for AMDGPU plugins
11 //===----------------------------------------------------------------------===//
13 #include <cstdint>
15 #include "Shared/Debug.h"
16 #include "Utils/ELF.h"
18 #include "omptarget.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Error.h"
24 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
25 #include "llvm/BinaryFormat/ELF.h"
26 #include "llvm/BinaryFormat/MsgPackDocument.h"
27 #include "llvm/Support/MemoryBufferRef.h"
28 #include "llvm/Support/YAMLTraits.h"
30 using namespace llvm::ELF;
32 namespace llvm {
33 namespace omp {
34 namespace target {
35 namespace plugin {
36 namespace utils {
38 // The implicit arguments of COV5 AMDGPU kernels.
39 struct AMDGPUImplicitArgsTy {
40 uint32_t BlockCountX;
41 uint32_t BlockCountY;
42 uint32_t BlockCountZ;
43 uint16_t GroupSizeX;
44 uint16_t GroupSizeY;
45 uint16_t GroupSizeZ;
46 uint8_t Unused0[46]; // 46 byte offset.
47 uint16_t GridDims;
48 uint8_t Unused1[54]; // 54 byte offset.
49 uint32_t DynamicLdsSize;
50 uint8_t Unused2[132]; // 132 byte offset.
53 // Dummy struct for COV4 implicitargs.
54 struct AMDGPUImplicitArgsTyCOV4 {
55 uint8_t Unused[56];
58 inline uint32_t getImplicitArgsSize(uint16_t Version) {
59 return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
60 ? sizeof(AMDGPUImplicitArgsTyCOV4)
61 : sizeof(AMDGPUImplicitArgsTy);
64 /// Check if an image is compatible with current system's environment. The
65 /// system environment is given as a 'target-id' which has the form:
66 ///
67 /// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
68 ///
69 /// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
70 /// and is compatible with either '+' or '-'. The HSA runtime returns this
71 /// information using the target-id, while we use the ELF header to determine
72 /// these features.
73 inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
74 StringRef EnvTargetID) {
75 StringRef EnvArch = EnvTargetID.split(":").first;
77 // Trivial check if the base processors match.
78 if (EnvArch != ImageArch)
79 return false;
81 // Check if the image is requesting xnack on or off.
82 switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
83 case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
84 // The image is 'xnack-' so the environment must be 'xnack-'.
85 if (!EnvTargetID.contains("xnack-"))
86 return false;
87 break;
88 case EF_AMDGPU_FEATURE_XNACK_ON_V4:
89 // The image is 'xnack+' so the environment must be 'xnack+'.
90 if (!EnvTargetID.contains("xnack+"))
91 return false;
92 break;
93 case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
94 case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
95 default:
96 break;
99 // Check if the image is requesting sramecc on or off.
100 switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
101 case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
102 // The image is 'sramecc-' so the environment must be 'sramecc-'.
103 if (!EnvTargetID.contains("sramecc-"))
104 return false;
105 break;
106 case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
107 // The image is 'sramecc+' so the environment must be 'sramecc+'.
108 if (!EnvTargetID.contains("sramecc+"))
109 return false;
110 break;
111 case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
112 case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
113 break;
116 return true;
119 struct KernelMetaDataTy {
120 uint64_t KernelObject;
121 uint32_t GroupSegmentList;
122 uint32_t PrivateSegmentSize;
123 uint32_t SGPRCount;
124 uint32_t VGPRCount;
125 uint32_t SGPRSpillCount;
126 uint32_t VGPRSpillCount;
127 uint32_t KernelSegmentSize;
128 uint32_t ExplicitArgumentCount;
129 uint32_t ImplicitArgumentCount;
130 uint32_t RequestedWorkgroupSize[3];
131 uint32_t WorkgroupSizeHint[3];
132 uint32_t WavefronSize;
133 uint32_t MaxFlatWorkgroupSize;
135 namespace {
137 /// Reads the AMDGPU specific per-kernel-metadata from an image.
138 class KernelInfoReader {
139 public:
140 KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
142 /// Process ELF note to read AMDGPU metadata from respective information
143 /// fields.
144 Error processNote(const object::ELF64LE::Note &Note, size_t Align) {
145 if (Note.getName() != "AMDGPU")
146 return Error::success(); // We are not interested in other things
148 assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
149 "Parse AMDGPU MetaData");
150 auto Desc = Note.getDesc(Align);
151 StringRef MsgPackString =
152 StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
153 msgpack::Document MsgPackDoc;
154 if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
155 return Error::success();
157 AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
158 if (!Verifier.verify(MsgPackDoc.getRoot()))
159 return Error::success();
161 auto RootMap = MsgPackDoc.getRoot().getMap(true);
163 if (auto Err = iterateAMDKernels(RootMap))
164 return Err;
166 return Error::success();
169 private:
170 /// Extracts the relevant information via simple string look-up in the msgpack
171 /// document elements.
172 Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
173 std::string &KernelName,
174 KernelMetaDataTy &KernelData) {
175 if (!V.first.isString())
176 return Error::success();
178 const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
179 return DK.getString() == SK;
182 const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
183 uint32_t *Vals) {
184 assert(DN.isArray() && "MsgPack DocNode is an array node");
185 auto DNA = DN.getArray();
186 assert(DNA.size() == 3 && "ArrayNode has at most three elements");
188 int I = 0;
189 for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
190 ++DNABegin) {
191 Vals[I++] = DNABegin->getUInt();
195 if (IsKey(V.first, ".name")) {
196 KernelName = V.second.toString();
197 } else if (IsKey(V.first, ".sgpr_count")) {
198 KernelData.SGPRCount = V.second.getUInt();
199 } else if (IsKey(V.first, ".sgpr_spill_count")) {
200 KernelData.SGPRSpillCount = V.second.getUInt();
201 } else if (IsKey(V.first, ".vgpr_count")) {
202 KernelData.VGPRCount = V.second.getUInt();
203 } else if (IsKey(V.first, ".vgpr_spill_count")) {
204 KernelData.VGPRSpillCount = V.second.getUInt();
205 } else if (IsKey(V.first, ".private_segment_fixed_size")) {
206 KernelData.PrivateSegmentSize = V.second.getUInt();
207 } else if (IsKey(V.first, ".group_segment_fixed_size")) {
208 KernelData.GroupSegmentList = V.second.getUInt();
209 } else if (IsKey(V.first, ".reqd_workgroup_size")) {
210 GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
211 } else if (IsKey(V.first, ".workgroup_size_hint")) {
212 GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
213 } else if (IsKey(V.first, ".wavefront_size")) {
214 KernelData.WavefronSize = V.second.getUInt();
215 } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
216 KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
219 return Error::success();
222 /// Get the "amdhsa.kernels" element from the msgpack Document
223 Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
224 auto Res = MDN.find("amdhsa.kernels");
225 if (Res == MDN.end())
226 return createStringError(inconvertibleErrorCode(),
227 "Could not find amdhsa.kernels key");
229 auto Pair = *Res;
230 assert(Pair.second.isArray() &&
231 "AMDGPU kernel entries are arrays of entries");
233 return Pair.second.getArray();
236 /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
237 /// MapDocNode that either maps a string to a single value (most of them) or
238 /// to another array of things. Currently, we only handle the case that maps
239 /// to scalar value.
240 Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
241 KernelMetaDataTy KernelData;
242 std::string KernelName;
243 auto Entry = (*It).getMap();
244 for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
245 if (auto Err = extractKernelData(*MI, KernelName, KernelData))
246 return Err;
248 KernelInfoMap.insert({KernelName, KernelData});
249 return Error::success();
252 /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
253 Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
254 auto KernelsOrErr = getAMDKernelsArray(MDN);
255 if (auto Err = KernelsOrErr.takeError())
256 return Err;
258 auto KernelsArr = *KernelsOrErr;
259 for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
260 if (!It->isMap())
261 continue; // we expect <key,value> pairs
263 // Obtain the value for the different entries. Each array entry is a
264 // MapDocNode
265 if (auto Err = generateKernelInfo(It))
266 return Err;
268 return Error::success();
271 // Kernel names are the keys
272 StringMap<KernelMetaDataTy> &KernelInfoMap;
274 } // namespace
276 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
277 /// KernelInfoMap
278 inline Error
279 readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
280 StringMap<KernelMetaDataTy> &KernelInfoMap,
281 uint16_t &ELFABIVersion) {
282 Error Err = Error::success(); // Used later as out-parameter
284 auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
285 if (auto Err = ELFOrError.takeError())
286 return Err;
288 const object::ELF64LEFile ELFObj = ELFOrError.get();
289 ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
290 KernelInfoReader Reader(KernelInfoMap);
292 // Read the code object version from ELF image header
293 auto Header = ELFObj.getHeader();
294 ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
295 DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
297 for (const auto &S : Sections) {
298 if (S.sh_type != ELF::SHT_NOTE)
299 continue;
301 for (const auto N : ELFObj.notes(S, Err)) {
302 if (Err)
303 return Err;
304 // Fills the KernelInfoTabel entries in the reader
305 if ((Err = Reader.processNote(N, S.sh_addralign)))
306 return Err;
310 return Error::success();
313 } // namespace utils
314 } // namespace plugin
315 } // namespace target
316 } // namespace omp
317 } // namespace llvm