Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / openmp / libomptarget / plugins-nextgen / amdgpu / utils / UtilitiesRTL.h
blobb39545ab7d02ba2900dc4213d0f344aa73227542
1 //===----RTLs/amdgpu/utils/UtilitiesRTL.h ------------------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // RTL Utilities for AMDGPU plugins
11 //===----------------------------------------------------------------------===//
13 #include <cstdint>
15 #include "Debug.h"
16 #include "omptarget.h"
18 #include "llvm/ADT/StringMap.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/Error.h"
22 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/BinaryFormat/MsgPackDocument.h"
25 #include "llvm/Support/MemoryBufferRef.h"
27 #include "llvm/Support/YAMLTraits.h"
28 using namespace llvm::ELF;
30 namespace llvm {
31 namespace omp {
32 namespace target {
33 namespace plugin {
34 namespace utils {
36 // The implicit arguments of COV5 AMDGPU kernels.
37 struct AMDGPUImplicitArgsTy {
38 uint32_t BlockCountX;
39 uint32_t BlockCountY;
40 uint32_t BlockCountZ;
41 uint16_t GroupSizeX;
42 uint16_t GroupSizeY;
43 uint16_t GroupSizeZ;
44 uint8_t Unused0[46]; // 46 byte offset.
45 uint16_t GridDims;
46 uint8_t Unused1[190]; // 190 byte offset.
49 // Dummy struct for COV4 implicitargs.
50 struct AMDGPUImplicitArgsTyCOV4 {
51 uint8_t Unused[56];
54 uint32_t getImplicitArgsSize(uint16_t Version) {
55 return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
56 ? sizeof(AMDGPUImplicitArgsTyCOV4)
57 : sizeof(AMDGPUImplicitArgsTy);
60 /// Parse a TargetID to get processor arch and feature map.
61 /// Returns processor subarch.
62 /// Returns TargetID features in \p FeatureMap argument.
63 /// If the \p TargetID contains feature+, FeatureMap it to true.
64 /// If the \p TargetID contains feature-, FeatureMap it to false.
65 /// If the \p TargetID does not contain a feature (default), do not map it.
66 StringRef parseTargetID(StringRef TargetID, StringMap<bool> &FeatureMap) {
67 if (TargetID.empty())
68 return llvm::StringRef();
70 auto ArchFeature = TargetID.split(":");
71 auto Arch = ArchFeature.first;
72 auto Features = ArchFeature.second;
73 if (Features.empty())
74 return Arch;
76 if (Features.contains("sramecc+")) {
77 FeatureMap.insert(std::pair<StringRef, bool>("sramecc", true));
78 } else if (Features.contains("sramecc-")) {
79 FeatureMap.insert(std::pair<StringRef, bool>("sramecc", false));
81 if (Features.contains("xnack+")) {
82 FeatureMap.insert(std::pair<StringRef, bool>("xnack", true));
83 } else if (Features.contains("xnack-")) {
84 FeatureMap.insert(std::pair<StringRef, bool>("xnack", false));
87 return Arch;
90 /// Check if an image is compatible with current system's environment.
91 bool isImageCompatibleWithEnv(const __tgt_image_info *Info,
92 StringRef EnvTargetID) {
93 llvm::StringRef ImageTargetID(Info->Arch);
95 // Compatible in case of exact match.
96 if (ImageTargetID == EnvTargetID) {
97 DP("Compatible: Exact match \t[Image: %s]\t:\t[Env: %s]\n",
98 ImageTargetID.data(), EnvTargetID.data());
99 return true;
102 // Incompatible if Archs mismatch.
103 StringMap<bool> ImgMap, EnvMap;
104 StringRef ImgArch = utils::parseTargetID(ImageTargetID, ImgMap);
105 StringRef EnvArch = utils::parseTargetID(EnvTargetID, EnvMap);
107 // Both EnvArch and ImgArch can't be empty here.
108 if (EnvArch.empty() || ImgArch.empty() || !ImgArch.contains(EnvArch)) {
109 DP("Incompatible: Processor mismatch \t[Image: %s]\t:\t[Env: %s]\n",
110 ImageTargetID.data(), EnvTargetID.data());
111 return false;
114 // Incompatible if image has more features than the environment,
115 // irrespective of type or sign of features.
116 if (ImgMap.size() > EnvMap.size()) {
117 DP("Incompatible: Image has more features than the Environment \t[Image: "
118 "%s]\t:\t[Env: %s]\n",
119 ImageTargetID.data(), EnvTargetID.data());
120 return false;
123 // Compatible if each target feature specified by the environment is
124 // compatible with target feature of the image. The target feature is
125 // compatible if the iamge does not specify it (meaning Any), or if it
126 // specifies it with the same value (meaning On or Off).
127 for (const auto &ImgFeature : ImgMap) {
128 auto EnvFeature = EnvMap.find(ImgFeature.first());
129 if (EnvFeature == EnvMap.end() ||
130 (EnvFeature->first() == ImgFeature.first() &&
131 EnvFeature->second != ImgFeature.second)) {
132 DP("Incompatible: Value of Image's non-ANY feature is not matching with "
133 "the Environment's non-ANY feature \t[Image: %s]\t:\t[Env: %s]\n",
134 ImageTargetID.data(), EnvTargetID.data());
135 return false;
139 // Image is compatible if all features of Environment are:
140 // - either, present in the Image's features map with the same sign,
141 // - or, the feature is missing from Image's features map i.e. it is
142 // set to ANY
143 DP("Compatible: Target IDs are compatible \t[Image: %s]\t:\t[Env: %s]\n",
144 ImageTargetID.data(), EnvTargetID.data());
146 return true;
149 struct KernelMetaDataTy {
150 uint64_t KernelObject;
151 uint32_t GroupSegmentList;
152 uint32_t PrivateSegmentSize;
153 uint32_t SGPRCount;
154 uint32_t VGPRCount;
155 uint32_t SGPRSpillCount;
156 uint32_t VGPRSpillCount;
157 uint32_t KernelSegmentSize;
158 uint32_t ExplicitArgumentCount;
159 uint32_t ImplicitArgumentCount;
160 uint32_t RequestedWorkgroupSize[3];
161 uint32_t WorkgroupSizeHint[3];
162 uint32_t WavefronSize;
163 uint32_t MaxFlatWorkgroupSize;
165 namespace {
167 /// Reads the AMDGPU specific per-kernel-metadata from an image.
168 class KernelInfoReader {
169 public:
170 KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
172 /// Process ELF note to read AMDGPU metadata from respective information
173 /// fields.
174 Error processNote(const object::ELF64LE::Note &Note, size_t Align) {
175 if (Note.getName() != "AMDGPU")
176 return Error::success(); // We are not interested in other things
178 assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
179 "Parse AMDGPU MetaData");
180 auto Desc = Note.getDesc(Align);
181 StringRef MsgPackString =
182 StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
183 msgpack::Document MsgPackDoc;
184 if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
185 return Error::success();
187 AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
188 if (!Verifier.verify(MsgPackDoc.getRoot()))
189 return Error::success();
191 auto RootMap = MsgPackDoc.getRoot().getMap(true);
193 if (auto Err = iterateAMDKernels(RootMap))
194 return Err;
196 return Error::success();
199 private:
200 /// Extracts the relevant information via simple string look-up in the msgpack
201 /// document elements.
202 Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
203 std::string &KernelName,
204 KernelMetaDataTy &KernelData) {
205 if (!V.first.isString())
206 return Error::success();
208 const auto isKey = [](const msgpack::DocNode &DK, StringRef SK) {
209 return DK.getString() == SK;
212 const auto getSequenceOfThreeInts = [](msgpack::DocNode &DN,
213 uint32_t *Vals) {
214 assert(DN.isArray() && "MsgPack DocNode is an array node");
215 auto DNA = DN.getArray();
216 assert(DNA.size() == 3 && "ArrayNode has at most three elements");
218 int i = 0;
219 for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
220 ++DNABegin) {
221 Vals[i++] = DNABegin->getUInt();
225 if (isKey(V.first, ".name")) {
226 KernelName = V.second.toString();
227 } else if (isKey(V.first, ".sgpr_count")) {
228 KernelData.SGPRCount = V.second.getUInt();
229 } else if (isKey(V.first, ".sgpr_spill_count")) {
230 KernelData.SGPRSpillCount = V.second.getUInt();
231 } else if (isKey(V.first, ".vgpr_count")) {
232 KernelData.VGPRCount = V.second.getUInt();
233 } else if (isKey(V.first, ".vgpr_spill_count")) {
234 KernelData.VGPRSpillCount = V.second.getUInt();
235 } else if (isKey(V.first, ".private_segment_fixed_size")) {
236 KernelData.PrivateSegmentSize = V.second.getUInt();
237 } else if (isKey(V.first, ".group_segement_fixed_size")) {
238 KernelData.GroupSegmentList = V.second.getUInt();
239 } else if (isKey(V.first, ".reqd_workgroup_size")) {
240 getSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
241 } else if (isKey(V.first, ".workgroup_size_hint")) {
242 getSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
243 } else if (isKey(V.first, ".wavefront_size")) {
244 KernelData.WavefronSize = V.second.getUInt();
245 } else if (isKey(V.first, ".max_flat_workgroup_size")) {
246 KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
249 return Error::success();
252 /// Get the "amdhsa.kernels" element from the msgpack Document
253 Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
254 auto Res = MDN.find("amdhsa.kernels");
255 if (Res == MDN.end())
256 return createStringError(inconvertibleErrorCode(),
257 "Could not find amdhsa.kernels key");
259 auto Pair = *Res;
260 assert(Pair.second.isArray() &&
261 "AMDGPU kernel entries are arrays of entries");
263 return Pair.second.getArray();
266 /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
267 /// MapDocNode that either maps a string to a single value (most of them) or
268 /// to another array of things. Currently, we only handle the case that maps
269 /// to scalar value.
270 Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
271 KernelMetaDataTy KernelData;
272 std::string KernelName;
273 auto Entry = (*It).getMap();
274 for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
275 if (auto Err = extractKernelData(*MI, KernelName, KernelData))
276 return Err;
278 KernelInfoMap.insert({KernelName, KernelData});
279 return Error::success();
282 /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
283 Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
284 auto KernelsOrErr = getAMDKernelsArray(MDN);
285 if (auto Err = KernelsOrErr.takeError())
286 return Err;
288 auto KernelsArr = *KernelsOrErr;
289 for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
290 if (!It->isMap())
291 continue; // we expect <key,value> pairs
293 // Obtain the value for the different entries. Each array entry is a
294 // MapDocNode
295 if (auto Err = generateKernelInfo(It))
296 return Err;
298 return Error::success();
301 // Kernel names are the keys
302 StringMap<KernelMetaDataTy> &KernelInfoMap;
304 } // namespace
306 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
307 /// KernelInfoMap
308 Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
309 StringMap<KernelMetaDataTy> &KernelInfoMap,
310 uint16_t &ELFABIVersion) {
311 Error Err = Error::success(); // Used later as out-parameter
313 auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
314 if (auto Err = ELFOrError.takeError())
315 return Err;
317 const object::ELF64LEFile ELFObj = ELFOrError.get();
318 ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
319 KernelInfoReader Reader(KernelInfoMap);
321 // Read the code object version from ELF image header
322 auto Header = ELFObj.getHeader();
323 ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
324 DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
326 for (const auto &S : Sections) {
327 if (S.sh_type != ELF::SHT_NOTE)
328 continue;
330 for (const auto N : ELFObj.notes(S, Err)) {
331 if (Err)
332 return Err;
333 // Fills the KernelInfoTabel entries in the reader
334 if ((Err = Reader.processNote(N, S.sh_addralign)))
335 return Err;
339 return Error::success();
342 } // namespace utils
343 } // namespace plugin
344 } // namespace target
345 } // namespace omp
346 } // namespace llvm