1 //===----RTLs/amdgpu/utils/UtilitiesRTL.h ------------------------- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // RTL Utilities for AMDGPU plugins
11 //===----------------------------------------------------------------------===//
15 #include "Shared/Debug.h"
16 #include "Utils/ELF.h"
18 #include "omptarget.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Error.h"
24 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
25 #include "llvm/BinaryFormat/ELF.h"
26 #include "llvm/BinaryFormat/MsgPackDocument.h"
27 #include "llvm/Support/MemoryBufferRef.h"
28 #include "llvm/Support/YAMLTraits.h"
30 using namespace llvm::ELF
;
38 // The implicit arguments of COV5 AMDGPU kernels.
39 struct AMDGPUImplicitArgsTy
{
46 uint8_t Unused0
[46]; // 46 byte offset.
48 uint8_t Unused1
[54]; // 54 byte offset.
49 uint32_t DynamicLdsSize
;
50 uint8_t Unused2
[132]; // 132 byte offset.
53 // Dummy struct for COV4 implicitargs.
54 struct AMDGPUImplicitArgsTyCOV4
{
58 inline uint32_t getImplicitArgsSize(uint16_t Version
) {
59 return Version
< ELF::ELFABIVERSION_AMDGPU_HSA_V5
60 ? sizeof(AMDGPUImplicitArgsTyCOV4
)
61 : sizeof(AMDGPUImplicitArgsTy
);
64 /// Check if an image is compatible with current system's environment. The
65 /// system environment is given as a 'target-id' which has the form:
67 /// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
69 /// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
70 /// and is compatible with either '+' or '-'. The HSA runtime returns this
71 /// information using the target-id, while we use the ELF header to determine
73 inline bool isImageCompatibleWithEnv(StringRef ImageArch
, uint32_t ImageFlags
,
74 StringRef EnvTargetID
) {
75 StringRef EnvArch
= EnvTargetID
.split(":").first
;
77 // Trivial check if the base processors match.
78 if (EnvArch
!= ImageArch
)
81 // Check if the image is requesting xnack on or off.
82 switch (ImageFlags
& EF_AMDGPU_FEATURE_XNACK_V4
) {
83 case EF_AMDGPU_FEATURE_XNACK_OFF_V4
:
84 // The image is 'xnack-' so the environment must be 'xnack-'.
85 if (!EnvTargetID
.contains("xnack-"))
88 case EF_AMDGPU_FEATURE_XNACK_ON_V4
:
89 // The image is 'xnack+' so the environment must be 'xnack+'.
90 if (!EnvTargetID
.contains("xnack+"))
93 case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
:
94 case EF_AMDGPU_FEATURE_XNACK_ANY_V4
:
99 // Check if the image is requesting sramecc on or off.
100 switch (ImageFlags
& EF_AMDGPU_FEATURE_SRAMECC_V4
) {
101 case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
:
102 // The image is 'sramecc-' so the environment must be 'sramecc-'.
103 if (!EnvTargetID
.contains("sramecc-"))
106 case EF_AMDGPU_FEATURE_SRAMECC_ON_V4
:
107 // The image is 'sramecc+' so the environment must be 'sramecc+'.
108 if (!EnvTargetID
.contains("sramecc+"))
111 case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
:
112 case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
:
119 struct KernelMetaDataTy
{
120 uint64_t KernelObject
;
121 uint32_t GroupSegmentList
;
122 uint32_t PrivateSegmentSize
;
125 uint32_t SGPRSpillCount
;
126 uint32_t VGPRSpillCount
;
127 uint32_t KernelSegmentSize
;
128 uint32_t ExplicitArgumentCount
;
129 uint32_t ImplicitArgumentCount
;
130 uint32_t RequestedWorkgroupSize
[3];
131 uint32_t WorkgroupSizeHint
[3];
132 uint32_t WavefronSize
;
133 uint32_t MaxFlatWorkgroupSize
;
137 /// Reads the AMDGPU specific per-kernel-metadata from an image.
138 class KernelInfoReader
{
140 KernelInfoReader(StringMap
<KernelMetaDataTy
> &KIM
) : KernelInfoMap(KIM
) {}
142 /// Process ELF note to read AMDGPU metadata from respective information
144 Error
processNote(const object::ELF64LE::Note
&Note
, size_t Align
) {
145 if (Note
.getName() != "AMDGPU")
146 return Error::success(); // We are not interested in other things
148 assert(Note
.getType() == ELF::NT_AMDGPU_METADATA
&&
149 "Parse AMDGPU MetaData");
150 auto Desc
= Note
.getDesc(Align
);
151 StringRef MsgPackString
=
152 StringRef(reinterpret_cast<const char *>(Desc
.data()), Desc
.size());
153 msgpack::Document MsgPackDoc
;
154 if (!MsgPackDoc
.readFromBlob(MsgPackString
, /*Multi=*/false))
155 return Error::success();
157 AMDGPU::HSAMD::V3::MetadataVerifier
Verifier(true);
158 if (!Verifier
.verify(MsgPackDoc
.getRoot()))
159 return Error::success();
161 auto RootMap
= MsgPackDoc
.getRoot().getMap(true);
163 if (auto Err
= iterateAMDKernels(RootMap
))
166 return Error::success();
170 /// Extracts the relevant information via simple string look-up in the msgpack
171 /// document elements.
172 Error
extractKernelData(msgpack::MapDocNode::MapTy::value_type V
,
173 std::string
&KernelName
,
174 KernelMetaDataTy
&KernelData
) {
175 if (!V
.first
.isString())
176 return Error::success();
178 const auto IsKey
= [](const msgpack::DocNode
&DK
, StringRef SK
) {
179 return DK
.getString() == SK
;
182 const auto GetSequenceOfThreeInts
= [](msgpack::DocNode
&DN
,
184 assert(DN
.isArray() && "MsgPack DocNode is an array node");
185 auto DNA
= DN
.getArray();
186 assert(DNA
.size() == 3 && "ArrayNode has at most three elements");
189 for (auto DNABegin
= DNA
.begin(), DNAEnd
= DNA
.end(); DNABegin
!= DNAEnd
;
191 Vals
[I
++] = DNABegin
->getUInt();
195 if (IsKey(V
.first
, ".name")) {
196 KernelName
= V
.second
.toString();
197 } else if (IsKey(V
.first
, ".sgpr_count")) {
198 KernelData
.SGPRCount
= V
.second
.getUInt();
199 } else if (IsKey(V
.first
, ".sgpr_spill_count")) {
200 KernelData
.SGPRSpillCount
= V
.second
.getUInt();
201 } else if (IsKey(V
.first
, ".vgpr_count")) {
202 KernelData
.VGPRCount
= V
.second
.getUInt();
203 } else if (IsKey(V
.first
, ".vgpr_spill_count")) {
204 KernelData
.VGPRSpillCount
= V
.second
.getUInt();
205 } else if (IsKey(V
.first
, ".private_segment_fixed_size")) {
206 KernelData
.PrivateSegmentSize
= V
.second
.getUInt();
207 } else if (IsKey(V
.first
, ".group_segment_fixed_size")) {
208 KernelData
.GroupSegmentList
= V
.second
.getUInt();
209 } else if (IsKey(V
.first
, ".reqd_workgroup_size")) {
210 GetSequenceOfThreeInts(V
.second
, KernelData
.RequestedWorkgroupSize
);
211 } else if (IsKey(V
.first
, ".workgroup_size_hint")) {
212 GetSequenceOfThreeInts(V
.second
, KernelData
.WorkgroupSizeHint
);
213 } else if (IsKey(V
.first
, ".wavefront_size")) {
214 KernelData
.WavefronSize
= V
.second
.getUInt();
215 } else if (IsKey(V
.first
, ".max_flat_workgroup_size")) {
216 KernelData
.MaxFlatWorkgroupSize
= V
.second
.getUInt();
219 return Error::success();
222 /// Get the "amdhsa.kernels" element from the msgpack Document
223 Expected
<msgpack::ArrayDocNode
> getAMDKernelsArray(msgpack::MapDocNode
&MDN
) {
224 auto Res
= MDN
.find("amdhsa.kernels");
225 if (Res
== MDN
.end())
226 return createStringError(inconvertibleErrorCode(),
227 "Could not find amdhsa.kernels key");
230 assert(Pair
.second
.isArray() &&
231 "AMDGPU kernel entries are arrays of entries");
233 return Pair
.second
.getArray();
236 /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
237 /// MapDocNode that either maps a string to a single value (most of them) or
238 /// to another array of things. Currently, we only handle the case that maps
240 Error
generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It
) {
241 KernelMetaDataTy KernelData
;
242 std::string KernelName
;
243 auto Entry
= (*It
).getMap();
244 for (auto MI
= Entry
.begin(), E
= Entry
.end(); MI
!= E
; ++MI
)
245 if (auto Err
= extractKernelData(*MI
, KernelName
, KernelData
))
248 KernelInfoMap
.insert({KernelName
, KernelData
});
249 return Error::success();
252 /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
253 Error
iterateAMDKernels(msgpack::MapDocNode
&MDN
) {
254 auto KernelsOrErr
= getAMDKernelsArray(MDN
);
255 if (auto Err
= KernelsOrErr
.takeError())
258 auto KernelsArr
= *KernelsOrErr
;
259 for (auto It
= KernelsArr
.begin(), E
= KernelsArr
.end(); It
!= E
; ++It
) {
261 continue; // we expect <key,value> pairs
263 // Obtain the value for the different entries. Each array entry is a
265 if (auto Err
= generateKernelInfo(It
))
268 return Error::success();
271 // Kernel names are the keys
272 StringMap
<KernelMetaDataTy
> &KernelInfoMap
;
276 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
279 readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer
,
280 StringMap
<KernelMetaDataTy
> &KernelInfoMap
,
281 uint16_t &ELFABIVersion
) {
282 Error Err
= Error::success(); // Used later as out-parameter
284 auto ELFOrError
= object::ELF64LEFile::create(MemBuffer
.getBuffer());
285 if (auto Err
= ELFOrError
.takeError())
288 const object::ELF64LEFile ELFObj
= ELFOrError
.get();
289 ArrayRef
<object::ELF64LE::Shdr
> Sections
= cantFail(ELFObj
.sections());
290 KernelInfoReader
Reader(KernelInfoMap
);
292 // Read the code object version from ELF image header
293 auto Header
= ELFObj
.getHeader();
294 ELFABIVersion
= (uint8_t)(Header
.e_ident
[ELF::EI_ABIVERSION
]);
295 DP("ELFABIVERSION Version: %u\n", ELFABIVersion
);
297 for (const auto &S
: Sections
) {
298 if (S
.sh_type
!= ELF::SHT_NOTE
)
301 for (const auto N
: ELFObj
.notes(S
, Err
)) {
304 // Fills the KernelInfoTabel entries in the reader
305 if ((Err
= Reader
.processNote(N
, S
.sh_addralign
)))
310 return Error::success();
314 } // namespace plugin
315 } // namespace target