1 //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file provides AMDGPU specific target streamer methods.
11 //===----------------------------------------------------------------------===//
13 #include "AMDGPUTargetStreamer.h"
14 #include "AMDGPUPTNote.h"
15 #include "AMDKernelCodeT.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
19 #include "llvm/BinaryFormat/ELF.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCELFStreamer.h"
22 #include "llvm/MC/MCSectionELF.h"
23 #include "llvm/Support/AMDGPUMetadata.h"
24 #include "llvm/Support/AMDHSAKernelDescriptor.h"
25 #include "llvm/Support/FormattedStream.h"
28 using namespace llvm::AMDGPU
;
30 //===----------------------------------------------------------------------===//
31 // AMDGPUTargetStreamer
32 //===----------------------------------------------------------------------===//
// File-local helper that takes the ISA version triple by reference, plus the
// sramecc and xnack states, so it can adjust the version in place.
// Only the guard for Major == 9 && Minor == 0 is visible here.
// NOTE(review): the statements inside that branch are missing from this
// listing -- presumably they rewrite Stepping based on Sramecc/Xnack; confirm
// against the full file.
34 static void convertIsaVersionV2(uint32_t &Major
, uint32_t &Minor
,
35 uint32_t &Stepping
, bool Sramecc
, bool Xnack
) {
36 if (Major
== 9 && Minor
== 0) {
// Parses HSAMetadataString into an HSAMD::Metadata object with
// HSAMD::fromString and forwards the result to EmitHSAMetadata.
// NOTE(review): the statement guarded by the fromString check is not visible
// in this listing -- presumably an early failure return; confirm.
48 bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString
) {
49 HSAMD::Metadata HSAMetadata
;
50 if (HSAMD::fromString(HSAMetadataString
, HSAMetadata
))
52 return EmitHSAMetadata(HSAMetadata
);
// Loads HSAMetadataString as YAML into a msgpack::Document and forwards the
// document to EmitHSAMetadata, passing false as the second argument.
// NOTE(review): the statement guarded by the !fromYAML check is not visible
// in this listing -- presumably an early failure return; confirm.
55 bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString
) {
56 msgpack::Document HSAMetadataDoc
;
57 if (!HSAMetadataDoc
.fromYAML(HSAMetadataString
))
59 return EmitHSAMetadata(HSAMetadataDoc
, false);
// Translates an ELF::EF_AMDGPU_MACH_* value into a GPUKind (AK) and returns
// the corresponding architecture name. getArchNameAMDGCN(AK) is queried
// first; getArchNameR600(AK) is the other return path. Any value without a
// case label reaches llvm_unreachable.
// NOTE(review): the declaration of AK, the switch header and the condition
// that chooses between the AMDGCN and R600 names are not visible in this
// listing.
62 StringRef
AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach
) {
66 default: llvm_unreachable("Unhandled ELF::EF_AMDGPU type");
67 case ELF::EF_AMDGPU_MACH_R600_R600
: AK
= GK_R600
; break;
68 case ELF::EF_AMDGPU_MACH_R600_R630
: AK
= GK_R630
; break;
69 case ELF::EF_AMDGPU_MACH_R600_RS880
: AK
= GK_RS880
; break;
70 case ELF::EF_AMDGPU_MACH_R600_RV670
: AK
= GK_RV670
; break;
71 case ELF::EF_AMDGPU_MACH_R600_RV710
: AK
= GK_RV710
; break;
72 case ELF::EF_AMDGPU_MACH_R600_RV730
: AK
= GK_RV730
; break;
73 case ELF::EF_AMDGPU_MACH_R600_RV770
: AK
= GK_RV770
; break;
74 case ELF::EF_AMDGPU_MACH_R600_CEDAR
: AK
= GK_CEDAR
; break;
75 case ELF::EF_AMDGPU_MACH_R600_CYPRESS
: AK
= GK_CYPRESS
; break;
76 case ELF::EF_AMDGPU_MACH_R600_JUNIPER
: AK
= GK_JUNIPER
; break;
77 case ELF::EF_AMDGPU_MACH_R600_REDWOOD
: AK
= GK_REDWOOD
; break;
78 case ELF::EF_AMDGPU_MACH_R600_SUMO
: AK
= GK_SUMO
; break;
79 case ELF::EF_AMDGPU_MACH_R600_BARTS
: AK
= GK_BARTS
; break;
80 case ELF::EF_AMDGPU_MACH_R600_CAICOS
: AK
= GK_CAICOS
; break;
81 case ELF::EF_AMDGPU_MACH_R600_CAYMAN
: AK
= GK_CAYMAN
; break;
82 case ELF::EF_AMDGPU_MACH_R600_TURKS
: AK
= GK_TURKS
; break;
83 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600
: AK
= GK_GFX600
; break;
84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601
: AK
= GK_GFX601
; break;
85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602
: AK
= GK_GFX602
; break;
86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700
: AK
= GK_GFX700
; break;
87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701
: AK
= GK_GFX701
; break;
88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702
: AK
= GK_GFX702
; break;
89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703
: AK
= GK_GFX703
; break;
90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704
: AK
= GK_GFX704
; break;
91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705
: AK
= GK_GFX705
; break;
92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801
: AK
= GK_GFX801
; break;
93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802
: AK
= GK_GFX802
; break;
94 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803
: AK
= GK_GFX803
; break;
95 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805
: AK
= GK_GFX805
; break;
96 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810
: AK
= GK_GFX810
; break;
97 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900
: AK
= GK_GFX900
; break;
98 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902
: AK
= GK_GFX902
; break;
99 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904
: AK
= GK_GFX904
; break;
100 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906
: AK
= GK_GFX906
; break;
101 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908
: AK
= GK_GFX908
; break;
102 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909
: AK
= GK_GFX909
; break;
103 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A
: AK
= GK_GFX90A
; break;
104 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C
: AK
= GK_GFX90C
; break;
105 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010
: AK
= GK_GFX1010
; break;
106 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011
: AK
= GK_GFX1011
; break;
107 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012
: AK
= GK_GFX1012
; break;
108 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013
: AK
= GK_GFX1013
; break;
109 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030
: AK
= GK_GFX1030
; break;
110 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031
: AK
= GK_GFX1031
; break;
111 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032
: AK
= GK_GFX1032
; break;
112 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033
: AK
= GK_GFX1033
; break;
113 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034
: AK
= GK_GFX1034
; break;
114 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035
: AK
= GK_GFX1035
; break;
115 case ELF::EF_AMDGPU_MACH_NONE
: AK
= GK_NONE
; break;
118 StringRef GPUName
= getArchNameAMDGCN(AK
);
121 return getArchNameR600(AK
);
// Maps a GPU name to its ELF::EF_AMDGPU_MACH_* value. The name is first
// parsed with parseArchAMDGCN; if that yields GK_NONE it is re-parsed with
// parseArchR600. The case table then returns the matching constant, with
// GK_NONE mapping to EF_AMDGPU_MACH_NONE. Control reaching the end of the
// function hits llvm_unreachable("unknown GPU").
// NOTE(review): the switch header is not visible in this listing.
124 unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU
) {
125 AMDGPU::GPUKind AK
= parseArchAMDGCN(GPU
);
126 if (AK
== AMDGPU::GPUKind::GK_NONE
)
127 AK
= parseArchR600(GPU
);
130 case GK_R600
: return ELF::EF_AMDGPU_MACH_R600_R600
;
131 case GK_R630
: return ELF::EF_AMDGPU_MACH_R600_R630
;
132 case GK_RS880
: return ELF::EF_AMDGPU_MACH_R600_RS880
;
133 case GK_RV670
: return ELF::EF_AMDGPU_MACH_R600_RV670
;
134 case GK_RV710
: return ELF::EF_AMDGPU_MACH_R600_RV710
;
135 case GK_RV730
: return ELF::EF_AMDGPU_MACH_R600_RV730
;
136 case GK_RV770
: return ELF::EF_AMDGPU_MACH_R600_RV770
;
137 case GK_CEDAR
: return ELF::EF_AMDGPU_MACH_R600_CEDAR
;
138 case GK_CYPRESS
: return ELF::EF_AMDGPU_MACH_R600_CYPRESS
;
139 case GK_JUNIPER
: return ELF::EF_AMDGPU_MACH_R600_JUNIPER
;
140 case GK_REDWOOD
: return ELF::EF_AMDGPU_MACH_R600_REDWOOD
;
141 case GK_SUMO
: return ELF::EF_AMDGPU_MACH_R600_SUMO
;
142 case GK_BARTS
: return ELF::EF_AMDGPU_MACH_R600_BARTS
;
143 case GK_CAICOS
: return ELF::EF_AMDGPU_MACH_R600_CAICOS
;
144 case GK_CAYMAN
: return ELF::EF_AMDGPU_MACH_R600_CAYMAN
;
145 case GK_TURKS
: return ELF::EF_AMDGPU_MACH_R600_TURKS
;
146 case GK_GFX600
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600
;
147 case GK_GFX601
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601
;
148 case GK_GFX602
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602
;
149 case GK_GFX700
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700
;
150 case GK_GFX701
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701
;
151 case GK_GFX702
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702
;
152 case GK_GFX703
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703
;
153 case GK_GFX704
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704
;
154 case GK_GFX705
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705
;
155 case GK_GFX801
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801
;
156 case GK_GFX802
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802
;
157 case GK_GFX803
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803
;
158 case GK_GFX805
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805
;
159 case GK_GFX810
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810
;
160 case GK_GFX900
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900
;
161 case GK_GFX902
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902
;
162 case GK_GFX904
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904
;
163 case GK_GFX906
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906
;
164 case GK_GFX908
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908
;
165 case GK_GFX909
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909
;
166 case GK_GFX90A
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A
;
167 case GK_GFX90C
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C
;
168 case GK_GFX1010
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010
;
169 case GK_GFX1011
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011
;
170 case GK_GFX1012
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012
;
171 case GK_GFX1013
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013
;
172 case GK_GFX1030
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030
;
173 case GK_GFX1031
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031
;
174 case GK_GFX1032
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032
;
175 case GK_GFX1033
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033
;
176 case GK_GFX1034
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034
;
177 case GK_GFX1035
: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035
;
178 case GK_NONE
: return ELF::EF_AMDGPU_MACH_NONE
;
181 llvm_unreachable("unknown GPU");
184 //===----------------------------------------------------------------------===//
185 // AMDGPUTargetAsmStreamer
186 //===----------------------------------------------------------------------===//
// Constructs the textual-assembly target streamer: S is forwarded to the
// AMDGPUTargetStreamer base and OS is stored as the stream all directives in
// this class are written to.
188 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer
&S
,
189 formatted_raw_ostream
&OS
)
190 : AMDGPUTargetStreamer(S
), OS(OS
) { }
192 // A hook for emitting stuff at the end.
193 // We use it for emitting the accumulated PAL metadata as directives.
194 // The PAL metadata is reset after it is emitted.
// The metadata is rendered through getPALMetadata()->toString(S).
// NOTE(review): the declaration of S and the statement that writes it to the
// output stream are not visible in this listing.
195 void AMDGPUTargetAsmStreamer::finish() {
197 getPALMetadata()->toString(S
);
200 // Reset the pal metadata so its data will not affect a compilation that
201 // reuses this object.
202 getPALMetadata()->reset();
// Writes the .amdgcn_target directive, with the target ID string from
// getTargetID()->toString() enclosed in double quotes.
205 void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
206 OS
<< "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
// Writes the .hsa_code_object_version directive as "Major,Minor" followed by
// a newline.
209 void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
210 uint32_t Major
, uint32_t Minor
) {
211 OS
<< "\t.hsa_code_object_version " <<
212 Twine(Major
) << "," << Twine(Minor
) << '\n';
// Writes the .hsa_code_object_isa directive as
//   Major,Minor,Stepping,"VendorName","ArchName"
// The version triple is first passed through convertIsaVersionV2 together
// with the target ID's sramecc and xnack on-or-any state.
// NOTE(review): the return type line and the Minor/Stepping parameter lines
// are not visible in this listing.
216 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major
,
219 StringRef VendorName
,
220 StringRef ArchName
) {
221 convertIsaVersionV2(Major
, Minor
, Stepping
, TargetID
->isSramEccOnOrAny(), TargetID
->isXnackOnOrAny());
222 OS
<< "\t.hsa_code_object_isa " << Twine(Major
) << "," << Twine(Minor
) << ","
223 << Twine(Stepping
) << ",\"" << VendorName
<< "\",\"" << ArchName
<< "\"\n";
// Prints Header between .amd_kernel_code_t and .end_amd_kernel_code_t lines.
// The fields themselves are printed by dumpAmdKernelCode, which receives
// "\t\t" as its third argument (presumably the per-line indent -- confirm
// against dumpAmdKernelCode's declaration).
// NOTE(review): the return type line is not visible in this listing.
227 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t
&Header
) {
228 OS
<< "\t.amd_kernel_code_t\n";
229 dumpAmdKernelCode(&Header
, OS
, "\t\t");
230 OS
<< "\t.end_amd_kernel_code_t\n";
// Writes the symbol-type directive for SymbolName. The only handled type is
// ELF::STT_AMDGPU_HSA_KERNEL, which produces ".amdgpu_hsa_kernel <name>";
// the default case hits llvm_unreachable.
// NOTE(review): the second parameter and the switch header are not visible
// in this listing.
233 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName
,
236 default: llvm_unreachable("Invalid AMDGPU symbol type");
237 case ELF::STT_AMDGPU_HSA_KERNEL
:
238 OS
<< "\t.amdgpu_hsa_kernel " << SymbolName
<< '\n' ;
// Writes the .amdgpu_lds directive as "<symbol name>, <Size>, <alignment>",
// where the alignment is printed via Alignment.value().
// NOTE(review): the Alignment parameter line is not visible in this listing.
243 void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol
*Symbol
, unsigned Size
,
245 OS
<< "\t.amdgpu_lds " << Symbol
->getName() << ", " << Size
<< ", "
246 << Alignment
.value() << '\n';
// Writes the .amd_amdgpu_isa directive with the quoted target ID string.
// NOTE(review): the return statement is not visible in this listing.
249 bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
250 OS
<< "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
// Serializes HSAMetadata to text with HSAMD::toString and writes it between
// the HSAMD::AssemblerDirectiveBegin and HSAMD::AssemblerDirectiveEnd lines.
// NOTE(review): the statement guarded by the toString check and the final
// return are not visible in this listing.
254 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
255 const AMDGPU::HSAMD::Metadata
&HSAMetadata
) {
256 std::string HSAMetadataString
;
257 if (HSAMD::toString(HSAMetadata
, HSAMetadataString
))
260 OS
<< '\t' << HSAMD::AssemblerDirectiveBegin
<< '\n';
261 OS
<< HSAMetadataString
<< '\n';
262 OS
<< '\t' << HSAMD::AssemblerDirectiveEnd
<< '\n';
// msgpack overload: checks the document root with
// HSAMD::V3::MetadataVerifier (constructed with Strict), renders the document
// to YAML through a raw_string_ostream, and writes the text between the
// HSAMD::V3 begin/end assembler directives.
// NOTE(review): the statement guarded by the verify check and the final
// return are not visible in this listing.
266 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
267 msgpack::Document
&HSAMetadataDoc
, bool Strict
) {
268 HSAMD::V3::MetadataVerifier
Verifier(Strict
);
269 if (!Verifier
.verify(HSAMetadataDoc
.getRoot()))
272 std::string HSAMetadataString
;
273 raw_string_ostream
StrOS(HSAMetadataString
);
274 HSAMetadataDoc
.toYAML(StrOS
);
276 OS
<< '\t' << HSAMD::V3::AssemblerDirectiveBegin
<< '\n';
277 OS
<< StrOS
.str() << '\n';
278 OS
<< '\t' << HSAMD::V3::AssemblerDirectiveEnd
<< '\n';
// Writes end-of-code padding as two directives: a .p2alignl to the cache
// line size (2^6 = 64 bytes) and a .fill of FillSize / 4 four-byte words,
// both using Encoded_pad. By default the pad word is Encoded_s_code_end and
// the fill is 3 cache lines; when AMDGPU::isGFX90A(STI) is true the pad word
// is Encoded_s_nop and the fill is 16 cache lines.
// NOTE(review): the return statement is not visible in this listing.
282 bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo
&STI
) {
283 const uint32_t Encoded_s_code_end
= 0xbf9f0000;
284 const uint32_t Encoded_s_nop
= 0xbf800000;
285 uint32_t Encoded_pad
= Encoded_s_code_end
;
287 // Instruction cache line size in bytes.
288 const unsigned Log2CacheLineSize
= 6;
289 const unsigned CacheLineSize
= 1u << Log2CacheLineSize
;
291 // Extra padding amount in bytes to support prefetch mode 3.
292 unsigned FillSize
= 3 * CacheLineSize
;
294 if (AMDGPU::isGFX90A(STI
)) {
295 Encoded_pad
= Encoded_s_nop
;
296 FillSize
= 16 * CacheLineSize
;
299 OS
<< "\t.p2alignl " << Log2CacheLineSize
<< ", " << Encoded_pad
<< '\n';
300 OS
<< "\t.fill " << (FillSize
/ 4) << ", 4, " << Encoded_pad
<< '\n';
// Prints the kernel descriptor KD as a ".amdhsa_kernel <KernelName>" ...
// ".end_amdhsa_kernel" block, one .amdhsa_* directive per field. Bit-fields
// are extracted with AMDHSA_BITS_GET through the local PRINT_FIELD macro.
// Some directives are conditional on the ISA major version (IVersion.Major),
// on AMDGPU::isGFX90A(STI), on hasArchitectedFlatScratch(STI), or on the HSA
// ABI version returned by getHsaAbiVersion(&STI).
// NOTE(review): a number of lines of this function are missing from this
// listing, including several PRINT_FIELD( openers, member-name arguments and
// closing braces; consult the full file before editing.
304 void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
305 const MCSubtargetInfo
&STI
, StringRef KernelName
,
306 const amdhsa::kernel_descriptor_t
&KD
, uint64_t NextVGPR
, uint64_t NextSGPR
,
307 bool ReserveVCC
, bool ReserveFlatScr
) {
308 IsaVersion IVersion
= getIsaVersion(STI
.getCPU());
310 OS
<< "\t.amdhsa_kernel " << KernelName
<< '\n';
312 #define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
313 STREAM << "\t\t" << DIRECTIVE << " " \
314 << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
316 OS
<< "\t\t.amdhsa_group_segment_fixed_size " << KD
.group_segment_fixed_size
318 OS
<< "\t\t.amdhsa_private_segment_fixed_size "
319 << KD
.private_segment_fixed_size
<< '\n';
320 OS
<< "\t\t.amdhsa_kernarg_size " << KD
.kernarg_size
<< '\n';
322 if (!hasArchitectedFlatScratch(STI
))
324 OS
, ".amdhsa_user_sgpr_private_segment_buffer", KD
,
325 kernel_code_properties
,
326 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
);
327 PRINT_FIELD(OS
, ".amdhsa_user_sgpr_dispatch_ptr", KD
,
328 kernel_code_properties
,
329 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
);
330 PRINT_FIELD(OS
, ".amdhsa_user_sgpr_queue_ptr", KD
,
331 kernel_code_properties
,
332 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
);
333 PRINT_FIELD(OS
, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD
,
334 kernel_code_properties
,
335 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
);
336 PRINT_FIELD(OS
, ".amdhsa_user_sgpr_dispatch_id", KD
,
337 kernel_code_properties
,
338 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
);
339 if (!hasArchitectedFlatScratch(STI
))
340 PRINT_FIELD(OS
, ".amdhsa_user_sgpr_flat_scratch_init", KD
,
341 kernel_code_properties
,
342 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
);
343 PRINT_FIELD(OS
, ".amdhsa_user_sgpr_private_segment_size", KD
,
344 kernel_code_properties
,
345 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
);
346 if (IVersion
.Major
>= 10)
347 PRINT_FIELD(OS
, ".amdhsa_wavefront_size32", KD
,
348 kernel_code_properties
,
349 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
);
351 (hasArchitectedFlatScratch(STI
)
352 ? ".amdhsa_enable_private_segment"
353 : ".amdhsa_system_sgpr_private_segment_wavefront_offset"),
354 KD
, compute_pgm_rsrc2
,
355 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT
);
356 PRINT_FIELD(OS
, ".amdhsa_system_sgpr_workgroup_id_x", KD
,
358 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X
);
359 PRINT_FIELD(OS
, ".amdhsa_system_sgpr_workgroup_id_y", KD
,
361 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y
);
362 PRINT_FIELD(OS
, ".amdhsa_system_sgpr_workgroup_id_z", KD
,
364 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z
);
365 PRINT_FIELD(OS
, ".amdhsa_system_sgpr_workgroup_info", KD
,
367 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO
);
368 PRINT_FIELD(OS
, ".amdhsa_system_vgpr_workitem_id", KD
,
370 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID
);
372 // These directives are required.
373 OS
<< "\t\t.amdhsa_next_free_vgpr " << NextVGPR
<< '\n';
374 OS
<< "\t\t.amdhsa_next_free_sgpr " << NextSGPR
<< '\n';
376 if (AMDGPU::isGFX90A(STI
))
377 OS
<< "\t\t.amdhsa_accum_offset " <<
378 (AMDHSA_BITS_GET(KD
.compute_pgm_rsrc3
,
379 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET
) + 1) * 4
383 OS
<< "\t\t.amdhsa_reserve_vcc " << ReserveVCC
<< '\n';
384 if (IVersion
.Major
>= 7 && !ReserveFlatScr
&& !hasArchitectedFlatScratch(STI
))
385 OS
<< "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr
<< '\n';
387 if (Optional
<uint8_t> HsaAbiVer
= getHsaAbiVersion(&STI
)) {
388 switch (*HsaAbiVer
) {
391 case ELF::ELFABIVERSION_AMDGPU_HSA_V2
:
393 case ELF::ELFABIVERSION_AMDGPU_HSA_V3
:
394 case ELF::ELFABIVERSION_AMDGPU_HSA_V4
:
395 if (getTargetID()->isXnackSupported())
396 OS
<< "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
401 PRINT_FIELD(OS
, ".amdhsa_float_round_mode_32", KD
,
403 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32
);
404 PRINT_FIELD(OS
, ".amdhsa_float_round_mode_16_64", KD
,
406 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64
);
407 PRINT_FIELD(OS
, ".amdhsa_float_denorm_mode_32", KD
,
409 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32
);
410 PRINT_FIELD(OS
, ".amdhsa_float_denorm_mode_16_64", KD
,
412 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64
);
413 PRINT_FIELD(OS
, ".amdhsa_dx10_clamp", KD
,
415 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP
);
416 PRINT_FIELD(OS
, ".amdhsa_ieee_mode", KD
,
418 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE
);
419 if (IVersion
.Major
>= 9)
420 PRINT_FIELD(OS
, ".amdhsa_fp16_overflow", KD
,
422 amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL
);
423 if (AMDGPU::isGFX90A(STI
))
424 PRINT_FIELD(OS
, ".amdhsa_tg_split", KD
,
426 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT
);
427 if (IVersion
.Major
>= 10) {
428 PRINT_FIELD(OS
, ".amdhsa_workgroup_processor_mode", KD
,
430 amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE
);
431 PRINT_FIELD(OS
, ".amdhsa_memory_ordered", KD
,
433 amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED
);
434 PRINT_FIELD(OS
, ".amdhsa_forward_progress", KD
,
436 amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS
);
439 OS
, ".amdhsa_exception_fp_ieee_invalid_op", KD
,
441 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION
);
442 PRINT_FIELD(OS
, ".amdhsa_exception_fp_denorm_src", KD
,
444 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE
);
446 OS
, ".amdhsa_exception_fp_ieee_div_zero", KD
,
448 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO
);
449 PRINT_FIELD(OS
, ".amdhsa_exception_fp_ieee_overflow", KD
,
451 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW
);
452 PRINT_FIELD(OS
, ".amdhsa_exception_fp_ieee_underflow", KD
,
454 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW
);
455 PRINT_FIELD(OS
, ".amdhsa_exception_fp_ieee_inexact", KD
,
457 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT
);
458 PRINT_FIELD(OS
, ".amdhsa_exception_int_div_zero", KD
,
460 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO
);
463 OS
<< "\t.end_amdhsa_kernel\n";
466 //===----------------------------------------------------------------------===//
467 // AMDGPUTargetELFStreamer
468 //===----------------------------------------------------------------------===//
// Constructs the ELF object target streamer: S is forwarded to the
// AMDGPUTargetStreamer base and also kept in Streamer; STI is stored for the
// later triple/CPU queries made by this class.
470 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer
&S
,
471 const MCSubtargetInfo
&STI
)
472 : AMDGPUTargetStreamer(S
), STI(STI
), Streamer(S
) {}
// Returns the stored streamer as an MCELFStreamer. The conversion is an
// unchecked static_cast, so the stored streamer must really be an
// MCELFStreamer.
474 MCELFStreamer
&AMDGPUTargetELFStreamer::getStreamer() {
475 return static_cast<MCELFStreamer
&>(Streamer
);
478 // A hook for emitting stuff at the end.
479 // We use it for emitting the accumulated PAL metadata as a .note record.
480 // The PAL metadata is reset after it is emitted.
// Before the note is written, the ELF header e_flags are set on the assembler
// from getEFlags().
// NOTE(review): the declaration of Blob and any guard around the EmitNote
// call are not visible in this listing.
481 void AMDGPUTargetELFStreamer::finish() {
482 MCAssembler
&MCA
= getStreamer().getAssembler();
483 MCA
.setELFHeaderEFlags(getEFlags());
486 const char *Vendor
= getPALMetadata()->getVendor();
487 unsigned Type
= getPALMetadata()->getType();
488 getPALMetadata()->toBlob(Type
, Blob
);
491 EmitNote(Vendor
, MCConstantExpr::create(Blob
.size(), getContext()), Type
,
492 [&](MCELFStreamer
&OS
) { OS
.emitBytes(Blob
); });
494 // Reset the pal metadata so its data will not affect a compilation that
495 // reuses this object.
496 getPALMetadata()->reset();
// Emits one ELF note record into the ElfNote::SectionName section of type
// SHT_NOTE. The visible sequence is: namesz (Name.size() + 1), descsz
// (DescSZ, 4 bytes), type (NoteType), the name bytes, then zero padding to a
// 4-byte boundary, and a second zero padding to a 4-byte boundary.
// The section gets SHF_ALLOC only when the target OS is AMDHSA.
// NOTE(review): the section push/switch, the EmitDesc(S) call between the two
// paddings, and the section pop are not visible in this listing.
499 void AMDGPUTargetELFStreamer::EmitNote(
500 StringRef Name
, const MCExpr
*DescSZ
, unsigned NoteType
,
501 function_ref
<void(MCELFStreamer
&)> EmitDesc
) {
502 auto &S
= getStreamer();
503 auto &Context
= S
.getContext();
505 auto NameSZ
= Name
.size() + 1;
507 unsigned NoteFlags
= 0;
508 // TODO Apparently, this is currently needed for OpenCL as mentioned in
509 // https://reviews.llvm.org/D74995
510 if (STI
.getTargetTriple().getOS() == Triple::AMDHSA
)
511 NoteFlags
= ELF::SHF_ALLOC
;
515 Context
.getELFSection(ElfNote::SectionName
, ELF::SHT_NOTE
, NoteFlags
));
516 S
.emitInt32(NameSZ
); // namesz
517 S
.emitValue(DescSZ
, 4); // descsz
518 S
.emitInt32(NoteType
); // type
519 S
.emitBytes(Name
); // name
520 S
.emitValueToAlignment(4, 0, 1, 0); // padding 0
522 S
.emitValueToAlignment(4, 0, 1, 0); // padding 0
// Computes the ELF header e_flags by dispatching on the target triple's
// architecture to getEFlagsR600() or getEFlagsAMDGCN(); an unsupported
// architecture hits llvm_unreachable.
// NOTE(review): the case labels are not visible in this listing.
526 unsigned AMDGPUTargetELFStreamer::getEFlags() {
527 switch (STI
.getTargetTriple().getArch()) {
529 llvm_unreachable("Unsupported Arch");
531 return getEFlagsR600();
533 return getEFlagsAMDGCN();
// e_flags for r600 targets: just the EF_AMDGPU_MACH value that getElfMach
// returns for the subtarget's CPU name. Asserts the triple arch is r600.
537 unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
538 assert(STI
.getTargetTriple().getArch() == Triple::r600
);
540 return getElfMach(STI
.getCPU());
// e_flags for amdgcn targets: dispatches on the target triple's OS to the
// UnknownOS, AMDHSA, AMDPAL or Mesa3D helper. Asserts the triple arch is
// amdgcn.
// NOTE(review): only the Triple::UnknownOS case label is visible in this
// listing; the labels for the other three returns are missing.
543 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
544 assert(STI
.getTargetTriple().getArch() == Triple::amdgcn
);
546 switch (STI
.getTargetTriple().getOS()) {
548 // TODO: Why do some tests have "mingw" listed as OS?
549 // llvm_unreachable("Unsupported OS");
550 case Triple::UnknownOS
:
551 return getEFlagsUnknownOS();
553 return getEFlagsAMDHSA();
555 return getEFlagsAMDPAL();
557 return getEFlagsMesa3D();
// e_flags when the OS is unknown: uses the V3 layout from getEFlagsV3().
// The OS assertion is commented out (see the TODO below).
561 unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
562 // TODO: Why do some tests have "mingw" listed as OS?
563 // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
565 return getEFlagsV3();
// e_flags for AMDHSA: selects the layout from the HSA ABI version returned by
// getHsaAbiVersion(&STI). V2 and V3 share getEFlagsV3(); V4 uses
// getEFlagsV4(). Reaching the end of the function hits llvm_unreachable.
// Asserts the triple OS is AMDHSA.
568 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
569 assert(STI
.getTargetTriple().getOS() == Triple::AMDHSA
);
571 if (Optional
<uint8_t> HsaAbiVer
= getHsaAbiVersion(&STI
)) {
572 switch (*HsaAbiVer
) {
573 case ELF::ELFABIVERSION_AMDGPU_HSA_V2
:
574 case ELF::ELFABIVERSION_AMDGPU_HSA_V3
:
575 return getEFlagsV3();
576 case ELF::ELFABIVERSION_AMDGPU_HSA_V4
:
577 return getEFlagsV4();
581 llvm_unreachable("HSA OS ABI Version identification must be defined");
// e_flags for AMDPAL: uses the V3 layout from getEFlagsV3(). Asserts the
// triple OS is AMDPAL.
584 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
585 assert(STI
.getTargetTriple().getOS() == Triple::AMDPAL
);
587 return getEFlagsV3();
// e_flags for Mesa3D: uses the V3 layout from getEFlagsV3(). Asserts the
// triple OS is Mesa3D.
590 unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
591 assert(STI
.getTargetTriple().getOS() == Triple::Mesa3D
);
593 return getEFlagsV3();
// Builds V3-layout e_flags: the EF_AMDGPU_MACH value for the CPU, OR-ed with
// EF_AMDGPU_FEATURE_XNACK_V3 when xnack is on-or-any and with
// EF_AMDGPU_FEATURE_SRAMECC_V3 when sramecc is on-or-any.
// NOTE(review): the return statement is not visible in this listing.
596 unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
597 unsigned EFlagsV3
= 0;
600 EFlagsV3
|= getElfMach(STI
.getCPU());
603 if (getTargetID()->isXnackOnOrAny())
604 EFlagsV3
|= ELF::EF_AMDGPU_FEATURE_XNACK_V3
;
606 if (getTargetID()->isSramEccOnOrAny())
607 EFlagsV3
|= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3
;
// Builds V4-layout e_flags: the EF_AMDGPU_MACH value for the CPU, plus one
// EF_AMDGPU_FEATURE_XNACK_*_V4 value chosen from getXnackSetting() and one
// EF_AMDGPU_FEATURE_SRAMECC_*_V4 value chosen from getSramEccSetting().
// Each setting is one of Unsupported / Any / Off / On.
// NOTE(review): no `break` statements, closing braces or return are visible
// between the cases in this listing. Confirm they exist in the full file;
// without them every case would fall through and OR in all later values.
612 unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
613 unsigned EFlagsV4
= 0;
616 EFlagsV4
|= getElfMach(STI
.getCPU());
619 switch (getTargetID()->getXnackSetting()) {
620 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported
:
621 EFlagsV4
|= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
;
623 case AMDGPU::IsaInfo::TargetIDSetting::Any
:
624 EFlagsV4
|= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4
;
626 case AMDGPU::IsaInfo::TargetIDSetting::Off
:
627 EFlagsV4
|= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4
;
629 case AMDGPU::IsaInfo::TargetIDSetting::On
:
630 EFlagsV4
|= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4
;
634 switch (getTargetID()->getSramEccSetting()) {
635 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported
:
636 EFlagsV4
|= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
;
638 case AMDGPU::IsaInfo::TargetIDSetting::Any
:
639 EFlagsV4
|= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
;
641 case AMDGPU::IsaInfo::TargetIDSetting::Off
:
642 EFlagsV4
|= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
;
644 case AMDGPU::IsaInfo::TargetIDSetting::On
:
645 EFlagsV4
|= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4
;
// No-op: the ELF streamer emits nothing for the .amdgcn_target directive.
652 void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
// Emits an NT_AMD_HSA_CODE_OBJECT_VERSION note named ElfNote::NoteNameV2 with
// a fixed 8-byte descriptor.
// NOTE(review): the body of the descriptor lambda is not visible in this
// listing -- presumably it writes Major and Minor as two 32-bit values, which
// would match the 8-byte size; confirm.
654 void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
655 uint32_t Major
, uint32_t Minor
) {
657 EmitNote(ElfNote::NoteNameV2
, MCConstantExpr::create(8, getContext()),
658 ELF::NT_AMD_HSA_CODE_OBJECT_VERSION
, [&](MCELFStreamer
&OS
) {
// Emits an NT_AMD_HSA_ISA_VERSION note named ElfNote::NoteNameV2.
// DescSZ is the sum of the two 16-bit name-size fields, the Major, Minor and
// Stepping fields, and both names including their NUL terminators. The
// version triple is passed through convertIsaVersionV2 before emission.
// NOTE(review): the return type line, the Minor/Stepping parameter lines and
// the emission of Major and Minor inside the lambda are not visible in this
// listing.
665 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major
,
668 StringRef VendorName
,
669 StringRef ArchName
) {
670 uint16_t VendorNameSize
= VendorName
.size() + 1;
671 uint16_t ArchNameSize
= ArchName
.size() + 1;
673 unsigned DescSZ
= sizeof(VendorNameSize
) + sizeof(ArchNameSize
) +
674 sizeof(Major
) + sizeof(Minor
) + sizeof(Stepping
) +
675 VendorNameSize
+ ArchNameSize
;
677 convertIsaVersionV2(Major
, Minor
, Stepping
, TargetID
->isSramEccOnOrAny(), TargetID
->isXnackOnOrAny());
678 EmitNote(ElfNote::NoteNameV2
, MCConstantExpr::create(DescSZ
, getContext()),
679 ELF::NT_AMD_HSA_ISA_VERSION
, [&](MCELFStreamer
&OS
) {
680 OS
.emitInt16(VendorNameSize
);
681 OS
.emitInt16(ArchNameSize
);
684 OS
.emitInt32(Stepping
);
685 OS
.emitBytes(VendorName
);
686 OS
.emitInt8(0); // NULL terminate VendorName
687 OS
.emitBytes(ArchName
);
688 OS
.emitInt8(0); // NULL terminate ArchName
// Writes the in-memory bytes of Header (sizeof(Header) bytes) to the
// streamer as raw data.
// NOTE(review): the return type line and any statements surrounding the
// emitBytes call are not visible in this listing.
693 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t
&Header
) {
695 MCStreamer
&OS
= getStreamer();
697 OS
.emitBytes(StringRef((const char*)&Header
, sizeof(Header
)));
// Looks up (or creates) the symbol named SymbolName in the streamer's context
// and sets its ELF symbol type to Type.
// NOTE(review): the Type parameter line is not visible in this listing.
701 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName
,
703 MCSymbolELF
*Symbol
= cast
<MCSymbolELF
>(
704 getStreamer().getContext().getOrCreateSymbol(SymbolName
));
705 Symbol
->setType(Type
);
// Declares Symbol as an LDS object in the ELF symbol table:
//  - type is set to STT_OBJECT;
//  - if no binding was set yet, it becomes STB_GLOBAL and external;
//  - it is declared common with Size and Alignment; a true result from
//    declareCommon is reported as a fatal "redeclared as different type";
//  - its section index is SHN_AMDGPU_LDS and its size expression is Size.
// NOTE(review): the Alignment parameter line is not visible in this listing.
708 void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol
*Symbol
, unsigned Size
,
710 MCSymbolELF
*SymbolELF
= cast
<MCSymbolELF
>(Symbol
);
711 SymbolELF
->setType(ELF::STT_OBJECT
);
713 if (!SymbolELF
->isBindingSet()) {
714 SymbolELF
->setBinding(ELF::STB_GLOBAL
);
715 SymbolELF
->setExternal(true);
718 if (SymbolELF
->declareCommon(Size
, Alignment
.value(), true)) {
719 report_fatal_error("Symbol: " + Symbol
->getName() +
720 " redeclared as different type");
723 SymbolELF
->setIndex(ELF::SHN_AMDGPU_LDS
);
724 SymbolELF
->setSize(MCConstantExpr::create(Size
, getContext()));
// Emits an NT_AMD_HSA_ISA_NAME note named ElfNote::NoteNameV2 whose
// descriptor is the target ID string. The descriptor size is not a constant:
// it is the expression DescEnd - DescBegin over two temporary labels placed
// around the emitted bytes.
// NOTE(review): the return statement is not visible in this listing.
727 bool AMDGPUTargetELFStreamer::EmitISAVersion() {
728 // Create two labels to mark the beginning and end of the desc field
729 // and a MCExpr to calculate the size of the desc field.
730 auto &Context
= getContext();
731 auto *DescBegin
= Context
.createTempSymbol();
732 auto *DescEnd
= Context
.createTempSymbol();
733 auto *DescSZ
= MCBinaryExpr::createSub(
734 MCSymbolRefExpr::create(DescEnd
, Context
),
735 MCSymbolRefExpr::create(DescBegin
, Context
), Context
);
737 EmitNote(ElfNote::NoteNameV2
, DescSZ
, ELF::NT_AMD_HSA_ISA_NAME
,
738 [&](MCELFStreamer
&OS
) {
739 OS
.emitLabel(DescBegin
);
740 OS
.emitBytes(getTargetID()->toString());
741 OS
.emitLabel(DescEnd
);
// msgpack overload: checks the document root with
// HSAMD::V3::MetadataVerifier, serializes the document with writeToBlob, and
// emits the blob as an NT_AMDGPU_METADATA note named ElfNote::NoteNameV3.
// The descriptor size is the expression DescEnd - DescBegin.
// NOTE(review): the Strict parameter line, the statement guarded by the
// verify check and the final return are not visible in this listing.
746 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document
&HSAMetadataDoc
,
748 HSAMD::V3::MetadataVerifier
Verifier(Strict
);
749 if (!Verifier
.verify(HSAMetadataDoc
.getRoot()))
752 std::string HSAMetadataString
;
753 HSAMetadataDoc
.writeToBlob(HSAMetadataString
);
755 // Create two labels to mark the beginning and end of the desc field
756 // and a MCExpr to calculate the size of the desc field.
757 auto &Context
= getContext();
758 auto *DescBegin
= Context
.createTempSymbol();
759 auto *DescEnd
= Context
.createTempSymbol();
760 auto *DescSZ
= MCBinaryExpr::createSub(
761 MCSymbolRefExpr::create(DescEnd
, Context
),
762 MCSymbolRefExpr::create(DescBegin
, Context
), Context
);
764 EmitNote(ElfNote::NoteNameV3
, DescSZ
, ELF::NT_AMDGPU_METADATA
,
765 [&](MCELFStreamer
&OS
) {
766 OS
.emitLabel(DescBegin
);
767 OS
.emitBytes(HSAMetadataString
);
768 OS
.emitLabel(DescEnd
);
// HSAMD::Metadata overload: serializes HSAMetadata to text with
// HSAMD::toString and emits it as an NT_AMD_HSA_METADATA note named
// ElfNote::NoteNameV2. The descriptor size is the expression
// DescEnd - DescBegin.
// NOTE(review): the statement guarded by the toString check and the final
// return are not visible in this listing.
773 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
774 const AMDGPU::HSAMD::Metadata
&HSAMetadata
) {
775 std::string HSAMetadataString
;
776 if (HSAMD::toString(HSAMetadata
, HSAMetadataString
))
779 // Create two labels to mark the beginning and end of the desc field
780 // and a MCExpr to calculate the size of the desc field.
781 auto &Context
= getContext();
782 auto *DescBegin
= Context
.createTempSymbol();
783 auto *DescEnd
= Context
.createTempSymbol();
784 auto *DescSZ
= MCBinaryExpr::createSub(
785 MCSymbolRefExpr::create(DescEnd
, Context
),
786 MCSymbolRefExpr::create(DescBegin
, Context
), Context
);
788 EmitNote(ElfNote::NoteNameV2
, DescSZ
, ELF::NT_AMD_HSA_METADATA
,
789 [&](MCELFStreamer
&OS
) {
790 OS
.emitLabel(DescBegin
);
791 OS
.emitBytes(HSAMetadataString
);
792 OS
.emitLabel(DescEnd
);
// Emits end-of-code padding into the object: aligns to CacheLineSize (64
// bytes) filling with Encoded_pad as a 4-byte value, then emits FillSize / 4
// further 32-bit pad words. By default the pad word is Encoded_s_code_end and
// the fill is 3 cache lines; when AMDGPU::isGFX90A(STI) is true the pad word
// is Encoded_s_nop and the fill is 16 cache lines.
// NOTE(review): the return statement and any section push/pop are not visible
// in this listing.
797 bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo
&STI
) {
798 const uint32_t Encoded_s_code_end
= 0xbf9f0000;
799 const uint32_t Encoded_s_nop
= 0xbf800000;
800 uint32_t Encoded_pad
= Encoded_s_code_end
;
802 // Instruction cache line size in bytes.
803 const unsigned Log2CacheLineSize
= 6;
804 const unsigned CacheLineSize
= 1u << Log2CacheLineSize
;
806 // Extra padding amount in bytes to support prefetch mode 3.
807 unsigned FillSize
= 3 * CacheLineSize
;
809 if (AMDGPU::isGFX90A(STI
)) {
810 Encoded_pad
= Encoded_s_nop
;
811 FillSize
= 16 * CacheLineSize
;
814 MCStreamer
&OS
= getStreamer();
816 OS
.emitValueToAlignment(CacheLineSize
, Encoded_pad
, 4);
817 for (unsigned I
= 0; I
< FillSize
; I
+= 4)
818 OS
.emitInt32(Encoded_pad
);
// Emits the binary kernel descriptor for KernelName under the symbol
// "<KernelName>.kd". The descriptor symbol copies binding, other and
// visibility from the kernel code symbol, and gets type STT_OBJECT and size
// sizeof(KernelDescriptor). Fields are then written in the order shown below;
// kernel_code_entry_byte_offset is emitted as the expression
// (kernel code symbol) - (kernel descriptor symbol).
// NextVGPR, NextSGPR, ReserveVCC and ReserveFlatScr are not referenced in the
// lines visible here.
// NOTE(review): at least one line (the Context argument closing the
// createSub call) and the closing brace are missing from this listing.
823 void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
824 const MCSubtargetInfo
&STI
, StringRef KernelName
,
825 const amdhsa::kernel_descriptor_t
&KernelDescriptor
, uint64_t NextVGPR
,
826 uint64_t NextSGPR
, bool ReserveVCC
, bool ReserveFlatScr
) {
827 auto &Streamer
= getStreamer();
828 auto &Context
= Streamer
.getContext();
830 MCSymbolELF
*KernelCodeSymbol
= cast
<MCSymbolELF
>(
831 Context
.getOrCreateSymbol(Twine(KernelName
)));
832 MCSymbolELF
*KernelDescriptorSymbol
= cast
<MCSymbolELF
>(
833 Context
.getOrCreateSymbol(Twine(KernelName
) + Twine(".kd")));
835 // Copy kernel descriptor symbol's binding, other and visibility from the
836 // kernel code symbol.
837 KernelDescriptorSymbol
->setBinding(KernelCodeSymbol
->getBinding());
838 KernelDescriptorSymbol
->setOther(KernelCodeSymbol
->getOther());
839 KernelDescriptorSymbol
->setVisibility(KernelCodeSymbol
->getVisibility());
840 // Kernel descriptor symbol's type and size are fixed.
841 KernelDescriptorSymbol
->setType(ELF::STT_OBJECT
);
842 KernelDescriptorSymbol
->setSize(
843 MCConstantExpr::create(sizeof(KernelDescriptor
), Context
));
845 // The visibility of the kernel code symbol must be protected or less to allow
846 // static relocations from the kernel descriptor to be used.
847 if (KernelCodeSymbol
->getVisibility() == ELF::STV_DEFAULT
)
848 KernelCodeSymbol
->setVisibility(ELF::STV_PROTECTED
);
850 Streamer
.emitLabel(KernelDescriptorSymbol
);
851 Streamer
.emitInt32(KernelDescriptor
.group_segment_fixed_size
);
852 Streamer
.emitInt32(KernelDescriptor
.private_segment_fixed_size
);
853 Streamer
.emitInt32(KernelDescriptor
.kernarg_size
);
855 for (uint8_t Res
: KernelDescriptor
.reserved0
)
856 Streamer
.emitInt8(Res
);
858 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
859 // expression being created is:
860 // (start of kernel code) - (start of kernel descriptor)
861 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
862 Streamer
.emitValue(MCBinaryExpr::createSub(
863 MCSymbolRefExpr::create(
864 KernelCodeSymbol
, MCSymbolRefExpr::VK_AMDGPU_REL64
, Context
),
865 MCSymbolRefExpr::create(
866 KernelDescriptorSymbol
, MCSymbolRefExpr::VK_None
, Context
),
868 sizeof(KernelDescriptor
.kernel_code_entry_byte_offset
));
869 for (uint8_t Res
: KernelDescriptor
.reserved1
)
870 Streamer
.emitInt8(Res
);
871 Streamer
.emitInt32(KernelDescriptor
.compute_pgm_rsrc3
);
872 Streamer
.emitInt32(KernelDescriptor
.compute_pgm_rsrc1
);
873 Streamer
.emitInt32(KernelDescriptor
.compute_pgm_rsrc2
);
874 Streamer
.emitInt16(KernelDescriptor
.kernel_code_properties
);
875 for (uint8_t Res
: KernelDescriptor
.reserved2
)
876 Streamer
.emitInt8(Res
);