; Test driver: the same input is compiled by llc for three amdgcn-amd-amdhsa
; targets (gfx700, gfx803, gfx900) with +code-object-v3, emitted as an object
; file, and the ELF notes printed by llvm-readelf are piped into FileCheck.
; NOTE(review): each invocation also enables a per-target prefix and a NOTES
; prefix, but none of the directives visible in this excerpt use them --
; confirm whether they are still needed.
1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
; test0: kernel defined without an attribute group, so this file attaches no
; "amdgpu-implicitarg-num-bytes" value to it.
; Expected metadata (after the amdhsa.kernels header): three consecutive
; explicit arguments r, a, b -- each a global_buffer in the global address
; space, 8 bytes in size, at offsets 0, 8 and 16, with value_type f16 --
; and, later on, the kernel descriptor symbol test0.kd.
; No hidden-argument entries are listed for this kernel.
; NOTE(review): the entry label, terminator and closing brace of the
; definition are not visible in this excerpt -- confirm against the full file.
6 ; CHECK: amdhsa.kernels:
9 ; CHECK-NEXT: - .address_space: global
10 ; CHECK-NEXT: .name: r
11 ; CHECK-NEXT: .offset: 0
12 ; CHECK-NEXT: .size: 8
13 ; CHECK-NEXT: .value_kind: global_buffer
14 ; CHECK-NEXT: .value_type: f16
15 ; CHECK-NEXT: - .address_space: global
16 ; CHECK-NEXT: .name: a
17 ; CHECK-NEXT: .offset: 8
18 ; CHECK-NEXT: .size: 8
19 ; CHECK-NEXT: .value_kind: global_buffer
20 ; CHECK-NEXT: .value_type: f16
21 ; CHECK-NEXT: - .address_space: global
22 ; CHECK-NEXT: .name: b
23 ; CHECK-NEXT: .offset: 16
24 ; CHECK-NEXT: .size: 8
25 ; CHECK-NEXT: .value_kind: global_buffer
26 ; CHECK-NEXT: .value_type: f16
28 ; CHECK: .symbol: test0.kd
; Kernel body: loads one half from %a and one from %b, adds them, and stores
; the sum through %r. All three pointers are in addrspace(1).
29 define amdgpu_kernel void @test0(
30 half addrspace(1)* %r,
31 half addrspace(1)* %a,
32 half addrspace(1)* %b) {
34 %a.val = load half, half addrspace(1)* %a
35 %b.val = load half, half addrspace(1)* %b
36 %r.val = fadd half %a.val, %b.val
37 store half %r.val, half addrspace(1)* %r
; test8: kernel tagged with attribute group #0, which sets
; "amdgpu-implicitarg-num-bytes" to "8".
; Expected metadata: the three explicit global_buffer arguments r, a, b
; (offsets 0, 8, 16; size 8; value_type f16), immediately followed by one
; hidden entry -- hidden_global_offset_x at offset 24, size 8, value_type
; i64 -- and, later on, the kernel descriptor symbol test8.kd.
; NOTE(review): the entry label, terminator and closing brace of the
; definition are not visible in this excerpt -- confirm against the full file.
42 ; CHECK-NEXT: - .address_space: global
43 ; CHECK-NEXT: .name: r
44 ; CHECK-NEXT: .offset: 0
45 ; CHECK-NEXT: .size: 8
46 ; CHECK-NEXT: .value_kind: global_buffer
47 ; CHECK-NEXT: .value_type: f16
48 ; CHECK-NEXT: - .address_space: global
49 ; CHECK-NEXT: .name: a
50 ; CHECK-NEXT: .offset: 8
51 ; CHECK-NEXT: .size: 8
52 ; CHECK-NEXT: .value_kind: global_buffer
53 ; CHECK-NEXT: .value_type: f16
54 ; CHECK-NEXT: - .address_space: global
55 ; CHECK-NEXT: .name: b
56 ; CHECK-NEXT: .offset: 16
57 ; CHECK-NEXT: .size: 8
58 ; CHECK-NEXT: .value_kind: global_buffer
59 ; CHECK-NEXT: .value_type: f16
60 ; CHECK-NEXT: - .offset: 24
61 ; CHECK-NEXT: .size: 8
62 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
63 ; CHECK-NEXT: .value_type: i64
65 ; CHECK: .symbol: test8.kd
; Kernel body: same half-precision add as the other kernels in this file
; (load %a, load %b, fadd, store to %r); only the attribute group differs.
66 define amdgpu_kernel void @test8(
67 half addrspace(1)* %r,
68 half addrspace(1)* %a,
69 half addrspace(1)* %b) #0 {
71 %a.val = load half, half addrspace(1)* %a
72 %b.val = load half, half addrspace(1)* %b
73 %r.val = fadd half %a.val, %b.val
74 store half %r.val, half addrspace(1)* %r
; test16: kernel tagged with attribute group #1, which sets
; "amdgpu-implicitarg-num-bytes" to "16".
; Expected metadata: the three explicit global_buffer arguments r, a, b
; (offsets 0, 8, 16; size 8; value_type f16), immediately followed by two
; hidden entries of size 8 and value_type i64 -- hidden_global_offset_x at
; offset 24 and hidden_global_offset_y at offset 32 -- and, later on, the
; kernel name test16 and the kernel descriptor symbol test16.kd.
; NOTE(review): the entry label, terminator and closing brace of the
; definition are not visible in this excerpt -- confirm against the full file.
79 ; CHECK-NEXT: - .address_space: global
80 ; CHECK-NEXT: .name: r
81 ; CHECK-NEXT: .offset: 0
82 ; CHECK-NEXT: .size: 8
83 ; CHECK-NEXT: .value_kind: global_buffer
84 ; CHECK-NEXT: .value_type: f16
85 ; CHECK-NEXT: - .address_space: global
86 ; CHECK-NEXT: .name: a
87 ; CHECK-NEXT: .offset: 8
88 ; CHECK-NEXT: .size: 8
89 ; CHECK-NEXT: .value_kind: global_buffer
90 ; CHECK-NEXT: .value_type: f16
91 ; CHECK-NEXT: - .address_space: global
92 ; CHECK-NEXT: .name: b
93 ; CHECK-NEXT: .offset: 16
94 ; CHECK-NEXT: .size: 8
95 ; CHECK-NEXT: .value_kind: global_buffer
96 ; CHECK-NEXT: .value_type: f16
97 ; CHECK-NEXT: - .offset: 24
98 ; CHECK-NEXT: .size: 8
99 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
100 ; CHECK-NEXT: .value_type: i64
101 ; CHECK-NEXT: - .offset: 32
102 ; CHECK-NEXT: .size: 8
103 ; CHECK-NEXT: .value_kind: hidden_global_offset_y
104 ; CHECK-NEXT: .value_type: i64
105 ; CHECK: .name: test16
106 ; CHECK: .symbol: test16.kd
; Kernel body: same half-precision add as the other kernels in this file
; (load %a, load %b, fadd, store to %r); only the attribute group differs.
107 define amdgpu_kernel void @test16(
108 half addrspace(1)* %r,
109 half addrspace(1)* %a,
110 half addrspace(1)* %b) #1 {
112 %a.val = load half, half addrspace(1)* %a
113 %b.val = load half, half addrspace(1)* %b
114 %r.val = fadd half %a.val, %b.val
115 store half %r.val, half addrspace(1)* %r
; test24: kernel tagged with attribute group #2, which sets
; "amdgpu-implicitarg-num-bytes" to "24".
; Expected metadata: the three explicit global_buffer arguments r, a, b
; (offsets 0, 8, 16; size 8; value_type f16), immediately followed by three
; hidden entries of size 8 and value_type i64 -- hidden_global_offset_x,
; hidden_global_offset_y and hidden_global_offset_z at offsets 24, 32 and
; 40 -- and, later on, the kernel name test24 and the kernel descriptor
; symbol test24.kd.
; NOTE(review): the entry label, terminator and closing brace of the
; definition are not visible in this excerpt -- confirm against the full file.
120 ; CHECK-NEXT: - .address_space: global
121 ; CHECK-NEXT: .name: r
122 ; CHECK-NEXT: .offset: 0
123 ; CHECK-NEXT: .size: 8
124 ; CHECK-NEXT: .value_kind: global_buffer
125 ; CHECK-NEXT: .value_type: f16
126 ; CHECK-NEXT: - .address_space: global
127 ; CHECK-NEXT: .name: a
128 ; CHECK-NEXT: .offset: 8
129 ; CHECK-NEXT: .size: 8
130 ; CHECK-NEXT: .value_kind: global_buffer
131 ; CHECK-NEXT: .value_type: f16
132 ; CHECK-NEXT: - .address_space: global
133 ; CHECK-NEXT: .name: b
134 ; CHECK-NEXT: .offset: 16
135 ; CHECK-NEXT: .size: 8
136 ; CHECK-NEXT: .value_kind: global_buffer
137 ; CHECK-NEXT: .value_type: f16
138 ; CHECK-NEXT: - .offset: 24
139 ; CHECK-NEXT: .size: 8
140 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
141 ; CHECK-NEXT: .value_type: i64
142 ; CHECK-NEXT: - .offset: 32
143 ; CHECK-NEXT: .size: 8
144 ; CHECK-NEXT: .value_kind: hidden_global_offset_y
145 ; CHECK-NEXT: .value_type: i64
146 ; CHECK-NEXT: - .offset: 40
147 ; CHECK-NEXT: .size: 8
148 ; CHECK-NEXT: .value_kind: hidden_global_offset_z
149 ; CHECK-NEXT: .value_type: i64
150 ; CHECK: .name: test24
151 ; CHECK: .symbol: test24.kd
; Kernel body: same half-precision add as the other kernels in this file
; (load %a, load %b, fadd, store to %r); only the attribute group differs.
152 define amdgpu_kernel void @test24(
153 half addrspace(1)* %r,
154 half addrspace(1)* %a,
155 half addrspace(1)* %b) #2 {
157 %a.val = load half, half addrspace(1)* %a
158 %b.val = load half, half addrspace(1)* %b
159 %r.val = fadd half %a.val, %b.val
160 store half %r.val, half addrspace(1)* %r
; test32: kernel tagged with attribute group #3, which sets
; "amdgpu-implicitarg-num-bytes" to "32".
; Expected metadata: the three explicit global_buffer arguments r, a, b
; (offsets 0, 8, 16; size 8; value_type f16), then the three i64 global
; offset entries (x, y, z at offsets 24, 32, 40), then one hidden_none entry
; in the global address space at offset 48 with size 8 and value_type i8.
; The kernel name test32 and descriptor symbol test32.kd are expected later.
; NOTE(review): the entry label, terminator and closing brace of the
; definition are not visible in this excerpt -- confirm against the full file.
165 ; CHECK-NEXT: - .address_space: global
166 ; CHECK-NEXT: .name: r
167 ; CHECK-NEXT: .offset: 0
168 ; CHECK-NEXT: .size: 8
169 ; CHECK-NEXT: .value_kind: global_buffer
170 ; CHECK-NEXT: .value_type: f16
171 ; CHECK-NEXT: - .address_space: global
172 ; CHECK-NEXT: .name: a
173 ; CHECK-NEXT: .offset: 8
174 ; CHECK-NEXT: .size: 8
175 ; CHECK-NEXT: .value_kind: global_buffer
176 ; CHECK-NEXT: .value_type: f16
177 ; CHECK-NEXT: - .address_space: global
178 ; CHECK-NEXT: .name: b
179 ; CHECK-NEXT: .offset: 16
180 ; CHECK-NEXT: .size: 8
181 ; CHECK-NEXT: .value_kind: global_buffer
182 ; CHECK-NEXT: .value_type: f16
183 ; CHECK-NEXT: - .offset: 24
184 ; CHECK-NEXT: .size: 8
185 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
186 ; CHECK-NEXT: .value_type: i64
187 ; CHECK-NEXT: - .offset: 32
188 ; CHECK-NEXT: .size: 8
189 ; CHECK-NEXT: .value_kind: hidden_global_offset_y
190 ; CHECK-NEXT: .value_type: i64
191 ; CHECK-NEXT: - .offset: 40
192 ; CHECK-NEXT: .size: 8
193 ; CHECK-NEXT: .value_kind: hidden_global_offset_z
194 ; CHECK-NEXT: .value_type: i64
195 ; CHECK-NEXT: - .address_space: global
196 ; CHECK-NEXT: .offset: 48
197 ; CHECK-NEXT: .size: 8
198 ; CHECK-NEXT: .value_kind: hidden_none
199 ; CHECK-NEXT: .value_type: i8
200 ; CHECK: .name: test32
201 ; CHECK: .symbol: test32.kd
; Kernel body: same half-precision add as the other kernels in this file
; (load %a, load %b, fadd, store to %r); only the attribute group differs.
202 define amdgpu_kernel void @test32(
203 half addrspace(1)* %r,
204 half addrspace(1)* %a,
205 half addrspace(1)* %b) #3 {
207 %a.val = load half, half addrspace(1)* %a
208 %b.val = load half, half addrspace(1)* %b
209 %r.val = fadd half %a.val, %b.val
210 store half %r.val, half addrspace(1)* %r
; test48: kernel tagged with attribute group #4, which sets
; "amdgpu-implicitarg-num-bytes" to "48".
; Expected metadata: the three explicit global_buffer arguments r, a, b
; (offsets 0, 8, 16; size 8; value_type f16), then the three i64 global
; offset entries (x, y, z at offsets 24, 32, 40), then three hidden_none
; entries in the global address space at offsets 48, 56 and 64, each with
; size 8 and value_type i8.
; The kernel name test48 and descriptor symbol test48.kd are expected later.
; NOTE(review): the entry label, terminator and closing brace of the
; definition are not visible in this excerpt -- confirm against the full file.
215 ; CHECK-NEXT: - .address_space: global
216 ; CHECK-NEXT: .name: r
217 ; CHECK-NEXT: .offset: 0
218 ; CHECK-NEXT: .size: 8
219 ; CHECK-NEXT: .value_kind: global_buffer
220 ; CHECK-NEXT: .value_type: f16
221 ; CHECK-NEXT: - .address_space: global
222 ; CHECK-NEXT: .name: a
223 ; CHECK-NEXT: .offset: 8
224 ; CHECK-NEXT: .size: 8
225 ; CHECK-NEXT: .value_kind: global_buffer
226 ; CHECK-NEXT: .value_type: f16
227 ; CHECK-NEXT: - .address_space: global
228 ; CHECK-NEXT: .name: b
229 ; CHECK-NEXT: .offset: 16
230 ; CHECK-NEXT: .size: 8
231 ; CHECK-NEXT: .value_kind: global_buffer
232 ; CHECK-NEXT: .value_type: f16
233 ; CHECK-NEXT: - .offset: 24
234 ; CHECK-NEXT: .size: 8
235 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
236 ; CHECK-NEXT: .value_type: i64
237 ; CHECK-NEXT: - .offset: 32
238 ; CHECK-NEXT: .size: 8
239 ; CHECK-NEXT: .value_kind: hidden_global_offset_y
240 ; CHECK-NEXT: .value_type: i64
241 ; CHECK-NEXT: - .offset: 40
242 ; CHECK-NEXT: .size: 8
243 ; CHECK-NEXT: .value_kind: hidden_global_offset_z
244 ; CHECK-NEXT: .value_type: i64
245 ; CHECK-NEXT: - .address_space: global
246 ; CHECK-NEXT: .offset: 48
247 ; CHECK-NEXT: .size: 8
248 ; CHECK-NEXT: .value_kind: hidden_none
249 ; CHECK-NEXT: .value_type: i8
250 ; CHECK-NEXT: - .address_space: global
251 ; CHECK-NEXT: .offset: 56
252 ; CHECK-NEXT: .size: 8
253 ; CHECK-NEXT: .value_kind: hidden_none
254 ; CHECK-NEXT: .value_type: i8
255 ; CHECK-NEXT: - .address_space: global
256 ; CHECK-NEXT: .offset: 64
257 ; CHECK-NEXT: .size: 8
258 ; CHECK-NEXT: .value_kind: hidden_none
259 ; CHECK-NEXT: .value_type: i8
260 ; CHECK: .name: test48
261 ; CHECK: .symbol: test48.kd
; Kernel body: same half-precision add as the other kernels in this file
; (load %a, load %b, fadd, store to %r); only the attribute group differs.
262 define amdgpu_kernel void @test48(
263 half addrspace(1)* %r,
264 half addrspace(1)* %a,
265 half addrspace(1)* %b) #4 {
267 %a.val = load half, half addrspace(1)* %a
268 %b.val = load half, half addrspace(1)* %b
269 %r.val = fadd half %a.val, %b.val
270 store half %r.val, half addrspace(1)* %r
; test56: kernel tagged with attribute group #5, which sets
; "amdgpu-implicitarg-num-bytes" to "56".
; Expected metadata: the three explicit global_buffer arguments r, a, b
; (offsets 0, 8, 16; size 8; value_type f16), then the three i64 global
; offset entries (x, y, z at offsets 24, 32, 40), then three hidden_none
; entries at offsets 48, 56 and 64, and finally a hidden_multigrid_sync_arg
; entry at offset 72. The last four entries are all in the global address
; space with size 8 and value_type i8.
; The kernel name test56 and descriptor symbol test56.kd are expected later.
; NOTE(review): the entry label, terminator and closing brace of the
; definition are not visible in this excerpt -- confirm against the full file.
275 ; CHECK-NEXT: - .address_space: global
276 ; CHECK-NEXT: .name: r
277 ; CHECK-NEXT: .offset: 0
278 ; CHECK-NEXT: .size: 8
279 ; CHECK-NEXT: .value_kind: global_buffer
280 ; CHECK-NEXT: .value_type: f16
281 ; CHECK-NEXT: - .address_space: global
282 ; CHECK-NEXT: .name: a
283 ; CHECK-NEXT: .offset: 8
284 ; CHECK-NEXT: .size: 8
285 ; CHECK-NEXT: .value_kind: global_buffer
286 ; CHECK-NEXT: .value_type: f16
287 ; CHECK-NEXT: - .address_space: global
288 ; CHECK-NEXT: .name: b
289 ; CHECK-NEXT: .offset: 16
290 ; CHECK-NEXT: .size: 8
291 ; CHECK-NEXT: .value_kind: global_buffer
292 ; CHECK-NEXT: .value_type: f16
293 ; CHECK-NEXT: - .offset: 24
294 ; CHECK-NEXT: .size: 8
295 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
296 ; CHECK-NEXT: .value_type: i64
297 ; CHECK-NEXT: - .offset: 32
298 ; CHECK-NEXT: .size: 8
299 ; CHECK-NEXT: .value_kind: hidden_global_offset_y
300 ; CHECK-NEXT: .value_type: i64
301 ; CHECK-NEXT: - .offset: 40
302 ; CHECK-NEXT: .size: 8
303 ; CHECK-NEXT: .value_kind: hidden_global_offset_z
304 ; CHECK-NEXT: .value_type: i64
305 ; CHECK-NEXT: - .address_space: global
306 ; CHECK-NEXT: .offset: 48
307 ; CHECK-NEXT: .size: 8
308 ; CHECK-NEXT: .value_kind: hidden_none
309 ; CHECK-NEXT: .value_type: i8
310 ; CHECK-NEXT: - .address_space: global
311 ; CHECK-NEXT: .offset: 56
312 ; CHECK-NEXT: .size: 8
313 ; CHECK-NEXT: .value_kind: hidden_none
314 ; CHECK-NEXT: .value_type: i8
315 ; CHECK-NEXT: - .address_space: global
316 ; CHECK-NEXT: .offset: 64
317 ; CHECK-NEXT: .size: 8
318 ; CHECK-NEXT: .value_kind: hidden_none
319 ; CHECK-NEXT: .value_type: i8
320 ; CHECK-NEXT: - .address_space: global
321 ; CHECK-NEXT: .offset: 72
322 ; CHECK-NEXT: .size: 8
323 ; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg
324 ; CHECK-NEXT: .value_type: i8
325 ; CHECK: .name: test56
326 ; CHECK: .symbol: test56.kd
; Kernel body: same half-precision add as the other kernels in this file
; (load %a, load %b, fadd, store to %r); only the attribute group differs.
327 define amdgpu_kernel void @test56(
328 half addrspace(1)* %r,
329 half addrspace(1)* %a,
330 half addrspace(1)* %b) #5 {
332 %a.val = load half, half addrspace(1)* %a
333 %b.val = load half, half addrspace(1)* %b
334 %r.val = fadd half %a.val, %b.val
335 store half %r.val, half addrspace(1)* %r
; After all per-kernel entries, the notes output must also contain the
; amdhsa.version key.
339 ; CHECK: amdhsa.version:
; Attribute groups used by the kernels above. Each one sets only
; "amdgpu-implicitarg-num-bytes"; the value steps through 8, 16, 24, 32, 48
; and 56 for test8, test16, test24, test32, test48 and test56 respectively.
; The per-kernel expectations list progressively more hidden argument
; entries as that value grows.
343 attributes #0 = { "amdgpu-implicitarg-num-bytes"="8" }
344 attributes #1 = { "amdgpu-implicitarg-num-bytes"="16" }
345 attributes #2 = { "amdgpu-implicitarg-num-bytes"="24" }
346 attributes #3 = { "amdgpu-implicitarg-num-bytes"="32" }
347 attributes #4 = { "amdgpu-implicitarg-num-bytes"="48" }
348 attributes #5 = { "amdgpu-implicitarg-num-bytes"="56" }