1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
6 ; CHECK: amdhsa.kernels:
9 ; CHECK-NEXT: - .address_space: global
10 ; CHECK-NEXT: .name: r
11 ; CHECK-NEXT: .offset: 0
12 ; CHECK-NEXT: .size: 8
13 ; CHECK-NEXT: .value_kind: global_buffer
14 ; CHECK-NEXT: - .address_space: global
15 ; CHECK-NEXT: .name: a
16 ; CHECK-NEXT: .offset: 8
17 ; CHECK-NEXT: .size: 8
18 ; CHECK-NEXT: .value_kind: global_buffer
19 ; CHECK-NEXT: - .address_space: global
20 ; CHECK-NEXT: .name: b
21 ; CHECK-NEXT: .offset: 16
22 ; CHECK-NEXT: .size: 8
23 ; CHECK-NEXT: .value_kind: global_buffer
25 ; CHECK: .symbol: test0.kd
; Kernel test0: baseline case with no attribute group attached, so no
; "amdgpu-implicitarg-num-bytes" value is requested for this kernel.
; The expectations above list only the three explicit global_buffer
; arguments (r, a, b) at offsets 0, 8 and 16.
; Body: load one half from %a and one from %b, add them, store the sum to %r.
26 define amdgpu_kernel void @test0(
29 ptr addrspace(1) %b) {
31 %a.val = load half, ptr addrspace(1) %a
32 %b.val = load half, ptr addrspace(1) %b
33 %r.val = fadd half %a.val, %b.val
34 store half %r.val, ptr addrspace(1) %r
39 ; CHECK-NEXT: - .address_space: global
40 ; CHECK-NEXT: .name: r
41 ; CHECK-NEXT: .offset: 0
42 ; CHECK-NEXT: .size: 8
43 ; CHECK-NEXT: .value_kind: global_buffer
44 ; CHECK-NEXT: - .address_space: global
45 ; CHECK-NEXT: .name: a
46 ; CHECK-NEXT: .offset: 8
47 ; CHECK-NEXT: .size: 8
48 ; CHECK-NEXT: .value_kind: global_buffer
49 ; CHECK-NEXT: - .address_space: global
50 ; CHECK-NEXT: .name: b
51 ; CHECK-NEXT: .offset: 16
52 ; CHECK-NEXT: .size: 8
53 ; CHECK-NEXT: .value_kind: global_buffer
54 ; CHECK-NEXT: - .offset: 24
55 ; CHECK-NEXT: .size: 8
56 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
58 ; CHECK: .symbol: test8.kd
; Kernel test8: uses attribute group #0, which sets
; "amdgpu-implicitarg-num-bytes"="8".
; The expectations above list the three explicit global_buffer arguments
; followed by one hidden argument: hidden_global_offset_x at offset 24.
; Body: load one half from %a and one from %b, add them, store the sum to %r.
59 define amdgpu_kernel void @test8(
62 ptr addrspace(1) %b) #0 {
64 %a.val = load half, ptr addrspace(1) %a
65 %b.val = load half, ptr addrspace(1) %b
66 %r.val = fadd half %a.val, %b.val
67 store half %r.val, ptr addrspace(1) %r
72 ; CHECK-NEXT: - .address_space: global
73 ; CHECK-NEXT: .name: r
74 ; CHECK-NEXT: .offset: 0
75 ; CHECK-NEXT: .size: 8
76 ; CHECK-NEXT: .value_kind: global_buffer
77 ; CHECK-NEXT: - .address_space: global
78 ; CHECK-NEXT: .name: a
79 ; CHECK-NEXT: .offset: 8
80 ; CHECK-NEXT: .size: 8
81 ; CHECK-NEXT: .value_kind: global_buffer
82 ; CHECK-NEXT: - .address_space: global
83 ; CHECK-NEXT: .name: b
84 ; CHECK-NEXT: .offset: 16
85 ; CHECK-NEXT: .size: 8
86 ; CHECK-NEXT: .value_kind: global_buffer
87 ; CHECK-NEXT: - .offset: 24
88 ; CHECK-NEXT: .size: 8
89 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
90 ; CHECK-NEXT: - .offset: 32
91 ; CHECK-NEXT: .size: 8
92 ; CHECK-NEXT: .value_kind: hidden_global_offset_y
93 ; CHECK: .name: test16
94 ; CHECK: .symbol: test16.kd
; Kernel test16: uses attribute group #1, which sets
; "amdgpu-implicitarg-num-bytes"="16".
; The expectations above list the three explicit global_buffer arguments
; followed by hidden_global_offset_x (offset 24) and
; hidden_global_offset_y (offset 32).
; Body: load one half from %a and one from %b, add them, store the sum to %r.
95 define amdgpu_kernel void @test16(
98 ptr addrspace(1) %b) #1 {
100 %a.val = load half, ptr addrspace(1) %a
101 %b.val = load half, ptr addrspace(1) %b
102 %r.val = fadd half %a.val, %b.val
103 store half %r.val, ptr addrspace(1) %r
108 ; CHECK-NEXT: - .address_space: global
109 ; CHECK-NEXT: .name: r
110 ; CHECK-NEXT: .offset: 0
111 ; CHECK-NEXT: .size: 8
112 ; CHECK-NEXT: .value_kind: global_buffer
113 ; CHECK-NEXT: - .address_space: global
114 ; CHECK-NEXT: .name: a
115 ; CHECK-NEXT: .offset: 8
116 ; CHECK-NEXT: .size: 8
117 ; CHECK-NEXT: .value_kind: global_buffer
118 ; CHECK-NEXT: - .address_space: global
119 ; CHECK-NEXT: .name: b
120 ; CHECK-NEXT: .offset: 16
121 ; CHECK-NEXT: .size: 8
122 ; CHECK-NEXT: .value_kind: global_buffer
123 ; CHECK-NEXT: - .offset: 24
124 ; CHECK-NEXT: .size: 8
125 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
126 ; CHECK-NEXT: - .offset: 32
127 ; CHECK-NEXT: .size: 8
128 ; CHECK-NEXT: .value_kind: hidden_global_offset_y
129 ; CHECK-NEXT: - .offset: 40
130 ; CHECK-NEXT: .size: 8
131 ; CHECK-NEXT: .value_kind: hidden_global_offset_z
132 ; CHECK: .name: test24
133 ; CHECK: .symbol: test24.kd
; Kernel test24: uses attribute group #2, which sets
; "amdgpu-implicitarg-num-bytes"="24".
; The expectations above list the three explicit global_buffer arguments
; followed by hidden_global_offset_x/y/z at offsets 24, 32 and 40.
; Body: load one half from %a and one from %b, add them, store the sum to %r.
134 define amdgpu_kernel void @test24(
137 ptr addrspace(1) %b) #2 {
139 %a.val = load half, ptr addrspace(1) %a
140 %b.val = load half, ptr addrspace(1) %b
141 %r.val = fadd half %a.val, %b.val
142 store half %r.val, ptr addrspace(1) %r
147 ; CHECK-NEXT: - .address_space: global
148 ; CHECK-NEXT: .name: r
149 ; CHECK-NEXT: .offset: 0
150 ; CHECK-NEXT: .size: 8
151 ; CHECK-NEXT: .value_kind: global_buffer
152 ; CHECK-NEXT: - .address_space: global
153 ; CHECK-NEXT: .name: a
154 ; CHECK-NEXT: .offset: 8
155 ; CHECK-NEXT: .size: 8
156 ; CHECK-NEXT: .value_kind: global_buffer
157 ; CHECK-NEXT: - .address_space: global
158 ; CHECK-NEXT: .name: b
159 ; CHECK-NEXT: .offset: 16
160 ; CHECK-NEXT: .size: 8
161 ; CHECK-NEXT: .value_kind: global_buffer
162 ; CHECK-NEXT: - .offset: 24
163 ; CHECK-NEXT: .size: 8
164 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
165 ; CHECK-NEXT: - .offset: 32
166 ; CHECK-NEXT: .size: 8
167 ; CHECK-NEXT: .value_kind: hidden_global_offset_y
168 ; CHECK-NEXT: - .offset: 40
169 ; CHECK-NEXT: .size: 8
170 ; CHECK-NEXT: .value_kind: hidden_global_offset_z
171 ; CHECK-NEXT: - .offset: 48
172 ; CHECK-NEXT: .size: 8
173 ; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
174 ; CHECK: .name: test32
175 ; CHECK: .symbol: test32.kd
; Kernel test32: uses attribute group #3, which sets
; "amdgpu-implicitarg-num-bytes"="32".
; The expectations above list the three explicit global_buffer arguments,
; hidden_global_offset_x/y/z at offsets 24, 32 and 40, and
; hidden_hostcall_buffer at offset 48.
; Body: load one half from %a and one from %b, add them, store the sum to %r.
176 define amdgpu_kernel void @test32(
179 ptr addrspace(1) %b) #3 {
181 %a.val = load half, ptr addrspace(1) %a
182 %b.val = load half, ptr addrspace(1) %b
183 %r.val = fadd half %a.val, %b.val
184 store half %r.val, ptr addrspace(1) %r
189 ; CHECK-NEXT: - .address_space: global
190 ; CHECK-NEXT: .name: r
191 ; CHECK-NEXT: .offset: 0
192 ; CHECK-NEXT: .size: 8
193 ; CHECK-NEXT: .value_kind: global_buffer
194 ; CHECK-NEXT: - .address_space: global
195 ; CHECK-NEXT: .name: a
196 ; CHECK-NEXT: .offset: 8
197 ; CHECK-NEXT: .size: 8
198 ; CHECK-NEXT: .value_kind: global_buffer
199 ; CHECK-NEXT: - .address_space: global
200 ; CHECK-NEXT: .name: b
201 ; CHECK-NEXT: .offset: 16
202 ; CHECK-NEXT: .size: 8
203 ; CHECK-NEXT: .value_kind: global_buffer
204 ; CHECK-NEXT: - .offset: 24
205 ; CHECK-NEXT: .size: 8
206 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
207 ; CHECK-NEXT: - .offset: 32
208 ; CHECK-NEXT: .size: 8
209 ; CHECK-NEXT: .value_kind: hidden_global_offset_y
210 ; CHECK-NEXT: - .offset: 40
211 ; CHECK-NEXT: .size: 8
212 ; CHECK-NEXT: .value_kind: hidden_global_offset_z
213 ; CHECK-NEXT: - .offset: 48
214 ; CHECK-NEXT: .size: 8
215 ; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
216 ; CHECK-NEXT: - .offset: 56
217 ; CHECK-NEXT: .size: 8
218 ; CHECK-NEXT: .value_kind: hidden_default_queue
219 ; CHECK-NEXT: - .offset: 64
220 ; CHECK-NEXT: .size: 8
221 ; CHECK-NEXT: .value_kind: hidden_completion_action
222 ; CHECK: .name: test48
223 ; CHECK: .symbol: test48.kd
; Kernel test48: uses attribute group #4, which sets
; "amdgpu-implicitarg-num-bytes"="48".
; The expectations above list the three explicit global_buffer arguments,
; hidden_global_offset_x/y/z at offsets 24, 32 and 40,
; hidden_hostcall_buffer at 48, hidden_default_queue at 56 and
; hidden_completion_action at 64.
; Body: load one half from %a and one from %b, add them, store the sum to %r.
224 define amdgpu_kernel void @test48(
227 ptr addrspace(1) %b) #4 {
229 %a.val = load half, ptr addrspace(1) %a
230 %b.val = load half, ptr addrspace(1) %b
231 %r.val = fadd half %a.val, %b.val
232 store half %r.val, ptr addrspace(1) %r
237 ; CHECK-NEXT: - .address_space: global
238 ; CHECK-NEXT: .name: r
239 ; CHECK-NEXT: .offset: 0
240 ; CHECK-NEXT: .size: 8
241 ; CHECK-NEXT: .value_kind: global_buffer
242 ; CHECK-NEXT: - .address_space: global
243 ; CHECK-NEXT: .name: a
244 ; CHECK-NEXT: .offset: 8
245 ; CHECK-NEXT: .size: 8
246 ; CHECK-NEXT: .value_kind: global_buffer
247 ; CHECK-NEXT: - .address_space: global
248 ; CHECK-NEXT: .name: b
249 ; CHECK-NEXT: .offset: 16
250 ; CHECK-NEXT: .size: 8
251 ; CHECK-NEXT: .value_kind: global_buffer
252 ; CHECK-NEXT: - .offset: 24
253 ; CHECK-NEXT: .size: 8
254 ; CHECK-NEXT: .value_kind: hidden_global_offset_x
255 ; CHECK-NEXT: - .offset: 32
256 ; CHECK-NEXT: .size: 8
257 ; CHECK-NEXT: .value_kind: hidden_global_offset_y
258 ; CHECK-NEXT: - .offset: 40
259 ; CHECK-NEXT: .size: 8
260 ; CHECK-NEXT: .value_kind: hidden_global_offset_z
261 ; CHECK-NEXT: - .offset: 48
262 ; CHECK-NEXT: .size: 8
263 ; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
264 ; CHECK-NEXT: - .offset: 56
265 ; CHECK-NEXT: .size: 8
266 ; CHECK-NEXT: .value_kind: hidden_default_queue
267 ; CHECK-NEXT: - .offset: 64
268 ; CHECK-NEXT: .size: 8
269 ; CHECK-NEXT: .value_kind: hidden_completion_action
270 ; CHECK-NEXT: - .offset: 72
271 ; CHECK-NEXT: .size: 8
272 ; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg
273 ; CHECK: .name: test56
274 ; CHECK: .symbol: test56.kd
; Kernel test56: uses attribute group #5, which sets
; "amdgpu-implicitarg-num-bytes"="56".
; The expectations above list the three explicit global_buffer arguments,
; hidden_global_offset_x/y/z at offsets 24, 32 and 40,
; hidden_hostcall_buffer at 48, hidden_default_queue at 56,
; hidden_completion_action at 64 and hidden_multigrid_sync_arg at 72.
; Body: load one half from %a and one from %b, add them, store the sum to %r.
275 define amdgpu_kernel void @test56(
278 ptr addrspace(1) %b) #5 {
280 %a.val = load half, ptr addrspace(1) %a
281 %b.val = load half, ptr addrspace(1) %b
282 %r.val = fadd half %a.val, %b.val
283 store half %r.val, ptr addrspace(1) %r
287 ; CHECK: amdhsa.version:
291 ; The kernels have no use of llvm.amdgcn.implicitarg.ptr, so use optnone to
292 ; avoid optimizing out the implicit argument allocation.
; Attribute groups #0-#5 differ only in the requested implicit-argument byte
; count (8, 16, 24, 32, 48, 56); each is attached to the kernel whose name
; carries the same number (test8 ... test56). noinline accompanies optnone in
; every group.
293 attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="8" }
294 attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="16" }
295 attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="24" }
296 attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" }
297 attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
298 attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
; Module flag "amdhsa_code_object_version" with value 400.
; NOTE(review): 400 presumably selects code object v4 - confirm against the
; AMDGPU backend documentation.
300 !llvm.module.flags = !{!0}
301 !0 = !{i32 1, !"amdhsa_code_object_version", i32 400}