1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; (load << 8) | (%arg1 & 0xff) is combined into a single v_perm_b32 with
; selector 0x6050400 (per the checks below), instead of shl+and+or.
4 define amdgpu_kernel void @lsh8_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
5 ; GCN-LABEL: lsh8_or_and:
7 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
8 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
9 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
10 ; GCN-NEXT: v_mov_b32_e32 v3, 0x6050400
11 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
12 ; GCN-NEXT: v_mov_b32_e32 v1, s3
13 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
14 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
15 ; GCN-NEXT: flat_load_dword v2, v[0:1]
16 ; GCN-NEXT: s_waitcnt vmcnt(0)
17 ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
18 ; GCN-NEXT: flat_store_dword v[0:1], v2
; Per-lane address: &arg[workitem.id.x], then read-modify-write in place.
21 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
22 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
23 %tmp = load i32, ptr addrspace(1) %gep, align 4
24 %tmp2 = shl i32 %tmp, 8
25 %tmp3 = and i32 %arg1, 255
26 %tmp4 = or i32 %tmp2, %tmp3
27 store i32 %tmp4, ptr addrspace(1) %gep, align 4
; (load >> 24) | (%arg1 & 0xffffff00) combines into v_perm_b32 with
; selector 0x7060503; note the operand order (s0, v2) differs from lsh8_or_and.
31 define amdgpu_kernel void @lsr24_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
32 ; GCN-LABEL: lsr24_or_and:
34 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
35 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
36 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
37 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503
38 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
39 ; GCN-NEXT: v_mov_b32_e32 v1, s3
40 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
41 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
42 ; GCN-NEXT: flat_load_dword v2, v[0:1]
43 ; GCN-NEXT: s_waitcnt vmcnt(0)
44 ; GCN-NEXT: v_perm_b32 v2, s0, v2, v3
45 ; GCN-NEXT: flat_store_dword v[0:1], v2
48 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
49 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
50 %tmp = load i32, ptr addrspace(1) %gep, align 4
51 %tmp2 = lshr i32 %tmp, 24
52 %tmp3 = and i32 %arg1, 4294967040 ; 0xffffff00
53 %tmp4 = or i32 %tmp2, %tmp3
54 store i32 %tmp4, ptr addrspace(1) %gep, align 4
; Like lsr24_or_and but with the roles of the load and %arg1 swapped, plus a
; trailing xor of the sign bit; the perm (selector 0x7060503) still forms and
; the xor stays as a separate v_xor_b32.
58 define amdgpu_kernel void @and_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
59 ; GCN-LABEL: and_or_lsr24:
61 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
62 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
63 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
64 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503
65 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
66 ; GCN-NEXT: v_mov_b32_e32 v1, s3
67 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
68 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
69 ; GCN-NEXT: flat_load_dword v2, v[0:1]
70 ; GCN-NEXT: s_waitcnt vmcnt(0)
71 ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
72 ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
73 ; GCN-NEXT: flat_store_dword v[0:1], v2
76 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
77 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
78 %tmp = load i32, ptr addrspace(1) %gep, align 4
79 %tmp2 = and i32 %tmp, 4294967040 ; 0xffffff00
80 %tmp3 = lshr i32 %arg1, 24
81 %tmp4 = or i32 %tmp2, %tmp3
; -2147483648 == 0x80000000: flips only the sign bit.
82 %tmp5 = xor i32 %tmp4, -2147483648
83 store i32 %tmp5, ptr addrspace(1) %gep, align 4
; Byte-wise merge of two values through complementary masks
; (load & 0xff00ff00) | (%arg1 & 0x00ff00ff) -> v_perm_b32, selector 0x7020500.
87 define amdgpu_kernel void @and_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
88 ; GCN-LABEL: and_or_and:
90 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
91 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
92 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
93 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020500
94 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
95 ; GCN-NEXT: v_mov_b32_e32 v1, s3
96 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
97 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
98 ; GCN-NEXT: flat_load_dword v2, v[0:1]
99 ; GCN-NEXT: s_waitcnt vmcnt(0)
100 ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
101 ; GCN-NEXT: flat_store_dword v[0:1], v2
104 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
105 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
106 %tmp = load i32, ptr addrspace(1) %gep, align 4
; -16711936 == 0xff00ff00, 16711935 == 0x00ff00ff (disjoint byte masks).
107 %tmp2 = and i32 %tmp, -16711936
108 %tmp3 = and i32 %arg1, 16711935
109 %tmp4 = or i32 %tmp2, %tmp3
110 store i32 %tmp4, ptr addrspace(1) %gep, align 4
; (load << 8) | (%arg1 >> 24) is a funnel shift, so it selects
; v_alignbit_b32 (shift amount 24) rather than v_perm_b32.
114 define amdgpu_kernel void @lsh8_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
115 ; GCN-LABEL: lsh8_or_lsr24:
116 ; GCN: ; %bb.0: ; %bb
117 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
118 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
119 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
120 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
121 ; GCN-NEXT: v_mov_b32_e32 v1, s3
122 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
123 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
124 ; GCN-NEXT: flat_load_dword v2, v[0:1]
125 ; GCN-NEXT: s_waitcnt vmcnt(0)
126 ; GCN-NEXT: v_alignbit_b32 v2, v2, s0, 24
127 ; GCN-NEXT: flat_store_dword v[0:1], v2
130 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
131 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
132 %tmp = load i32, ptr addrspace(1) %gep, align 4
133 %tmp2 = shl i32 %tmp, 8
134 %tmp3 = lshr i32 %arg1, 24
135 %tmp4 = or i32 %tmp2, %tmp3
136 store i32 %tmp4, ptr addrspace(1) %gep, align 4
; (load << 16) | (%arg1 >> 24): shift amounts differ, so it is not a funnel
; shift; still combines into v_perm_b32 with selector 0x5040c03.
140 define amdgpu_kernel void @lsh16_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
141 ; GCN-LABEL: lsh16_or_lsr24:
142 ; GCN: ; %bb.0: ; %bb
143 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
144 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
145 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
146 ; GCN-NEXT: v_mov_b32_e32 v3, 0x5040c03
147 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
148 ; GCN-NEXT: v_mov_b32_e32 v1, s3
149 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
150 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
151 ; GCN-NEXT: flat_load_dword v2, v[0:1]
152 ; GCN-NEXT: s_waitcnt vmcnt(0)
153 ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
154 ; GCN-NEXT: flat_store_dword v[0:1], v2
157 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
158 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
159 %tmp = load i32, ptr addrspace(1) %gep, align 4
160 %tmp2 = shl i32 %tmp, 16
161 %tmp3 = lshr i32 %arg1, 24
162 %tmp4 = or i32 %tmp2, %tmp3
163 store i32 %tmp4, ptr addrspace(1) %gep, align 4
; xor of values masked with disjoint bit masks behaves like or, so this is
; still recognized as a byte select: v_perm_b32 with selector 0x7020104.
167 define amdgpu_kernel void @and_xor_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
168 ; GCN-LABEL: and_xor_and:
169 ; GCN: ; %bb.0: ; %bb
170 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
171 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
172 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
173 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104
174 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
175 ; GCN-NEXT: v_mov_b32_e32 v1, s3
176 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
177 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
178 ; GCN-NEXT: flat_load_dword v2, v[0:1]
179 ; GCN-NEXT: s_waitcnt vmcnt(0)
180 ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
181 ; GCN-NEXT: flat_store_dword v[0:1], v2
184 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
185 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
186 %tmp = load i32, ptr addrspace(1) %gep, align 4
; -16776961 == 0xff0000ff, 16776960 == 0x00ffff00 (disjoint masks).
187 %tmp2 = and i32 %tmp, -16776961
188 %tmp3 = and i32 %arg1, 16776960
189 %tmp4 = xor i32 %tmp2, %tmp3
190 store i32 %tmp4, ptr addrspace(1) %gep, align 4
194 ; FIXME: this should have produced a "v_perm_b32" with a 0xffff0500 mask.
; Currently NOT combined to v_perm_b32 (see FIXME above): the masking and
; merging stay as separate scalar s_and/s_or plus vector v_and/v_or ops.
195 define amdgpu_kernel void @and_or_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
196 ; GCN-LABEL: and_or_or_and:
197 ; GCN: ; %bb.0: ; %bb
198 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
199 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
200 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
201 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
202 ; GCN-NEXT: v_mov_b32_e32 v1, s3
203 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
204 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
205 ; GCN-NEXT: flat_load_dword v2, v[0:1]
206 ; GCN-NEXT: s_and_b32 s0, s0, 0xff00
207 ; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000
208 ; GCN-NEXT: s_waitcnt vmcnt(0)
209 ; GCN-NEXT: v_and_b32_e32 v2, 0xff00ff, v2
210 ; GCN-NEXT: v_or_b32_e32 v2, s0, v2
211 ; GCN-NEXT: flat_store_dword v[0:1], v2
214 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
215 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
216 %tmp = load i32, ptr addrspace(1) %gep, align 4
217 %and = and i32 %tmp, 16711935 ; 0x00ff00ff
218 %tmp1 = and i32 %arg1, 4294967040 ; 0xffffff00
; -65536 == 0xffff0000: forces the top half to all-ones before the merge.
219 %tmp2 = or i32 %tmp1, -65536
220 %tmp3 = or i32 %tmp2, %and
221 store i32 %tmp3, ptr addrspace(1) %gep, align 4
; ((load << 16) | (%arg1 & 0xffff)) & 0xff0000ff combines into v_perm_b32 with
; selector 0x50c0c00; the 0x0c selector bytes cover the lanes zeroed by the
; final and (presumably a constant-zero select — confirm against the ISA).
225 define amdgpu_kernel void @and_or_and_shl(ptr addrspace(1) nocapture %arg, i32 %arg1) {
226 ; GCN-LABEL: and_or_and_shl:
227 ; GCN: ; %bb.0: ; %bb
228 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
229 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
230 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
231 ; GCN-NEXT: v_mov_b32_e32 v3, 0x50c0c00
232 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
233 ; GCN-NEXT: v_mov_b32_e32 v1, s3
234 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
235 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
236 ; GCN-NEXT: flat_load_dword v2, v[0:1]
237 ; GCN-NEXT: s_waitcnt vmcnt(0)
238 ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
239 ; GCN-NEXT: flat_store_dword v[0:1], v2
242 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
243 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
244 %tmp = load i32, ptr addrspace(1) %gep, align 4
245 %tmp2 = shl i32 %tmp, 16
246 %tmp3 = and i32 %arg1, 65535
247 %tmp4 = or i32 %tmp2, %tmp3
; 4278190335 == 0xff0000ff: clears the two middle bytes of the merged value.
248 %and = and i32 %tmp4, 4278190335
249 store i32 %and, ptr addrspace(1) %gep, align 4
; (load | 0x00ffff00) & (%arg1 | 0xff0000ff): or-with-mask followed by and is
; also a byte select and combines into v_perm_b32 with selector 0x7020104.
253 define amdgpu_kernel void @or_and_or(ptr addrspace(1) nocapture %arg, i32 %arg1) {
254 ; GCN-LABEL: or_and_or:
255 ; GCN: ; %bb.0: ; %bb
256 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
257 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
258 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
259 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104
260 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
261 ; GCN-NEXT: v_mov_b32_e32 v1, s3
262 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
263 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
264 ; GCN-NEXT: flat_load_dword v2, v[0:1]
265 ; GCN-NEXT: s_waitcnt vmcnt(0)
266 ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
267 ; GCN-NEXT: flat_store_dword v[0:1], v2
270 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
271 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
272 %tmp = load i32, ptr addrspace(1) %gep, align 4
273 %or1 = or i32 %tmp, 16776960 ; 0x00ffff00
274 %or2 = or i32 %arg1, 4278190335 ; 0xff0000ff
275 %and = and i32 %or1, %or2
276 store i32 %and, ptr addrspace(1) %gep, align 4
280 ; FIXME: this should have produced a "v_perm_b32" with a 0xffff0500 mask.
; Known-bits test: the second store's mask folds to the constant 0xffff8004
; because the known bits of %tmp3 determine it. The merge itself is NOT yet
; combined to v_perm_b32 (see FIXME above) and stays as s_and/s_or + v_and/v_or.
281 define amdgpu_kernel void @known_ffff0500(ptr addrspace(1) nocapture %arg, i32 %arg1) {
282 ; GCN-LABEL: known_ffff0500:
283 ; GCN: ; %bb.0: ; %bb
284 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
285 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
286 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
287 ; GCN-NEXT: v_mov_b32_e32 v5, 0xffff8004
288 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
289 ; GCN-NEXT: v_mov_b32_e32 v1, s3
290 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
291 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
292 ; GCN-NEXT: flat_load_dword v4, v[0:1]
293 ; GCN-NEXT: s_bitset1_b32 s0, 15
294 ; GCN-NEXT: s_and_b32 s0, s0, 0xff00
295 ; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000
296 ; GCN-NEXT: v_mov_b32_e32 v2, s2
297 ; GCN-NEXT: v_mov_b32_e32 v3, s3
298 ; GCN-NEXT: s_waitcnt vmcnt(0)
299 ; GCN-NEXT: v_or_b32_e32 v4, 4, v4
300 ; GCN-NEXT: v_and_b32_e32 v4, 0xff00ff, v4
301 ; GCN-NEXT: v_or_b32_e32 v4, s0, v4
302 ; GCN-NEXT: flat_store_dword v[0:1], v4
303 ; GCN-NEXT: flat_store_dword v[2:3], v5
306 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
307 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
308 %load = load i32, ptr addrspace(1) %gep, align 4
; The or-with-constant operations below seed known bits into both operands.
309 %mask1 = or i32 %arg1, 32768 ; 0x8000
310 %mask2 = or i32 %load, 4
311 %and = and i32 %mask2, 16711935 ; 0x00ff00ff
312 %tmp1 = and i32 %mask1, 4294967040 ; 0xffffff00
313 %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
314 %tmp3 = or i32 %tmp2, %and
315 store i32 %tmp3, ptr addrspace(1) %gep, align 4
316 %v = and i32 %tmp3, 4294934532 ; 0xffff8004
317 store i32 %v, ptr addrspace(1) %arg, align 4
; Known-bits test on the and_or_and_shl pattern: the perm (selector 0x50c0c00)
; still forms, and the second store's mask folds to the constant 4.
321 define amdgpu_kernel void @known_050c0c00(ptr addrspace(1) nocapture %arg, i32 %arg1) {
322 ; GCN-LABEL: known_050c0c00:
323 ; GCN: ; %bb.0: ; %bb
324 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
325 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
326 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
327 ; GCN-NEXT: v_mov_b32_e32 v5, 0x50c0c00
328 ; GCN-NEXT: v_mov_b32_e32 v6, 4
329 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
330 ; GCN-NEXT: v_mov_b32_e32 v1, s3
331 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
332 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
333 ; GCN-NEXT: flat_load_dword v4, v[0:1]
334 ; GCN-NEXT: s_or_b32 s0, s0, 4
335 ; GCN-NEXT: v_mov_b32_e32 v2, s2
336 ; GCN-NEXT: v_mov_b32_e32 v3, s3
337 ; GCN-NEXT: s_waitcnt vmcnt(0)
338 ; GCN-NEXT: v_perm_b32 v4, v4, s0, v5
339 ; GCN-NEXT: flat_store_dword v[0:1], v4
340 ; GCN-NEXT: flat_store_dword v[2:3], v6
343 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
344 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
345 %tmp = load i32, ptr addrspace(1) %gep, align 4
346 %tmp2 = shl i32 %tmp, 16
; or-with-4 makes bit 2 a known-one bit of the merged value.
347 %mask = or i32 %arg1, 4
348 %tmp3 = and i32 %mask, 65535
349 %tmp4 = or i32 %tmp2, %tmp3
350 %and = and i32 %tmp4, 4278190335
351 store i32 %and, ptr addrspace(1) %gep, align 4
352 %v = and i32 %and, 16776964
353 store i32 %v, ptr addrspace(1) %arg, align 4
; Same pattern as known_ffff0500 with the or-constants swapped between the two
; operands; here the combine DOES produce v_perm_b32 (selector 0xffff0500) and
; the second store's mask folds to the constant 0xffff8004.
357 define amdgpu_kernel void @known_ffff8004(ptr addrspace(1) nocapture %arg, i32 %arg1) {
358 ; GCN-LABEL: known_ffff8004:
359 ; GCN: ; %bb.0: ; %bb
360 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
361 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
362 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
363 ; GCN-NEXT: v_mov_b32_e32 v5, 0xffff0500
364 ; GCN-NEXT: v_mov_b32_e32 v6, 0xffff8004
365 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
366 ; GCN-NEXT: v_mov_b32_e32 v1, s3
367 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
368 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
369 ; GCN-NEXT: flat_load_dword v4, v[0:1]
370 ; GCN-NEXT: s_or_b32 s0, s0, 4
371 ; GCN-NEXT: v_mov_b32_e32 v2, s2
372 ; GCN-NEXT: v_mov_b32_e32 v3, s3
373 ; GCN-NEXT: s_waitcnt vmcnt(0)
374 ; GCN-NEXT: v_or_b32_e32 v4, 0x8000, v4
375 ; GCN-NEXT: v_perm_b32 v4, v4, s0, v5
376 ; GCN-NEXT: flat_store_dword v[0:1], v4
377 ; GCN-NEXT: flat_store_dword v[2:3], v6
380 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
381 %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
382 %load = load i32, ptr addrspace(1) %gep, align 4
383 %mask1 = or i32 %arg1, 4
384 %mask2 = or i32 %load, 32768 ; 0x8000
385 %and = and i32 %mask1, 16711935 ; 0x00ff00ff
386 %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
387 %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
388 %tmp3 = or i32 %tmp2, %and
389 store i32 %tmp3, ptr addrspace(1) %gep, align 4
390 %v = and i32 %tmp3, 4294934532 ; 0xffff8004
391 store i32 %v, ptr addrspace(1) %arg, align 4
; Workitem-id intrinsic used by every kernel above to form per-lane addresses.
395 declare i32 @llvm.amdgcn.workitem.id.x()