1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
4 ; Make sure the stack is never realigned for entry functions.
; max_alignment_128: entry kernel using attribute group #0 whose second alloca
; requests 128-byte alignment. The checks for both run lines expect the i32
; store at offset:128 and a private segment fixed size of 256.
6 define amdgpu_kernel void @max_alignment_128() #0 {
; Expected output for the -mcpu=fiji run (VI prefix).
7 ; VI-LABEL: max_alignment_128:
9 ; VI-NEXT: s_add_u32 s0, s0, s17
10 ; VI-NEXT: s_addc_u32 s1, s1, 0
11 ; VI-NEXT: v_mov_b32_e32 v0, 3
12 ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
13 ; VI-NEXT: s_waitcnt vmcnt(0)
14 ; VI-NEXT: v_mov_b32_e32 v0, 9
15 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
16 ; VI-NEXT: s_waitcnt vmcnt(0)
18 ; VI-NEXT: .section .rodata,"a"
20 ; VI-NEXT: .amdhsa_kernel max_alignment_128
21 ; VI-NEXT: .amdhsa_group_segment_fixed_size 0
22 ; VI-NEXT: .amdhsa_private_segment_fixed_size 256
23 ; VI-NEXT: .amdhsa_kernarg_size 56
24 ; VI-NEXT: .amdhsa_user_sgpr_count 14
25 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
26 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
27 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 1
28 ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
29 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 1
30 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
31 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
32 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
33 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
34 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
35 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
36 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
37 ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 2
38 ; VI-NEXT: .amdhsa_next_free_vgpr 1
39 ; VI-NEXT: .amdhsa_next_free_sgpr 18
40 ; VI-NEXT: .amdhsa_reserve_vcc 0
41 ; VI-NEXT: .amdhsa_reserve_flat_scratch 0
42 ; VI-NEXT: .amdhsa_float_round_mode_32 0
43 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0
44 ; VI-NEXT: .amdhsa_float_denorm_mode_32 3
45 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
46 ; VI-NEXT: .amdhsa_dx10_clamp 1
47 ; VI-NEXT: .amdhsa_ieee_mode 1
48 ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
49 ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
50 ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
51 ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
52 ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
53 ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
54 ; VI-NEXT: .amdhsa_exception_int_div_zero 0
55 ; VI-NEXT: .end_amdhsa_kernel
; Expected output for the -mcpu=gfx900 run (GFX9 prefix); the instruction
; sequence and the offset/size expectations match the fiji checks.
58 ; GFX9-LABEL: max_alignment_128:
60 ; GFX9-NEXT: s_add_u32 s0, s0, s17
61 ; GFX9-NEXT: s_addc_u32 s1, s1, 0
62 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
63 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
64 ; GFX9-NEXT: s_waitcnt vmcnt(0)
65 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
66 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
67 ; GFX9-NEXT: s_waitcnt vmcnt(0)
69 ; GFX9-NEXT: .section .rodata,"a"
70 ; GFX9-NEXT: .p2align 6
71 ; GFX9-NEXT: .amdhsa_kernel max_alignment_128
72 ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
73 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256
74 ; GFX9-NEXT: .amdhsa_kernarg_size 56
75 ; GFX9-NEXT: .amdhsa_user_sgpr_count 14
76 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
77 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
78 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 1
79 ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
80 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 1
81 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
82 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
83 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
84 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
85 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
86 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
87 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
88 ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 2
89 ; GFX9-NEXT: .amdhsa_next_free_vgpr 1
90 ; GFX9-NEXT: .amdhsa_next_free_sgpr 18
91 ; GFX9-NEXT: .amdhsa_reserve_vcc 0
92 ; GFX9-NEXT: .amdhsa_reserve_flat_scratch 0
93 ; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
94 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0
95 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
96 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
97 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
98 ; GFX9-NEXT: .amdhsa_dx10_clamp 1
99 ; GFX9-NEXT: .amdhsa_ieee_mode 1
100 ; GFX9-NEXT: .amdhsa_fp16_overflow 0
101 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
102 ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
103 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
104 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
105 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
106 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
107 ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
108 ; GFX9-NEXT: .end_amdhsa_kernel
; IR under test: a 1-byte alloca with a volatile store of 3, followed by an
; i32 alloca aligned to 128 with a volatile store of 9.
110 %clutter = alloca i8, addrspace(5) ; Force non-zero offset for next alloca
111 store volatile i8 3, ptr addrspace(5) %clutter
112 %alloca.align = alloca i32, align 128, addrspace(5)
113 store volatile i32 9, ptr addrspace(5) %alloca.align, align 128
; stackrealign_attr: entry kernel using attribute group #1, which carries the
; "stackrealign" string attribute. Both allocas need at most 4-byte alignment.
; The checks for both run lines expect the i32 store at offset:4 and a private
; segment fixed size of 12.
117 define amdgpu_kernel void @stackrealign_attr() #1 {
; Expected output for the -mcpu=fiji run (VI prefix).
118 ; VI-LABEL: stackrealign_attr:
120 ; VI-NEXT: s_add_u32 s0, s0, s17
121 ; VI-NEXT: s_addc_u32 s1, s1, 0
122 ; VI-NEXT: v_mov_b32_e32 v0, 3
123 ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
124 ; VI-NEXT: s_waitcnt vmcnt(0)
125 ; VI-NEXT: v_mov_b32_e32 v0, 9
126 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
127 ; VI-NEXT: s_waitcnt vmcnt(0)
129 ; VI-NEXT: .section .rodata,"a"
130 ; VI-NEXT: .p2align 6
131 ; VI-NEXT: .amdhsa_kernel stackrealign_attr
132 ; VI-NEXT: .amdhsa_group_segment_fixed_size 0
133 ; VI-NEXT: .amdhsa_private_segment_fixed_size 12
134 ; VI-NEXT: .amdhsa_kernarg_size 56
135 ; VI-NEXT: .amdhsa_user_sgpr_count 14
136 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
137 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
138 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 1
139 ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
140 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 1
141 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
142 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
143 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
144 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
145 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
146 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
147 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
148 ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 2
149 ; VI-NEXT: .amdhsa_next_free_vgpr 1
150 ; VI-NEXT: .amdhsa_next_free_sgpr 18
151 ; VI-NEXT: .amdhsa_reserve_vcc 0
152 ; VI-NEXT: .amdhsa_reserve_flat_scratch 0
153 ; VI-NEXT: .amdhsa_float_round_mode_32 0
154 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0
155 ; VI-NEXT: .amdhsa_float_denorm_mode_32 3
156 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
157 ; VI-NEXT: .amdhsa_dx10_clamp 1
158 ; VI-NEXT: .amdhsa_ieee_mode 1
159 ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
160 ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
161 ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
162 ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
163 ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
164 ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
165 ; VI-NEXT: .amdhsa_exception_int_div_zero 0
166 ; VI-NEXT: .end_amdhsa_kernel
; Expected output for the -mcpu=gfx900 run (GFX9 prefix); the offset and
; private segment size expectations match the fiji checks.
169 ; GFX9-LABEL: stackrealign_attr:
171 ; GFX9-NEXT: s_add_u32 s0, s0, s17
172 ; GFX9-NEXT: s_addc_u32 s1, s1, 0
173 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
174 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
175 ; GFX9-NEXT: s_waitcnt vmcnt(0)
176 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
177 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
178 ; GFX9-NEXT: s_waitcnt vmcnt(0)
179 ; GFX9-NEXT: s_endpgm
180 ; GFX9-NEXT: .section .rodata,"a"
181 ; GFX9-NEXT: .p2align 6
182 ; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
183 ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
184 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 12
185 ; GFX9-NEXT: .amdhsa_kernarg_size 56
186 ; GFX9-NEXT: .amdhsa_user_sgpr_count 14
187 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
188 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
189 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 1
190 ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
191 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 1
192 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
193 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
194 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
195 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
196 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
197 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
198 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
199 ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 2
200 ; GFX9-NEXT: .amdhsa_next_free_vgpr 1
201 ; GFX9-NEXT: .amdhsa_next_free_sgpr 18
202 ; GFX9-NEXT: .amdhsa_reserve_vcc 0
203 ; GFX9-NEXT: .amdhsa_reserve_flat_scratch 0
204 ; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
205 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0
206 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
207 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
208 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
209 ; GFX9-NEXT: .amdhsa_dx10_clamp 1
210 ; GFX9-NEXT: .amdhsa_ieee_mode 1
211 ; GFX9-NEXT: .amdhsa_fp16_overflow 0
212 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
213 ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
214 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
215 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
216 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
217 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
218 ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
219 ; GFX9-NEXT: .end_amdhsa_kernel
; IR under test: a 1-byte alloca with a volatile store of 3, followed by an
; i32 alloca aligned to 4 with a volatile store of 9.
221 %clutter = alloca i8, addrspace(5) ; Force non-zero offset for next alloca
222 store volatile i8 3, ptr addrspace(5) %clutter
223 %alloca.align = alloca i32, align 4, addrspace(5)
224 store volatile i32 9, ptr addrspace(5) %alloca.align, align 4
; alignstack_attr: entry kernel using attribute group #2, which sets
; alignstack=128. Both allocas need at most 4-byte alignment. The checks for
; both run lines expect the i32 store at offset:4 and a private segment fixed
; size of 128.
228 define amdgpu_kernel void @alignstack_attr() #2 {
; Expected output for the -mcpu=fiji run (VI prefix).
229 ; VI-LABEL: alignstack_attr:
231 ; VI-NEXT: s_add_u32 s0, s0, s17
232 ; VI-NEXT: s_addc_u32 s1, s1, 0
233 ; VI-NEXT: v_mov_b32_e32 v0, 3
234 ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
235 ; VI-NEXT: s_waitcnt vmcnt(0)
236 ; VI-NEXT: v_mov_b32_e32 v0, 9
237 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
238 ; VI-NEXT: s_waitcnt vmcnt(0)
240 ; VI-NEXT: .section .rodata,"a"
241 ; VI-NEXT: .p2align 6
242 ; VI-NEXT: .amdhsa_kernel alignstack_attr
243 ; VI-NEXT: .amdhsa_group_segment_fixed_size 0
244 ; VI-NEXT: .amdhsa_private_segment_fixed_size 128
245 ; VI-NEXT: .amdhsa_kernarg_size 56
246 ; VI-NEXT: .amdhsa_user_sgpr_count 14
247 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
248 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
249 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 1
250 ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
251 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 1
252 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
253 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
254 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
255 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
256 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
257 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
258 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
259 ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 2
260 ; VI-NEXT: .amdhsa_next_free_vgpr 1
261 ; VI-NEXT: .amdhsa_next_free_sgpr 18
262 ; VI-NEXT: .amdhsa_reserve_vcc 0
263 ; VI-NEXT: .amdhsa_reserve_flat_scratch 0
264 ; VI-NEXT: .amdhsa_float_round_mode_32 0
265 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0
266 ; VI-NEXT: .amdhsa_float_denorm_mode_32 3
267 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
268 ; VI-NEXT: .amdhsa_dx10_clamp 1
269 ; VI-NEXT: .amdhsa_ieee_mode 1
270 ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
271 ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
272 ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
273 ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
274 ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
275 ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
276 ; VI-NEXT: .amdhsa_exception_int_div_zero 0
277 ; VI-NEXT: .end_amdhsa_kernel
; Expected output for the -mcpu=gfx900 run (GFX9 prefix); the offset and
; private segment size expectations match the fiji checks.
280 ; GFX9-LABEL: alignstack_attr:
282 ; GFX9-NEXT: s_add_u32 s0, s0, s17
283 ; GFX9-NEXT: s_addc_u32 s1, s1, 0
284 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
285 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
286 ; GFX9-NEXT: s_waitcnt vmcnt(0)
287 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
288 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
289 ; GFX9-NEXT: s_waitcnt vmcnt(0)
290 ; GFX9-NEXT: s_endpgm
291 ; GFX9-NEXT: .section .rodata,"a"
292 ; GFX9-NEXT: .p2align 6
293 ; GFX9-NEXT: .amdhsa_kernel alignstack_attr
294 ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
295 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128
296 ; GFX9-NEXT: .amdhsa_kernarg_size 56
297 ; GFX9-NEXT: .amdhsa_user_sgpr_count 14
298 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
299 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
300 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 1
301 ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
302 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 1
303 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
304 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
305 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
306 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
307 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
308 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
309 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
310 ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 2
311 ; GFX9-NEXT: .amdhsa_next_free_vgpr 1
312 ; GFX9-NEXT: .amdhsa_next_free_sgpr 18
313 ; GFX9-NEXT: .amdhsa_reserve_vcc 0
314 ; GFX9-NEXT: .amdhsa_reserve_flat_scratch 0
315 ; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
316 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0
317 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
318 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
319 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
320 ; GFX9-NEXT: .amdhsa_dx10_clamp 1
321 ; GFX9-NEXT: .amdhsa_ieee_mode 1
322 ; GFX9-NEXT: .amdhsa_fp16_overflow 0
323 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
324 ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
325 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
326 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
327 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
328 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
329 ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
330 ; GFX9-NEXT: .end_amdhsa_kernel
; IR under test: a 1-byte alloca with a volatile store of 3, followed by an
; i32 alloca aligned to 4 with a volatile store of 9.
332 %clutter = alloca i8, addrspace(5) ; Force non-zero offset for next alloca
333 store volatile i8 3, ptr addrspace(5) %clutter
334 %alloca.align = alloca i32, align 4, addrspace(5)
335 store volatile i32 9, ptr addrspace(5) %alloca.align, align 4
; Attribute groups used by the three kernels in this file:
;   #0 - nounwind only (used by @max_alignment_128)
;   #1 - nounwind plus the "stackrealign" string attribute (@stackrealign_attr)
;   #2 - nounwind plus alignstack=128 (@alignstack_attr)
339 attributes #0 = { nounwind }
340 attributes #1 = { nounwind "stackrealign" }
341 attributes #2 = { nounwind alignstack=128 }
; Module flag setting amdhsa_code_object_version to 400.
343 !llvm.module.flags = !{!0}
344 !0 = !{i32 1, !"amdhsa_code_object_version", i32 400}