1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5 ; Make sure the stack is never realigned for entry functions.
7 define amdgpu_kernel void @max_alignment_128() #0 {
8 ; VI-LABEL: max_alignment_128:
9 ; VI: ; %bb.0:
10 ; VI-NEXT: s_add_u32 s4, s4, s7
11 ; VI-NEXT: v_mov_b32_e32 v0, 9
12 ; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
13 ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
14 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:128
15 ; VI-NEXT: s_endpgm
16 ; VI-NEXT: .section .rodata,#alloc
17 ; VI-NEXT: .p2align 6
18 ; VI-NEXT: .amdhsa_kernel max_alignment_128
19 ; VI-NEXT: .amdhsa_group_segment_fixed_size 0
20 ; VI-NEXT: .amdhsa_private_segment_fixed_size 256
21 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
22 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
23 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
24 ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
25 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
26 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
27 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
28 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
29 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
30 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
31 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
32 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
33 ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
34 ; VI-NEXT: .amdhsa_next_free_vgpr 1
35 ; VI-NEXT: .amdhsa_next_free_sgpr 8
36 ; VI-NEXT: .amdhsa_reserve_vcc 0
37 ; VI-NEXT: .amdhsa_float_round_mode_32 0
38 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0
39 ; VI-NEXT: .amdhsa_float_denorm_mode_32 0
40 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
41 ; VI-NEXT: .amdhsa_dx10_clamp 1
42 ; VI-NEXT: .amdhsa_ieee_mode 1
43 ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
44 ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
45 ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
46 ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
47 ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
48 ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
49 ; VI-NEXT: .amdhsa_exception_int_div_zero 0
50 ; VI-NEXT: .end_amdhsa_kernel
53 ; GFX9-LABEL: max_alignment_128:
54 ; GFX9: ; %bb.0:
55 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
56 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
57 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
58 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:128
59 ; GFX9-NEXT: s_endpgm
60 ; GFX9-NEXT: .section .rodata,#alloc
61 ; GFX9-NEXT: .p2align 6
62 ; GFX9-NEXT: .amdhsa_kernel max_alignment_128
63 ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
64 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256
65 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
66 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
67 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
68 ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
69 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
70 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
71 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
72 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
73 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
74 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
75 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
76 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
77 ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
78 ; GFX9-NEXT: .amdhsa_next_free_vgpr 1
79 ; GFX9-NEXT: .amdhsa_next_free_sgpr 8
80 ; GFX9-NEXT: .amdhsa_reserve_vcc 0
81 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0
82 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
83 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0
84 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
85 ; GFX9-NEXT: .amdhsa_dx10_clamp 1
86 ; GFX9-NEXT: .amdhsa_ieee_mode 1
87 ; GFX9-NEXT: .amdhsa_fp16_overflow 0
88 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
89 ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
90 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
91 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
92 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
93 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
94 ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
95 ; GFX9-NEXT: .end_amdhsa_kernel
97 %alloca.align = alloca i32, align 128, addrspace(5) ; 128-byte-aligned addrspace(5) slot; the checks above expect the store at offset:128, a private segment size of 256, and no stack-realignment instructions
98 store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128 ; volatile, presumably so the store is not optimized away
99 ret void
100 }
102 define amdgpu_kernel void @stackrealign_attr() #1 {
103 ; VI-LABEL: stackrealign_attr:
104 ; VI: ; %bb.0:
105 ; VI-NEXT: s_add_u32 s4, s4, s7
106 ; VI-NEXT: v_mov_b32_e32 v0, 9
107 ; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
108 ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
109 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
110 ; VI-NEXT: s_endpgm
111 ; VI-NEXT: .section .rodata,#alloc
112 ; VI-NEXT: .p2align 6
113 ; VI-NEXT: .amdhsa_kernel stackrealign_attr
114 ; VI-NEXT: .amdhsa_group_segment_fixed_size 0
115 ; VI-NEXT: .amdhsa_private_segment_fixed_size 8
116 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
117 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
118 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
119 ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
120 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
121 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
122 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
123 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
124 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
125 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
126 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
127 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
128 ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
129 ; VI-NEXT: .amdhsa_next_free_vgpr 1
130 ; VI-NEXT: .amdhsa_next_free_sgpr 8
131 ; VI-NEXT: .amdhsa_reserve_vcc 0
132 ; VI-NEXT: .amdhsa_float_round_mode_32 0
133 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0
134 ; VI-NEXT: .amdhsa_float_denorm_mode_32 0
135 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
136 ; VI-NEXT: .amdhsa_dx10_clamp 1
137 ; VI-NEXT: .amdhsa_ieee_mode 1
138 ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
139 ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
140 ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
141 ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
142 ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
143 ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
144 ; VI-NEXT: .amdhsa_exception_int_div_zero 0
145 ; VI-NEXT: .end_amdhsa_kernel
148 ; GFX9-LABEL: stackrealign_attr:
149 ; GFX9: ; %bb.0:
150 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
151 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
152 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
153 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
154 ; GFX9-NEXT: s_endpgm
155 ; GFX9-NEXT: .section .rodata,#alloc
156 ; GFX9-NEXT: .p2align 6
157 ; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
158 ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
159 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8
160 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
161 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
162 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
163 ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
164 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
165 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
166 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
167 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
168 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
169 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
170 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
171 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
172 ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
173 ; GFX9-NEXT: .amdhsa_next_free_vgpr 1
174 ; GFX9-NEXT: .amdhsa_next_free_sgpr 8
175 ; GFX9-NEXT: .amdhsa_reserve_vcc 0
176 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0
177 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
178 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0
179 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
180 ; GFX9-NEXT: .amdhsa_dx10_clamp 1
181 ; GFX9-NEXT: .amdhsa_ieee_mode 1
182 ; GFX9-NEXT: .amdhsa_fp16_overflow 0
183 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
184 ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
185 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
186 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
187 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
188 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
189 ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
190 ; GFX9-NEXT: .end_amdhsa_kernel
192 %alloca.align = alloca i32, align 4, addrspace(5) ; ordinary 4-byte-aligned slot; this kernel carries the "stackrealign" attribute, and the checks above still expect a plain store at offset:4 with a private segment size of 8
193 store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4 ; volatile, presumably so the store is not optimized away
194 ret void
195 }
197 define amdgpu_kernel void @alignstack_attr() #2 {
198 ; VI-LABEL: alignstack_attr:
199 ; VI: ; %bb.0:
200 ; VI-NEXT: s_add_u32 s4, s4, s7
201 ; VI-NEXT: v_mov_b32_e32 v0, 9
202 ; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
203 ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
204 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
205 ; VI-NEXT: s_endpgm
206 ; VI-NEXT: .section .rodata,#alloc
207 ; VI-NEXT: .p2align 6
208 ; VI-NEXT: .amdhsa_kernel alignstack_attr
209 ; VI-NEXT: .amdhsa_group_segment_fixed_size 0
210 ; VI-NEXT: .amdhsa_private_segment_fixed_size 128
211 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
212 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
213 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
214 ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
215 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
216 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
217 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
218 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
219 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
220 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
221 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
222 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
223 ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
224 ; VI-NEXT: .amdhsa_next_free_vgpr 1
225 ; VI-NEXT: .amdhsa_next_free_sgpr 8
226 ; VI-NEXT: .amdhsa_reserve_vcc 0
227 ; VI-NEXT: .amdhsa_float_round_mode_32 0
228 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0
229 ; VI-NEXT: .amdhsa_float_denorm_mode_32 0
230 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
231 ; VI-NEXT: .amdhsa_dx10_clamp 1
232 ; VI-NEXT: .amdhsa_ieee_mode 1
233 ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
234 ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
235 ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
236 ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
237 ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
238 ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
239 ; VI-NEXT: .amdhsa_exception_int_div_zero 0
240 ; VI-NEXT: .end_amdhsa_kernel
243 ; GFX9-LABEL: alignstack_attr:
244 ; GFX9: ; %bb.0:
245 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
246 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
247 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
248 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
249 ; GFX9-NEXT: s_endpgm
250 ; GFX9-NEXT: .section .rodata,#alloc
251 ; GFX9-NEXT: .p2align 6
252 ; GFX9-NEXT: .amdhsa_kernel alignstack_attr
253 ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
254 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128
255 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
256 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
257 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
258 ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
259 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
260 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
261 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
262 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
263 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
264 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
265 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
266 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
267 ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
268 ; GFX9-NEXT: .amdhsa_next_free_vgpr 1
269 ; GFX9-NEXT: .amdhsa_next_free_sgpr 8
270 ; GFX9-NEXT: .amdhsa_reserve_vcc 0
271 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0
272 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
273 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0
274 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
275 ; GFX9-NEXT: .amdhsa_dx10_clamp 1
276 ; GFX9-NEXT: .amdhsa_ieee_mode 1
277 ; GFX9-NEXT: .amdhsa_fp16_overflow 0
278 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
279 ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
280 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
281 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
282 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
283 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
284 ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
285 ; GFX9-NEXT: .end_amdhsa_kernel
287 %alloca.align = alloca i32, align 4, addrspace(5) ; ordinary 4-byte-aligned slot; this kernel carries alignstack=128, which the checks above show as a private segment size of 128 while the store stays at offset:4
288 store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4 ; volatile, presumably so the store is not optimized away
289 ret void
290 }
292 attributes #0 = { nounwind } ; used by @max_alignment_128: no stack-alignment attribute, only the over-aligned alloca
293 attributes #1 = { nounwind "stackrealign" } ; used by @stackrealign_attr: explicit stack-realignment request
294 attributes #2 = { nounwind alignstack=128 } ; used by @alignstack_attr: explicit 128-byte stack alignment