; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s

; Externally defined function-pointer slots living in address space 4.
; The kernels in this file load their indirect-call target from these:
;   @gv.fptr0 holds a pointer to a function of type void()
;   @gv.fptr1 holds a pointer to a function of type void(i32)
@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4
@gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4
; Kernel that loads a void() function pointer from @gv.fptr0 and calls it
; with no arguments. The unnamed i8 kernel argument is never read.
; Expected code for both instruction selectors: the pointer is fetched with
; s_load_dwordx2 through a pc-relative address into an SGPR pair, the three
; workitem ids are packed into v31 (shifts by 10 and 20, then OR), and the
; call is a single s_swappc_b64 on the loaded SGPR pair.
define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
; GCN-LABEL: test_indirect_call_sgpr_ptr:
; GCN: .amd_kernel_code_t
; GCN-NEXT: amd_code_version_major = 1
; GCN-NEXT: amd_code_version_minor = 2
; GCN-NEXT: amd_machine_kind = 1
; GCN-NEXT: amd_machine_version_major = 7
; GCN-NEXT: amd_machine_version_minor = 0
; GCN-NEXT: amd_machine_version_stepping = 0
; GCN-NEXT: kernel_code_entry_byte_offset = 256
; GCN-NEXT: kernel_code_prefetch_byte_size = 0
; GCN-NEXT: granulated_workitem_vgpr_count = 10
; GCN-NEXT: granulated_wavefront_sgpr_count = 8
; GCN-NEXT: priority = 0
; GCN-NEXT: float_mode = 240
; GCN-NEXT: priv = 0
; GCN-NEXT: enable_dx10_clamp = 1
; GCN-NEXT: debug_mode = 0
; GCN-NEXT: enable_ieee_mode = 1
; GCN-NEXT: enable_wgp_mode = 0
; GCN-NEXT: enable_mem_ordered = 0
; GCN-NEXT: enable_fwd_progress = 0
; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
; GCN-NEXT: user_sgpr_count = 14
; GCN-NEXT: enable_trap_handler = 0
; GCN-NEXT: enable_sgpr_workgroup_id_x = 1
; GCN-NEXT: enable_sgpr_workgroup_id_y = 1
; GCN-NEXT: enable_sgpr_workgroup_id_z = 1
; GCN-NEXT: enable_sgpr_workgroup_info = 0
; GCN-NEXT: enable_vgpr_workitem_id = 2
; GCN-NEXT: enable_exception_msb = 0
; GCN-NEXT: granulated_lds_size = 0
; GCN-NEXT: enable_exception = 0
; GCN-NEXT: enable_sgpr_private_segment_buffer = 1
; GCN-NEXT: enable_sgpr_dispatch_ptr = 1
; GCN-NEXT: enable_sgpr_queue_ptr = 1
; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1
; GCN-NEXT: enable_sgpr_dispatch_id = 1
; GCN-NEXT: enable_sgpr_flat_scratch_init = 1
; GCN-NEXT: enable_sgpr_private_segment_size = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0
; GCN-NEXT: enable_wavefront_size32 = 0
; GCN-NEXT: enable_ordered_append_gds = 0
; GCN-NEXT: private_element_size = 1
; GCN-NEXT: is_ptr64 = 1
; GCN-NEXT: is_dynamic_callstack = 1
; GCN-NEXT: is_debug_enabled = 0
; GCN-NEXT: is_xnack_enabled = 0
; GCN-NEXT: workitem_private_segment_byte_size = 16384
; GCN-NEXT: workgroup_group_segment_byte_size = 0
; GCN-NEXT: gds_segment_byte_size = 0
; GCN-NEXT: kernarg_segment_byte_size = 64
; GCN-NEXT: workgroup_fbarrier_count = 0
; GCN-NEXT: wavefront_sgpr_count = 68
; GCN-NEXT: workitem_vgpr_count = 42
; GCN-NEXT: reserved_vgpr_first = 0
; GCN-NEXT: reserved_vgpr_count = 0
; GCN-NEXT: reserved_sgpr_first = 0
; GCN-NEXT: reserved_sgpr_count = 0
; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
; GCN-NEXT: debug_private_segment_buffer_sgpr = 0
; GCN-NEXT: kernarg_segment_alignment = 4
; GCN-NEXT: group_segment_alignment = 4
; GCN-NEXT: private_segment_alignment = 4
; GCN-NEXT: wavefront_size = 6
; GCN-NEXT: call_convention = -1
; GCN-NEXT: runtime_loader_kernel_symbol = 0
; GCN-NEXT: .end_amd_kernel_code_t
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-NEXT: s_add_i32 s12, s12, s17
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-NEXT: s_add_u32 s0, s0, s17
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_mov_b32 s12, s14
; GCN-NEXT: s_getpc_b64 s[14:15]
; GCN-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
; GCN-NEXT: s_add_u32 s8, s8, 8
; GCN-NEXT: s_addc_u32 s9, s9, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GCN-NEXT: v_or_b32_e32 v0, v0, v1
; GCN-NEXT: v_or_b32_e32 v31, v0, v2
; GCN-NEXT: s_mov_b32 s14, s16
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GCN-NEXT: s_endpgm
;
; GISEL-LABEL: test_indirect_call_sgpr_ptr:
; GISEL: .amd_kernel_code_t
; GISEL-NEXT: amd_code_version_major = 1
; GISEL-NEXT: amd_code_version_minor = 2
; GISEL-NEXT: amd_machine_kind = 1
; GISEL-NEXT: amd_machine_version_major = 7
; GISEL-NEXT: amd_machine_version_minor = 0
; GISEL-NEXT: amd_machine_version_stepping = 0
; GISEL-NEXT: kernel_code_entry_byte_offset = 256
; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
; GISEL-NEXT: granulated_workitem_vgpr_count = 10
; GISEL-NEXT: granulated_wavefront_sgpr_count = 8
; GISEL-NEXT: priority = 0
; GISEL-NEXT: float_mode = 240
; GISEL-NEXT: priv = 0
; GISEL-NEXT: enable_dx10_clamp = 1
; GISEL-NEXT: debug_mode = 0
; GISEL-NEXT: enable_ieee_mode = 1
; GISEL-NEXT: enable_wgp_mode = 0
; GISEL-NEXT: enable_mem_ordered = 0
; GISEL-NEXT: enable_fwd_progress = 0
; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
; GISEL-NEXT: user_sgpr_count = 14
; GISEL-NEXT: enable_trap_handler = 0
; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
; GISEL-NEXT: enable_sgpr_workgroup_info = 0
; GISEL-NEXT: enable_vgpr_workitem_id = 2
; GISEL-NEXT: enable_exception_msb = 0
; GISEL-NEXT: granulated_lds_size = 0
; GISEL-NEXT: enable_exception = 0
; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
; GISEL-NEXT: enable_sgpr_queue_ptr = 1
; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
; GISEL-NEXT: enable_sgpr_dispatch_id = 1
; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1
; GISEL-NEXT: enable_sgpr_private_segment_size = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
; GISEL-NEXT: enable_wavefront_size32 = 0
; GISEL-NEXT: enable_ordered_append_gds = 0
; GISEL-NEXT: private_element_size = 1
; GISEL-NEXT: is_ptr64 = 1
; GISEL-NEXT: is_dynamic_callstack = 1
; GISEL-NEXT: is_debug_enabled = 0
; GISEL-NEXT: is_xnack_enabled = 0
; GISEL-NEXT: workitem_private_segment_byte_size = 16384
; GISEL-NEXT: workgroup_group_segment_byte_size = 0
; GISEL-NEXT: gds_segment_byte_size = 0
; GISEL-NEXT: kernarg_segment_byte_size = 64
; GISEL-NEXT: workgroup_fbarrier_count = 0
; GISEL-NEXT: wavefront_sgpr_count = 68
; GISEL-NEXT: workitem_vgpr_count = 42
; GISEL-NEXT: reserved_vgpr_first = 0
; GISEL-NEXT: reserved_vgpr_count = 0
; GISEL-NEXT: reserved_sgpr_first = 0
; GISEL-NEXT: reserved_sgpr_count = 0
; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
; GISEL-NEXT: kernarg_segment_alignment = 4
; GISEL-NEXT: group_segment_alignment = 4
; GISEL-NEXT: private_segment_alignment = 4
; GISEL-NEXT: wavefront_size = 6
; GISEL-NEXT: call_convention = -1
; GISEL-NEXT: runtime_loader_kernel_symbol = 0
; GISEL-NEXT: .end_amd_kernel_code_t
; GISEL-NEXT: ; %bb.0:
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
; GISEL-NEXT: s_add_i32 s12, s12, s17
; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GISEL-NEXT: s_add_u32 s0, s0, s17
; GISEL-NEXT: s_addc_u32 s1, s1, 0
; GISEL-NEXT: s_mov_b32 s13, s15
; GISEL-NEXT: s_mov_b32 s12, s14
; GISEL-NEXT: s_getpc_b64 s[14:15]
; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
; GISEL-NEXT: s_add_u32 s8, s8, 8
; GISEL-NEXT: s_addc_u32 s9, s9, 0
; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
; GISEL-NEXT: s_mov_b32 s14, s16
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GISEL-NEXT: s_endpgm
  %fptr = load void()*, void()* addrspace(4)* @gv.fptr0
  call void %fptr()
  ret void
}
; Kernel that loads a void(i32) function pointer from @gv.fptr1 and calls it
; with the constant 123. The unnamed i8 kernel argument is never read.
; Expected code matches the no-argument kernel, plus one extra instruction:
; 0x7b (123) is moved into v0 after v31 has been assembled from the workitem
; ids and before the s_swappc_b64 on the loaded SGPR pair.
define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
; GCN: .amd_kernel_code_t
; GCN-NEXT: amd_code_version_major = 1
; GCN-NEXT: amd_code_version_minor = 2
; GCN-NEXT: amd_machine_kind = 1
; GCN-NEXT: amd_machine_version_major = 7
; GCN-NEXT: amd_machine_version_minor = 0
; GCN-NEXT: amd_machine_version_stepping = 0
; GCN-NEXT: kernel_code_entry_byte_offset = 256
; GCN-NEXT: kernel_code_prefetch_byte_size = 0
; GCN-NEXT: granulated_workitem_vgpr_count = 10
; GCN-NEXT: granulated_wavefront_sgpr_count = 8
; GCN-NEXT: priority = 0
; GCN-NEXT: float_mode = 240
; GCN-NEXT: priv = 0
; GCN-NEXT: enable_dx10_clamp = 1
; GCN-NEXT: debug_mode = 0
; GCN-NEXT: enable_ieee_mode = 1
; GCN-NEXT: enable_wgp_mode = 0
; GCN-NEXT: enable_mem_ordered = 0
; GCN-NEXT: enable_fwd_progress = 0
; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
; GCN-NEXT: user_sgpr_count = 14
; GCN-NEXT: enable_trap_handler = 0
; GCN-NEXT: enable_sgpr_workgroup_id_x = 1
; GCN-NEXT: enable_sgpr_workgroup_id_y = 1
; GCN-NEXT: enable_sgpr_workgroup_id_z = 1
; GCN-NEXT: enable_sgpr_workgroup_info = 0
; GCN-NEXT: enable_vgpr_workitem_id = 2
; GCN-NEXT: enable_exception_msb = 0
; GCN-NEXT: granulated_lds_size = 0
; GCN-NEXT: enable_exception = 0
; GCN-NEXT: enable_sgpr_private_segment_buffer = 1
; GCN-NEXT: enable_sgpr_dispatch_ptr = 1
; GCN-NEXT: enable_sgpr_queue_ptr = 1
; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1
; GCN-NEXT: enable_sgpr_dispatch_id = 1
; GCN-NEXT: enable_sgpr_flat_scratch_init = 1
; GCN-NEXT: enable_sgpr_private_segment_size = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0
; GCN-NEXT: enable_wavefront_size32 = 0
; GCN-NEXT: enable_ordered_append_gds = 0
; GCN-NEXT: private_element_size = 1
; GCN-NEXT: is_ptr64 = 1
; GCN-NEXT: is_dynamic_callstack = 1
; GCN-NEXT: is_debug_enabled = 0
; GCN-NEXT: is_xnack_enabled = 0
; GCN-NEXT: workitem_private_segment_byte_size = 16384
; GCN-NEXT: workgroup_group_segment_byte_size = 0
; GCN-NEXT: gds_segment_byte_size = 0
; GCN-NEXT: kernarg_segment_byte_size = 64
; GCN-NEXT: workgroup_fbarrier_count = 0
; GCN-NEXT: wavefront_sgpr_count = 68
; GCN-NEXT: workitem_vgpr_count = 42
; GCN-NEXT: reserved_vgpr_first = 0
; GCN-NEXT: reserved_vgpr_count = 0
; GCN-NEXT: reserved_sgpr_first = 0
; GCN-NEXT: reserved_sgpr_count = 0
; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
; GCN-NEXT: debug_private_segment_buffer_sgpr = 0
; GCN-NEXT: kernarg_segment_alignment = 4
; GCN-NEXT: group_segment_alignment = 4
; GCN-NEXT: private_segment_alignment = 4
; GCN-NEXT: wavefront_size = 6
; GCN-NEXT: call_convention = -1
; GCN-NEXT: runtime_loader_kernel_symbol = 0
; GCN-NEXT: .end_amd_kernel_code_t
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-NEXT: s_add_i32 s12, s12, s17
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-NEXT: s_add_u32 s0, s0, s17
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_mov_b32 s12, s14
; GCN-NEXT: s_getpc_b64 s[14:15]
; GCN-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
; GCN-NEXT: s_add_u32 s8, s8, 8
; GCN-NEXT: s_addc_u32 s9, s9, 0
; GCN-NEXT: v_or_b32_e32 v0, v0, v1
; GCN-NEXT: v_or_b32_e32 v31, v0, v2
; GCN-NEXT: v_mov_b32_e32 v0, 0x7b
; GCN-NEXT: s_mov_b32 s14, s16
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GCN-NEXT: s_endpgm
;
; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
; GISEL: .amd_kernel_code_t
; GISEL-NEXT: amd_code_version_major = 1
; GISEL-NEXT: amd_code_version_minor = 2
; GISEL-NEXT: amd_machine_kind = 1
; GISEL-NEXT: amd_machine_version_major = 7
; GISEL-NEXT: amd_machine_version_minor = 0
; GISEL-NEXT: amd_machine_version_stepping = 0
; GISEL-NEXT: kernel_code_entry_byte_offset = 256
; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
; GISEL-NEXT: granulated_workitem_vgpr_count = 10
; GISEL-NEXT: granulated_wavefront_sgpr_count = 8
; GISEL-NEXT: priority = 0
; GISEL-NEXT: float_mode = 240
; GISEL-NEXT: priv = 0
; GISEL-NEXT: enable_dx10_clamp = 1
; GISEL-NEXT: debug_mode = 0
; GISEL-NEXT: enable_ieee_mode = 1
; GISEL-NEXT: enable_wgp_mode = 0
; GISEL-NEXT: enable_mem_ordered = 0
; GISEL-NEXT: enable_fwd_progress = 0
; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
; GISEL-NEXT: user_sgpr_count = 14
; GISEL-NEXT: enable_trap_handler = 0
; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
; GISEL-NEXT: enable_sgpr_workgroup_info = 0
; GISEL-NEXT: enable_vgpr_workitem_id = 2
; GISEL-NEXT: enable_exception_msb = 0
; GISEL-NEXT: granulated_lds_size = 0
; GISEL-NEXT: enable_exception = 0
; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
; GISEL-NEXT: enable_sgpr_queue_ptr = 1
; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
; GISEL-NEXT: enable_sgpr_dispatch_id = 1
; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1
; GISEL-NEXT: enable_sgpr_private_segment_size = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
; GISEL-NEXT: enable_wavefront_size32 = 0
; GISEL-NEXT: enable_ordered_append_gds = 0
; GISEL-NEXT: private_element_size = 1
; GISEL-NEXT: is_ptr64 = 1
; GISEL-NEXT: is_dynamic_callstack = 1
; GISEL-NEXT: is_debug_enabled = 0
; GISEL-NEXT: is_xnack_enabled = 0
; GISEL-NEXT: workitem_private_segment_byte_size = 16384
; GISEL-NEXT: workgroup_group_segment_byte_size = 0
; GISEL-NEXT: gds_segment_byte_size = 0
; GISEL-NEXT: kernarg_segment_byte_size = 64
; GISEL-NEXT: workgroup_fbarrier_count = 0
; GISEL-NEXT: wavefront_sgpr_count = 68
; GISEL-NEXT: workitem_vgpr_count = 42
; GISEL-NEXT: reserved_vgpr_first = 0
; GISEL-NEXT: reserved_vgpr_count = 0
; GISEL-NEXT: reserved_sgpr_first = 0
; GISEL-NEXT: reserved_sgpr_count = 0
; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
; GISEL-NEXT: kernarg_segment_alignment = 4
; GISEL-NEXT: group_segment_alignment = 4
; GISEL-NEXT: private_segment_alignment = 4
; GISEL-NEXT: wavefront_size = 6
; GISEL-NEXT: call_convention = -1
; GISEL-NEXT: runtime_loader_kernel_symbol = 0
; GISEL-NEXT: .end_amd_kernel_code_t
; GISEL-NEXT: ; %bb.0:
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
; GISEL-NEXT: s_add_i32 s12, s12, s17
; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GISEL-NEXT: s_add_u32 s0, s0, s17
; GISEL-NEXT: s_addc_u32 s1, s1, 0
; GISEL-NEXT: s_mov_b32 s13, s15
; GISEL-NEXT: s_mov_b32 s12, s14
; GISEL-NEXT: s_getpc_b64 s[14:15]
; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
; GISEL-NEXT: s_add_u32 s8, s8, 8
; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-NEXT: s_addc_u32 s9, s9, 0
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GISEL-NEXT: s_mov_b32 s14, s16
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GISEL-NEXT: s_endpgm
  %fptr = load void(i32)*, void(i32)* addrspace(4)* @gv.fptr1
  call void %fptr(i32 123)
  ret void
}
; Non-kernel function that calls through a void() pointer received as its
; argument. In the expected code the pointer sits in v[0:1], so the call is
; wrapped in a loop: v_readfirstlane copies one lane's pointer into s[16:17],
; v_cmp_eq_u64 + s_and_saveexec restrict exec to the lanes holding that same
; pointer, s_swappc_b64 performs the call, and s_xor_b64 / s_cbranch_execnz
; repeat while lanes remain. SGPRs that must survive the call are parked in
; lanes of v40 with v_writelane and restored with v_readlane; v40 itself is
; spilled to and reloaded from the stack around the function body.
define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-LABEL: test_indirect_call_vgpr_ptr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: v_writelane_b32 v40, s33, 17
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: v_writelane_b32 v40, s34, 2
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
; GCN-NEXT: v_writelane_b32 v40, s38, 6
; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s40, 8
; GCN-NEXT: v_writelane_b32 v40, s41, 9
; GCN-NEXT: v_writelane_b32 v40, s42, 10
; GCN-NEXT: v_writelane_b32 v40, s43, 11
; GCN-NEXT: v_writelane_b32 v40, s44, 12
; GCN-NEXT: v_writelane_b32 v40, s46, 13
; GCN-NEXT: v_writelane_b32 v40, s47, 14
; GCN-NEXT: v_writelane_b32 v40, s48, 15
; GCN-NEXT: v_writelane_b32 v40, s49, 16
; GCN-NEXT: s_mov_b32 s42, s14
; GCN-NEXT: s_mov_b32 s43, s13
; GCN-NEXT: s_mov_b32 s44, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
; GCN-NEXT: s_mov_b64 s[46:47], exec
; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
; GCN-NEXT: s_mov_b32 s12, s44
; GCN-NEXT: s_mov_b32 s13, s43
; GCN-NEXT: s_mov_b32 s14, s42
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
; GCN-NEXT: s_cbranch_execnz .LBB2_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[46:47]
; GCN-NEXT: v_readlane_b32 s49, v40, 16
; GCN-NEXT: v_readlane_b32 s48, v40, 15
; GCN-NEXT: v_readlane_b32 s47, v40, 14
; GCN-NEXT: v_readlane_b32 s46, v40, 13
; GCN-NEXT: v_readlane_b32 s44, v40, 12
; GCN-NEXT: v_readlane_b32 s43, v40, 11
; GCN-NEXT: v_readlane_b32 s42, v40, 10
; GCN-NEXT: v_readlane_b32 s41, v40, 9
; GCN-NEXT: v_readlane_b32 s40, v40, 8
; GCN-NEXT: v_readlane_b32 s39, v40, 7
; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
; GCN-NEXT: v_readlane_b32 s34, v40, 2
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 17
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_indirect_call_vgpr_ptr:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[16:17]
; GISEL-NEXT: v_writelane_b32 v40, s33, 17
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s30, 0
; GISEL-NEXT: v_writelane_b32 v40, s31, 1
; GISEL-NEXT: v_writelane_b32 v40, s34, 2
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
; GISEL-NEXT: v_writelane_b32 v40, s38, 6
; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s40, 8
; GISEL-NEXT: v_writelane_b32 v40, s41, 9
; GISEL-NEXT: v_writelane_b32 v40, s42, 10
; GISEL-NEXT: v_writelane_b32 v40, s43, 11
; GISEL-NEXT: v_writelane_b32 v40, s44, 12
; GISEL-NEXT: v_writelane_b32 v40, s46, 13
; GISEL-NEXT: v_writelane_b32 v40, s47, 14
; GISEL-NEXT: v_writelane_b32 v40, s48, 15
; GISEL-NEXT: v_writelane_b32 v40, s49, 16
; GISEL-NEXT: s_mov_b32 s42, s14
; GISEL-NEXT: s_mov_b32 s43, s13
; GISEL-NEXT: s_mov_b32 s44, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
; GISEL-NEXT: s_mov_b64 s[46:47], exec
; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
; GISEL-NEXT: s_mov_b32 s12, s44
; GISEL-NEXT: s_mov_b32 s13, s43
; GISEL-NEXT: s_mov_b32 s14, s42
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: ; implicit-def: $vgpr31
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
; GISEL-NEXT: s_cbranch_execnz .LBB2_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
; GISEL-NEXT: v_readlane_b32 s49, v40, 16
; GISEL-NEXT: v_readlane_b32 s48, v40, 15
; GISEL-NEXT: v_readlane_b32 s47, v40, 14
; GISEL-NEXT: v_readlane_b32 s46, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
; GISEL-NEXT: v_readlane_b32 s41, v40, 9
; GISEL-NEXT: v_readlane_b32 s40, v40, 8
; GISEL-NEXT: v_readlane_b32 s39, v40, 7
; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
; GISEL-NEXT: v_readlane_b32 s34, v40, 2
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: s_addk_i32 s32, 0xfc00
; GISEL-NEXT: v_readlane_b32 s33, v40, 17
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
  call void %fptr()
  ret void
}
; Non-kernel function that calls through a void(i32) pointer received as its
; argument, passing the constant 123. The expected code uses the same
; readfirstlane / compare / saveexec / swappc loop as the no-argument variant.
; The two selectors differ in where 0x7b (123) is materialized:
;   - SelectionDAG moves it into v2 ahead of the loop and copies v2 into v0
;     right before each s_swappc_b64;
;   - GlobalISel moves the immediate straight into v0 inside the loop.
define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-LABEL: test_indirect_call_vgpr_ptr_arg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: v_writelane_b32 v40, s33, 17
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: v_writelane_b32 v40, s34, 2
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
; GCN-NEXT: v_writelane_b32 v40, s38, 6
; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s40, 8
; GCN-NEXT: v_writelane_b32 v40, s41, 9
; GCN-NEXT: v_writelane_b32 v40, s42, 10
; GCN-NEXT: v_writelane_b32 v40, s43, 11
; GCN-NEXT: v_writelane_b32 v40, s44, 12
; GCN-NEXT: v_writelane_b32 v40, s46, 13
; GCN-NEXT: v_writelane_b32 v40, s47, 14
; GCN-NEXT: v_writelane_b32 v40, s48, 15
; GCN-NEXT: v_writelane_b32 v40, s49, 16
; GCN-NEXT: s_mov_b32 s42, s14
; GCN-NEXT: s_mov_b32 s43, s13
; GCN-NEXT: s_mov_b32 s44, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
; GCN-NEXT: s_mov_b64 s[46:47], exec
; GCN-NEXT: v_mov_b32_e32 v2, 0x7b
; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
; GCN-NEXT: s_mov_b32 s12, s44
; GCN-NEXT: s_mov_b32 s13, s43
; GCN-NEXT: s_mov_b32 s14, s42
; GCN-NEXT: v_mov_b32_e32 v0, v2
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
; GCN-NEXT: ; implicit-def: $vgpr2
; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
; GCN-NEXT: s_cbranch_execnz .LBB3_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[46:47]
; GCN-NEXT: v_readlane_b32 s49, v40, 16
; GCN-NEXT: v_readlane_b32 s48, v40, 15
; GCN-NEXT: v_readlane_b32 s47, v40, 14
; GCN-NEXT: v_readlane_b32 s46, v40, 13
; GCN-NEXT: v_readlane_b32 s44, v40, 12
; GCN-NEXT: v_readlane_b32 s43, v40, 11
; GCN-NEXT: v_readlane_b32 s42, v40, 10
; GCN-NEXT: v_readlane_b32 s41, v40, 9
; GCN-NEXT: v_readlane_b32 s40, v40, 8
; GCN-NEXT: v_readlane_b32 s39, v40, 7
; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
; GCN-NEXT: v_readlane_b32 s34, v40, 2
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 17
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[16:17]
; GISEL-NEXT: v_writelane_b32 v40, s33, 17
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s30, 0
; GISEL-NEXT: v_writelane_b32 v40, s31, 1
; GISEL-NEXT: v_writelane_b32 v40, s34, 2
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
; GISEL-NEXT: v_writelane_b32 v40, s38, 6
; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s40, 8
; GISEL-NEXT: v_writelane_b32 v40, s41, 9
; GISEL-NEXT: v_writelane_b32 v40, s42, 10
; GISEL-NEXT: v_writelane_b32 v40, s43, 11
; GISEL-NEXT: v_writelane_b32 v40, s44, 12
; GISEL-NEXT: v_writelane_b32 v40, s46, 13
; GISEL-NEXT: v_writelane_b32 v40, s47, 14
; GISEL-NEXT: v_writelane_b32 v40, s48, 15
; GISEL-NEXT: v_writelane_b32 v40, s49, 16
; GISEL-NEXT: s_mov_b32 s42, s14
; GISEL-NEXT: s_mov_b32 s43, s13
; GISEL-NEXT: s_mov_b32 s44, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
; GISEL-NEXT: s_mov_b64 s[46:47], exec
; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
; GISEL-NEXT: s_mov_b32 s12, s44
; GISEL-NEXT: s_mov_b32 s13, s43
; GISEL-NEXT: s_mov_b32 s14, s42
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: ; implicit-def: $vgpr31
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
; GISEL-NEXT: s_cbranch_execnz .LBB3_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
; GISEL-NEXT: v_readlane_b32 s49, v40, 16
; GISEL-NEXT: v_readlane_b32 s48, v40, 15
; GISEL-NEXT: v_readlane_b32 s47, v40, 14
; GISEL-NEXT: v_readlane_b32 s46, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
; GISEL-NEXT: v_readlane_b32 s41, v40, 9
; GISEL-NEXT: v_readlane_b32 s40, v40, 8
; GISEL-NEXT: v_readlane_b32 s39, v40, 7
; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
; GISEL-NEXT: v_readlane_b32 s34, v40, 2
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: s_addk_i32 s32, 0xfc00
; GISEL-NEXT: v_readlane_b32 s33, v40, 17
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
  call void %fptr(i32 123)
  ret void
}
; Indirect call through a function pointer that arrives as a plain (non-inreg)
; argument, with the callee's i32 result consumed after the call.
; Both check sequences show the same shape: the pointer is in v[0:1]; the loop
; at .LBB4_1 reads one lane's pointer into an SGPR pair with
; v_readfirstlane_b32, narrows exec to the lanes that hold that same pointer,
; calls it with s_swappc_b64, then removes those lanes from exec and repeats
; until exec is empty. Inside the loop the returned v0 is copied aside (v2 in
; the GCN checks, v1 in the GISEL checks); after exec is restored, 1 is added
; to that copy to form the value left in v0.
; s4-s14 are copied into s34-s44 before the loop and copied back ahead of every
; s_swappc_b64.
716 define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
717 ; GCN-LABEL: test_indirect_call_vgpr_ptr_ret:
719 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
720 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
721 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
722 ; GCN-NEXT: s_mov_b64 exec, s[16:17]
723 ; GCN-NEXT: v_writelane_b32 v40, s33, 17
724 ; GCN-NEXT: s_mov_b32 s33, s32
725 ; GCN-NEXT: s_addk_i32 s32, 0x400
726 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
727 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
728 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
729 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
730 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
731 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
732 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
733 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
734 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
735 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
736 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
737 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
738 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
739 ; GCN-NEXT: v_writelane_b32 v40, s46, 13
740 ; GCN-NEXT: v_writelane_b32 v40, s47, 14
741 ; GCN-NEXT: v_writelane_b32 v40, s48, 15
742 ; GCN-NEXT: v_writelane_b32 v40, s49, 16
743 ; GCN-NEXT: s_mov_b32 s42, s14
744 ; GCN-NEXT: s_mov_b32 s43, s13
745 ; GCN-NEXT: s_mov_b32 s44, s12
746 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
747 ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
748 ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
749 ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
750 ; GCN-NEXT: s_mov_b64 s[46:47], exec
751 ; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
752 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
753 ; GCN-NEXT: v_readfirstlane_b32 s17, v1
754 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
755 ; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
756 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
757 ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
758 ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
759 ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
760 ; GCN-NEXT: s_mov_b32 s12, s44
761 ; GCN-NEXT: s_mov_b32 s13, s43
762 ; GCN-NEXT: s_mov_b32 s14, s42
763 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
764 ; GCN-NEXT: v_mov_b32_e32 v2, v0
765 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
766 ; GCN-NEXT: ; implicit-def: $vgpr31
767 ; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
768 ; GCN-NEXT: s_cbranch_execnz .LBB4_1
770 ; GCN-NEXT: s_mov_b64 exec, s[46:47]
771 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2
772 ; GCN-NEXT: v_readlane_b32 s49, v40, 16
773 ; GCN-NEXT: v_readlane_b32 s48, v40, 15
774 ; GCN-NEXT: v_readlane_b32 s47, v40, 14
775 ; GCN-NEXT: v_readlane_b32 s46, v40, 13
776 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
777 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
778 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
779 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
780 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
781 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
782 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
783 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
784 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
785 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
786 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
787 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
788 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
789 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
790 ; GCN-NEXT: v_readlane_b32 s33, v40, 17
791 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
792 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
793 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
794 ; GCN-NEXT: s_waitcnt vmcnt(0)
795 ; GCN-NEXT: s_setpc_b64 s[30:31]
797 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
799 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
800 ; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
801 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
802 ; GISEL-NEXT: s_mov_b64 exec, s[16:17]
803 ; GISEL-NEXT: v_writelane_b32 v40, s33, 17
804 ; GISEL-NEXT: s_mov_b32 s33, s32
805 ; GISEL-NEXT: s_addk_i32 s32, 0x400
806 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
807 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
808 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
809 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
810 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
811 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
812 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
813 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
814 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
815 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
816 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
817 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
818 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
819 ; GISEL-NEXT: v_writelane_b32 v40, s46, 13
820 ; GISEL-NEXT: v_writelane_b32 v40, s47, 14
821 ; GISEL-NEXT: v_writelane_b32 v40, s48, 15
822 ; GISEL-NEXT: v_writelane_b32 v40, s49, 16
823 ; GISEL-NEXT: s_mov_b32 s42, s14
824 ; GISEL-NEXT: s_mov_b32 s43, s13
825 ; GISEL-NEXT: s_mov_b32 s44, s12
826 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
827 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
828 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
829 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
830 ; GISEL-NEXT: s_mov_b64 s[46:47], exec
831 ; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
832 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
833 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1
834 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
835 ; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
836 ; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
837 ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
838 ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
839 ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
840 ; GISEL-NEXT: s_mov_b32 s12, s44
841 ; GISEL-NEXT: s_mov_b32 s13, s43
842 ; GISEL-NEXT: s_mov_b32 s14, s42
843 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
844 ; GISEL-NEXT: v_mov_b32_e32 v1, v0
845 ; GISEL-NEXT: ; implicit-def: $vgpr0
846 ; GISEL-NEXT: ; implicit-def: $vgpr31
847 ; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
848 ; GISEL-NEXT: s_cbranch_execnz .LBB4_1
849 ; GISEL-NEXT: ; %bb.2:
850 ; GISEL-NEXT: s_mov_b64 exec, s[46:47]
851 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1
852 ; GISEL-NEXT: v_readlane_b32 s49, v40, 16
853 ; GISEL-NEXT: v_readlane_b32 s48, v40, 15
854 ; GISEL-NEXT: v_readlane_b32 s47, v40, 14
855 ; GISEL-NEXT: v_readlane_b32 s46, v40, 13
856 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
857 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
858 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
859 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
860 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
861 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
862 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
863 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
864 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
865 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
866 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
867 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
868 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
869 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
870 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17
871 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
872 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
873 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
874 ; GISEL-NEXT: s_waitcnt vmcnt(0)
875 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; The indirect call under test; %a is its i32 result.
876 %a = call i32 %fptr()
; Indirect call through a VGPR function pointer where the call sits in a
; conditionally executed block (%bb1 in the block-label comments of the
; checks).
; %cond arrives in v2: it is masked to bit 0 and compared per lane, and
; s_and_saveexec_b64 s[46:47] / s_cbranch_execz .LBB5_4 skip the call when no
; lane takes the branch. The call loop at .LBB5_2 runs under that narrowed
; exec and uses two further exec copies: s[48:49] holds exec on entry to the
; loop and s[50:51] the mask saved on each iteration. %bb2 re-enables the
; lanes with s_or_b64 exec, exec, s[46:47].
; The instruction sequences of the two runs differ only in the compare:
; v_cmp_eq_u32 against 1 in the GCN checks, v_cmp_ne_u32 against 0 in the
; GISEL checks.
881 define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
882 ; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch:
883 ; GCN: ; %bb.0: ; %bb0
884 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
885 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
886 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
887 ; GCN-NEXT: s_mov_b64 exec, s[16:17]
888 ; GCN-NEXT: v_writelane_b32 v40, s33, 19
889 ; GCN-NEXT: s_mov_b32 s33, s32
890 ; GCN-NEXT: s_addk_i32 s32, 0x400
891 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
892 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
893 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
894 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
895 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
896 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
897 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
898 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
899 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
900 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
901 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
902 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
903 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
904 ; GCN-NEXT: v_writelane_b32 v40, s46, 13
905 ; GCN-NEXT: v_writelane_b32 v40, s47, 14
906 ; GCN-NEXT: v_writelane_b32 v40, s48, 15
907 ; GCN-NEXT: v_writelane_b32 v40, s49, 16
908 ; GCN-NEXT: v_writelane_b32 v40, s50, 17
909 ; GCN-NEXT: v_writelane_b32 v40, s51, 18
910 ; GCN-NEXT: s_mov_b32 s42, s14
911 ; GCN-NEXT: s_mov_b32 s43, s13
912 ; GCN-NEXT: s_mov_b32 s44, s12
913 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
914 ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
915 ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
916 ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
917 ; GCN-NEXT: v_and_b32_e32 v2, 1, v2
918 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
919 ; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc
920 ; GCN-NEXT: s_cbranch_execz .LBB5_4
921 ; GCN-NEXT: ; %bb.1: ; %bb1
922 ; GCN-NEXT: s_mov_b64 s[48:49], exec
923 ; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
924 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
925 ; GCN-NEXT: v_readfirstlane_b32 s17, v1
926 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
927 ; GCN-NEXT: s_and_saveexec_b64 s[50:51], vcc
928 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
929 ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
930 ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
931 ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
932 ; GCN-NEXT: s_mov_b32 s12, s44
933 ; GCN-NEXT: s_mov_b32 s13, s43
934 ; GCN-NEXT: s_mov_b32 s14, s42
935 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
936 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
937 ; GCN-NEXT: ; implicit-def: $vgpr31
938 ; GCN-NEXT: s_xor_b64 exec, exec, s[50:51]
939 ; GCN-NEXT: s_cbranch_execnz .LBB5_2
941 ; GCN-NEXT: s_mov_b64 exec, s[48:49]
942 ; GCN-NEXT: .LBB5_4: ; %bb2
943 ; GCN-NEXT: s_or_b64 exec, exec, s[46:47]
944 ; GCN-NEXT: v_readlane_b32 s51, v40, 18
945 ; GCN-NEXT: v_readlane_b32 s50, v40, 17
946 ; GCN-NEXT: v_readlane_b32 s49, v40, 16
947 ; GCN-NEXT: v_readlane_b32 s48, v40, 15
948 ; GCN-NEXT: v_readlane_b32 s47, v40, 14
949 ; GCN-NEXT: v_readlane_b32 s46, v40, 13
950 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
951 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
952 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
953 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
954 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
955 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
956 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
957 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
958 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
959 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
960 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
961 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
962 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
963 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
964 ; GCN-NEXT: v_readlane_b32 s33, v40, 19
965 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
966 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
967 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
968 ; GCN-NEXT: s_waitcnt vmcnt(0)
969 ; GCN-NEXT: s_setpc_b64 s[30:31]
971 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch:
972 ; GISEL: ; %bb.0: ; %bb0
973 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
974 ; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
975 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
976 ; GISEL-NEXT: s_mov_b64 exec, s[16:17]
977 ; GISEL-NEXT: v_writelane_b32 v40, s33, 19
978 ; GISEL-NEXT: s_mov_b32 s33, s32
979 ; GISEL-NEXT: s_addk_i32 s32, 0x400
980 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
981 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
982 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
983 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
984 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
985 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
986 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
987 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
988 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
989 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
990 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
991 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
992 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
993 ; GISEL-NEXT: v_writelane_b32 v40, s46, 13
994 ; GISEL-NEXT: v_writelane_b32 v40, s47, 14
995 ; GISEL-NEXT: v_writelane_b32 v40, s48, 15
996 ; GISEL-NEXT: v_writelane_b32 v40, s49, 16
997 ; GISEL-NEXT: v_writelane_b32 v40, s50, 17
998 ; GISEL-NEXT: v_writelane_b32 v40, s51, 18
999 ; GISEL-NEXT: s_mov_b32 s42, s14
1000 ; GISEL-NEXT: s_mov_b32 s43, s13
1001 ; GISEL-NEXT: s_mov_b32 s44, s12
1002 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
1003 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
1004 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
1005 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
1006 ; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
1007 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1008 ; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc
1009 ; GISEL-NEXT: s_cbranch_execz .LBB5_4
1010 ; GISEL-NEXT: ; %bb.1: ; %bb1
1011 ; GISEL-NEXT: s_mov_b64 s[48:49], exec
1012 ; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
1013 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
1014 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1
1015 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
1016 ; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc
1017 ; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
1018 ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
1019 ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
1020 ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
1021 ; GISEL-NEXT: s_mov_b32 s12, s44
1022 ; GISEL-NEXT: s_mov_b32 s13, s43
1023 ; GISEL-NEXT: s_mov_b32 s14, s42
1024 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
1025 ; GISEL-NEXT: ; implicit-def: $vgpr0
1026 ; GISEL-NEXT: ; implicit-def: $vgpr31
1027 ; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51]
1028 ; GISEL-NEXT: s_cbranch_execnz .LBB5_2
1029 ; GISEL-NEXT: ; %bb.3:
1030 ; GISEL-NEXT: s_mov_b64 exec, s[48:49]
1031 ; GISEL-NEXT: .LBB5_4: ; %bb2
1032 ; GISEL-NEXT: s_or_b64 exec, exec, s[46:47]
1033 ; GISEL-NEXT: v_readlane_b32 s51, v40, 18
1034 ; GISEL-NEXT: v_readlane_b32 s50, v40, 17
1035 ; GISEL-NEXT: v_readlane_b32 s49, v40, 16
1036 ; GISEL-NEXT: v_readlane_b32 s48, v40, 15
1037 ; GISEL-NEXT: v_readlane_b32 s47, v40, 14
1038 ; GISEL-NEXT: v_readlane_b32 s46, v40, 13
1039 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1040 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1041 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1042 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1043 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1044 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1045 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1046 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1047 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1048 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1049 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1050 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1051 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1052 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1053 ; GISEL-NEXT: v_readlane_b32 s33, v40, 19
1054 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1055 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1056 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1057 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1058 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; Entry-block terminator: %cond selects between %bb1 and %bb2.
1060 br i1 %cond, label %bb1, label %bb2
; Indirect amdgpu_gfx call through a VGPR function pointer with a constant
; i32 inreg argument (123).
; The constant is materialised once, ahead of the loop, as
; s_movk_i32 s4, 0x7b. The loop at .LBB6_1 contains only the pointer
; read/compare, the exec narrowing, the s_swappc_b64 and the exec update; no
; SGPR copies appear around the call in either check sequence.
; s30, s31 and s34-s63 are saved to lanes 0-31 of v40 and s33 to lane 32; they
; are read back in the opposite order before returning.
1070 define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
1071 ; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
1073 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1075 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1076 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1077 ; GCN-NEXT: v_writelane_b32 v40, s33, 32
1078 ; GCN-NEXT: s_mov_b32 s33, s32
1079 ; GCN-NEXT: s_addk_i32 s32, 0x400
1080 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
1081 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
1082 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
1083 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
1084 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
1085 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
1086 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
1087 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
1088 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
1089 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
1090 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
1091 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
1092 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
1093 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
1094 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
1095 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
1096 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
1097 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
1098 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
1099 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
1100 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
1101 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
1102 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
1103 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
1104 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
1105 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
1106 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
1107 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
1108 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
1109 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
1110 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
1111 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
1112 ; GCN-NEXT: s_mov_b64 s[6:7], exec
1113 ; GCN-NEXT: s_movk_i32 s4, 0x7b
1114 ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
1115 ; GCN-NEXT: v_readfirstlane_b32 s10, v0
1116 ; GCN-NEXT: v_readfirstlane_b32 s11, v1
1117 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
1118 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
1119 ; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11]
1120 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
1121 ; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
1122 ; GCN-NEXT: s_cbranch_execnz .LBB6_1
1123 ; GCN-NEXT: ; %bb.2:
1124 ; GCN-NEXT: s_mov_b64 exec, s[6:7]
1125 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
1126 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
1127 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
1128 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
1129 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
1130 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
1131 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
1132 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
1133 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
1134 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
1135 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
1136 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
1137 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
1138 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
1139 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
1140 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
1141 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
1142 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
1143 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
1144 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
1145 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
1146 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
1147 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
1148 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
1149 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
1150 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
1151 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
1152 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
1153 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
1154 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
1155 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
1156 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
1157 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1158 ; GCN-NEXT: v_readlane_b32 s33, v40, 32
1159 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1160 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1161 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1162 ; GCN-NEXT: s_waitcnt vmcnt(0)
1163 ; GCN-NEXT: s_setpc_b64 s[30:31]
1165 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
1167 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1169 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1170 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1171 ; GISEL-NEXT: v_writelane_b32 v40, s33, 32
1172 ; GISEL-NEXT: s_mov_b32 s33, s32
1173 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1174 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1175 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1176 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1177 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1178 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1179 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1180 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1181 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1182 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1183 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1184 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1185 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1186 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1187 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1188 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1189 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1190 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1191 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1192 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1193 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1194 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
1195 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
1196 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
1197 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
1198 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
1199 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
1200 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
1201 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
1202 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
1203 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
1204 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
1205 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
1206 ; GISEL-NEXT: s_mov_b64 s[6:7], exec
1207 ; GISEL-NEXT: s_movk_i32 s4, 0x7b
1208 ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
1209 ; GISEL-NEXT: v_readfirstlane_b32 s8, v0
1210 ; GISEL-NEXT: v_readfirstlane_b32 s9, v1
1211 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
1212 ; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
1213 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
1214 ; GISEL-NEXT: ; implicit-def: $vgpr0
1215 ; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
1216 ; GISEL-NEXT: s_cbranch_execnz .LBB6_1
1217 ; GISEL-NEXT: ; %bb.2:
1218 ; GISEL-NEXT: s_mov_b64 exec, s[6:7]
1219 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
1220 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
1221 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
1222 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
1223 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1224 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1225 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1226 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1227 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1228 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1229 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1230 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1231 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1232 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1233 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1234 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1235 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1236 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1237 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1238 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1239 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1240 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1241 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1242 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1243 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1244 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1245 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1246 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1247 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1248 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1249 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1250 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1251 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1252 ; GISEL-NEXT: v_readlane_b32 s33, v40, 32
1253 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1254 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1255 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1256 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1257 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; The indirect call under test: amdgpu_gfx convention, constant inreg operand.
1258 call amdgpu_gfx void %fptr(i32 inreg 123)
; Indirect amdgpu_gfx call that passes %i to the callee and needs %i again
; after the call.
; %i arrives in v0 and the pointer in v[1:2]. v0 is copied to v41 before the
; loop at .LBB7_1, copied back into v0 ahead of each s_swappc_b64, and copied
; into v0 once more after exec is restored (presumably to form the i32 return
; value; the ret is not visible next to these checks, so confirm).
; v41 is spilled to the stack slot at s33 and v40 to s32 offset:4.
1262 define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) {
1263 ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1265 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1266 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1267 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1268 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1269 ; GCN-NEXT: v_writelane_b32 v40, s33, 32
1270 ; GCN-NEXT: s_mov_b32 s33, s32
1271 ; GCN-NEXT: s_addk_i32 s32, 0x400
1272 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
1273 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
1274 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
1275 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
1276 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
1277 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
1278 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
1279 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
1280 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
1281 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
1282 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
1283 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
1284 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
1285 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
1286 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
1287 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
1288 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
1289 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
1290 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
1291 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
1292 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
1293 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
1294 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
1295 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
1296 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
1297 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
1298 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
1299 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
1300 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
1301 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
1302 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
1303 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
1304 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
1305 ; GCN-NEXT: v_mov_b32_e32 v41, v0
1306 ; GCN-NEXT: s_mov_b64 s[4:5], exec
1307 ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
1308 ; GCN-NEXT: v_readfirstlane_b32 s8, v1
1309 ; GCN-NEXT: v_readfirstlane_b32 s9, v2
1310 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1311 ; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
1312 ; GCN-NEXT: v_mov_b32_e32 v0, v41
1313 ; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
1314 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
1315 ; GCN-NEXT: s_xor_b64 exec, exec, s[6:7]
1316 ; GCN-NEXT: s_cbranch_execnz .LBB7_1
1317 ; GCN-NEXT: ; %bb.2:
1318 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1319 ; GCN-NEXT: v_mov_b32_e32 v0, v41
1320 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
1321 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
1322 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
1323 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
1324 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
1325 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
1326 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
1327 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
1328 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
1329 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
1330 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
1331 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
1332 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
1333 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
1334 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
1335 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
1336 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
1337 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
1338 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
1339 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
1340 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
1341 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
1342 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
1343 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
1344 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
1345 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
1346 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
1347 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
1348 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
1349 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
1350 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
1351 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
1352 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
1353 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1354 ; GCN-NEXT: v_readlane_b32 s33, v40, 32
1355 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1356 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1357 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1358 ; GCN-NEXT: s_waitcnt vmcnt(0)
1359 ; GCN-NEXT: s_setpc_b64 s[30:31]
1361 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1363 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1364 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1365 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1366 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1367 ; GISEL-NEXT: v_writelane_b32 v40, s33, 32
1368 ; GISEL-NEXT: s_mov_b32 s33, s32
1369 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1370 ; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
1371 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1372 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1373 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1374 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1375 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1376 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1377 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1378 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1379 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1380 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1381 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1382 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1383 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1384 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1385 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1386 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1387 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1388 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1389 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1390 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1391 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
1392 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
1393 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
1394 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
1395 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
1396 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
1397 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
1398 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
1399 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
1400 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
1401 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
1402 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
1403 ; GISEL-NEXT: v_mov_b32_e32 v41, v0
1404 ; GISEL-NEXT: s_mov_b64 s[4:5], exec
1405 ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
1406 ; GISEL-NEXT: v_readfirstlane_b32 s6, v1
1407 ; GISEL-NEXT: v_readfirstlane_b32 s7, v2
1408 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1409 ; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
1410 ; GISEL-NEXT: v_mov_b32_e32 v0, v41
1411 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
1412 ; GISEL-NEXT: ; implicit-def: $vgpr1
1413 ; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
1414 ; GISEL-NEXT: s_cbranch_execnz .LBB7_1
1415 ; GISEL-NEXT: ; %bb.2:
1416 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1417 ; GISEL-NEXT: v_mov_b32_e32 v0, v41
1418 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
1419 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
1420 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
1421 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
1422 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1423 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1424 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1425 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1426 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1427 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1428 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1429 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1430 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1431 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1432 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1433 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1434 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1435 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1436 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1437 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1438 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1439 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1440 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1441 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1442 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1443 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1444 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1445 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1446 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1447 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1448 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1449 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1450 ; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
1451 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1452 ; GISEL-NEXT: v_readlane_b32 s33, v40, 32
1453 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1454 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1455 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1456 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1457 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; The indirect call under test: %i is forwarded unchanged to the callee.
1458 call amdgpu_gfx void %fptr(i32 %i)
1462 ; Use a variable inside a waterfall loop and use the return variable after the loop.
1463 ; TODO: The argument and return variable could be in the same physical register, but the register
1464 ; allocator is not able to do that because the return value clashes with the live range of an
1465 ; IMPLICIT_DEF of the argument.
1466 define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) {
1467 ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1469 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1470 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1471 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1472 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1473 ; GCN-NEXT: v_writelane_b32 v40, s33, 32
1474 ; GCN-NEXT: s_mov_b32 s33, s32
1475 ; GCN-NEXT: s_addk_i32 s32, 0x400
1476 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
1477 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
1478 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
1479 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
1480 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
1481 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
1482 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
1483 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
1484 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
1485 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
1486 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
1487 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
1488 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
1489 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
1490 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
1491 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
1492 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
1493 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
1494 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
1495 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
1496 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
1497 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
1498 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
1499 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
1500 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
1501 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
1502 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
1503 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
1504 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
1505 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
1506 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
1507 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
1508 ; GCN-NEXT: s_mov_b64 s[4:5], exec
1509 ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1510 ; GCN-NEXT: v_readfirstlane_b32 s8, v1
1511 ; GCN-NEXT: v_readfirstlane_b32 s9, v2
1512 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1513 ; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
1514 ; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
1515 ; GCN-NEXT: v_mov_b32_e32 v3, v0
1516 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
1517 ; GCN-NEXT: ; implicit-def: $vgpr0
1518 ; GCN-NEXT: s_xor_b64 exec, exec, s[6:7]
1519 ; GCN-NEXT: s_cbranch_execnz .LBB8_1
1520 ; GCN-NEXT: ; %bb.2:
1521 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1522 ; GCN-NEXT: v_mov_b32_e32 v0, v3
1523 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
1524 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
1525 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
1526 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
1527 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
1528 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
1529 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
1530 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
1531 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
1532 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
1533 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
1534 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
1535 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
1536 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
1537 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
1538 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
1539 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
1540 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
1541 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
1542 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
1543 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
1544 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
1545 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
1546 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
1547 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
1548 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
1549 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
1550 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
1551 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
1552 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
1553 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
1554 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
1555 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1556 ; GCN-NEXT: v_readlane_b32 s33, v40, 32
1557 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1558 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1559 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1560 ; GCN-NEXT: s_waitcnt vmcnt(0)
1561 ; GCN-NEXT: s_setpc_b64 s[30:31]
1563 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1565 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1567 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1568 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1569 ; GISEL-NEXT: v_writelane_b32 v40, s33, 32
1570 ; GISEL-NEXT: s_mov_b32 s33, s32
1571 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1572 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1573 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1574 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1575 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1576 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1577 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1578 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1579 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1580 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1581 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1582 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1583 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1584 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1585 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1586 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1587 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1588 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1589 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1590 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1591 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1592 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
1593 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
1594 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
1595 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
1596 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
1597 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
1598 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
1599 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
1600 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
1601 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
1602 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
1603 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
1604 ; GISEL-NEXT: s_mov_b64 s[4:5], exec
1605 ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1606 ; GISEL-NEXT: v_readfirstlane_b32 s8, v1
1607 ; GISEL-NEXT: v_readfirstlane_b32 s9, v2
1608 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1609 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
1610 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
1611 ; GISEL-NEXT: v_mov_b32_e32 v2, v0
1612 ; GISEL-NEXT: ; implicit-def: $vgpr1
1613 ; GISEL-NEXT: ; implicit-def: $vgpr0
1614 ; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7]
1615 ; GISEL-NEXT: s_cbranch_execnz .LBB8_1
1616 ; GISEL-NEXT: ; %bb.2:
1617 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1618 ; GISEL-NEXT: v_mov_b32_e32 v0, v2
1619 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
1620 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
1621 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
1622 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
1623 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1624 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1625 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1626 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1627 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1628 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1629 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1630 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1631 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1632 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1633 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1634 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1635 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1636 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1637 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1638 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1639 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1640 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1641 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1642 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1643 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1644 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1645 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1646 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1647 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1648 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1649 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1650 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1651 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1652 ; GISEL-NEXT: v_readlane_b32 s33, v40, 32
1653 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1654 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1655 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1656 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1657 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1658 %ret = call amdgpu_gfx i32 %fptr(i32 %i)
1662 ; An indirect call through a function pointer held in VGPRs can never be a tail call.
1663 define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
1664 ; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
1666 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1667 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1668 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1669 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1670 ; GCN-NEXT: v_writelane_b32 v40, s33, 32
1671 ; GCN-NEXT: s_mov_b32 s33, s32
1672 ; GCN-NEXT: s_addk_i32 s32, 0x400
1673 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
1674 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
1675 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
1676 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
1677 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
1678 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
1679 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
1680 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
1681 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
1682 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
1683 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
1684 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
1685 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
1686 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
1687 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
1688 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
1689 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
1690 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
1691 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
1692 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
1693 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
1694 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
1695 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
1696 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
1697 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
1698 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
1699 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
1700 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
1701 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
1702 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
1703 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
1704 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
1705 ; GCN-NEXT: s_mov_b64 s[4:5], exec
1706 ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
1707 ; GCN-NEXT: v_readfirstlane_b32 s8, v0
1708 ; GCN-NEXT: v_readfirstlane_b32 s9, v1
1709 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
1710 ; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
1711 ; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
1712 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
1713 ; GCN-NEXT: s_xor_b64 exec, exec, s[6:7]
1714 ; GCN-NEXT: s_cbranch_execnz .LBB9_1
1715 ; GCN-NEXT: ; %bb.2:
1716 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1717 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
1718 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
1719 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
1720 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
1721 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
1722 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
1723 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
1724 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
1725 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
1726 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
1727 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
1728 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
1729 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
1730 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
1731 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
1732 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
1733 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
1734 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
1735 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
1736 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
1737 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
1738 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
1739 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
1740 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
1741 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
1742 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
1743 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
1744 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
1745 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
1746 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
1747 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
1748 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
1749 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1750 ; GCN-NEXT: v_readlane_b32 s33, v40, 32
1751 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1752 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1753 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1754 ; GCN-NEXT: s_waitcnt vmcnt(0)
1755 ; GCN-NEXT: s_setpc_b64 s[30:31]
1757 ; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
1759 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1760 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1761 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1762 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1763 ; GISEL-NEXT: v_writelane_b32 v40, s33, 32
1764 ; GISEL-NEXT: s_mov_b32 s33, s32
1765 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1766 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1767 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1768 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1769 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1770 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1771 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1772 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1773 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1774 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1775 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1776 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1777 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1778 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1779 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1780 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1781 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1782 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1783 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1784 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1785 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1786 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
1787 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
1788 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
1789 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
1790 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
1791 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
1792 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
1793 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
1794 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
1795 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
1796 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
1797 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
1798 ; GISEL-NEXT: s_mov_b64 s[4:5], exec
1799 ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
1800 ; GISEL-NEXT: v_readfirstlane_b32 s6, v0
1801 ; GISEL-NEXT: v_readfirstlane_b32 s7, v1
1802 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
1803 ; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
1804 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
1805 ; GISEL-NEXT: ; implicit-def: $vgpr0
1806 ; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
1807 ; GISEL-NEXT: s_cbranch_execnz .LBB9_1
1808 ; GISEL-NEXT: ; %bb.2:
1809 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1810 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
1811 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
1812 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
1813 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
1814 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1815 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1816 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1817 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1818 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1819 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1820 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1821 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1822 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1823 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1824 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1825 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1826 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1827 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1828 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1829 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1830 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1831 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1832 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1833 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1834 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1835 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1836 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1837 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1838 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1839 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1840 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1841 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1842 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1843 ; GISEL-NEXT: v_readlane_b32 s33, v40, 32
1844 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1845 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1846 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1847 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1848 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1849 tail call amdgpu_gfx void %fptr()