1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
; External, hidden constants in addrspace(4) that hold function pointers:
; @gv.fptr0 is a void()* and @gv.fptr1 is a void(i32)*.
5 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4
6 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4
; amdgpu_kernel with one unnamed i8 parameter that loads a void() function
; pointer from @gv.fptr0. In both expected sequences below the pointer is
; fetched with s_load_dwordx2 into s[18:19] and is then the target of
; s_swappc_b64; before that, v0, v1 << 10 and v2 << 20 are OR-ed into v31
; (presumably the packed workitem IDs -- confirm against the AMDGPU ABI).
; NOTE(review): only the load is visible in the IR body here; no call, return
; or closing brace follows it in this view -- confirm the body is intact.
; NOTE(review): the sequence for the run without -global-isel has no
; "priv = 0", "%bb.0" or s_endpgm entries, while the -global-isel sequence
; does -- confirm by regenerating with utils/update_llc_test_checks.py.
8 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
9 ; GCN-LABEL: test_indirect_call_sgpr_ptr:
10 ; GCN: .amd_kernel_code_t
11 ; GCN-NEXT: amd_code_version_major = 1
12 ; GCN-NEXT: amd_code_version_minor = 2
13 ; GCN-NEXT: amd_machine_kind = 1
14 ; GCN-NEXT: amd_machine_version_major = 7
15 ; GCN-NEXT: amd_machine_version_minor = 0
16 ; GCN-NEXT: amd_machine_version_stepping = 0
17 ; GCN-NEXT: kernel_code_entry_byte_offset = 256
18 ; GCN-NEXT: kernel_code_prefetch_byte_size = 0
19 ; GCN-NEXT: granulated_workitem_vgpr_count = 10
20 ; GCN-NEXT: granulated_wavefront_sgpr_count = 8
21 ; GCN-NEXT: priority = 0
22 ; GCN-NEXT: float_mode = 240
24 ; GCN-NEXT: enable_dx10_clamp = 1
25 ; GCN-NEXT: debug_mode = 0
26 ; GCN-NEXT: enable_ieee_mode = 1
27 ; GCN-NEXT: enable_wgp_mode = 0
28 ; GCN-NEXT: enable_mem_ordered = 0
29 ; GCN-NEXT: enable_fwd_progress = 0
30 ; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
31 ; GCN-NEXT: user_sgpr_count = 14
32 ; GCN-NEXT: enable_trap_handler = 0
33 ; GCN-NEXT: enable_sgpr_workgroup_id_x = 1
34 ; GCN-NEXT: enable_sgpr_workgroup_id_y = 1
35 ; GCN-NEXT: enable_sgpr_workgroup_id_z = 1
36 ; GCN-NEXT: enable_sgpr_workgroup_info = 0
37 ; GCN-NEXT: enable_vgpr_workitem_id = 2
38 ; GCN-NEXT: enable_exception_msb = 0
39 ; GCN-NEXT: granulated_lds_size = 0
40 ; GCN-NEXT: enable_exception = 0
41 ; GCN-NEXT: enable_sgpr_private_segment_buffer = 1
42 ; GCN-NEXT: enable_sgpr_dispatch_ptr = 1
43 ; GCN-NEXT: enable_sgpr_queue_ptr = 1
44 ; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1
45 ; GCN-NEXT: enable_sgpr_dispatch_id = 1
46 ; GCN-NEXT: enable_sgpr_flat_scratch_init = 1
47 ; GCN-NEXT: enable_sgpr_private_segment_size = 0
48 ; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0
49 ; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0
50 ; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0
51 ; GCN-NEXT: enable_wavefront_size32 = 0
52 ; GCN-NEXT: enable_ordered_append_gds = 0
53 ; GCN-NEXT: private_element_size = 1
54 ; GCN-NEXT: is_ptr64 = 1
55 ; GCN-NEXT: is_dynamic_callstack = 1
56 ; GCN-NEXT: is_debug_enabled = 0
57 ; GCN-NEXT: is_xnack_enabled = 0
58 ; GCN-NEXT: workitem_private_segment_byte_size = 16384
59 ; GCN-NEXT: workgroup_group_segment_byte_size = 0
60 ; GCN-NEXT: gds_segment_byte_size = 0
61 ; GCN-NEXT: kernarg_segment_byte_size = 64
62 ; GCN-NEXT: workgroup_fbarrier_count = 0
63 ; GCN-NEXT: wavefront_sgpr_count = 68
64 ; GCN-NEXT: workitem_vgpr_count = 42
65 ; GCN-NEXT: reserved_vgpr_first = 0
66 ; GCN-NEXT: reserved_vgpr_count = 0
67 ; GCN-NEXT: reserved_sgpr_first = 0
68 ; GCN-NEXT: reserved_sgpr_count = 0
69 ; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
70 ; GCN-NEXT: debug_private_segment_buffer_sgpr = 0
71 ; GCN-NEXT: kernarg_segment_alignment = 4
72 ; GCN-NEXT: group_segment_alignment = 4
73 ; GCN-NEXT: private_segment_alignment = 4
74 ; GCN-NEXT: wavefront_size = 6
75 ; GCN-NEXT: call_convention = -1
76 ; GCN-NEXT: runtime_loader_kernel_symbol = 0
77 ; GCN-NEXT: .end_amd_kernel_code_t
79 ; GCN-NEXT: s_mov_b32 s32, 0
80 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
81 ; GCN-NEXT: s_add_i32 s12, s12, s17
82 ; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
83 ; GCN-NEXT: s_add_u32 s0, s0, s17
84 ; GCN-NEXT: s_addc_u32 s1, s1, 0
85 ; GCN-NEXT: s_mov_b32 s13, s15
86 ; GCN-NEXT: s_mov_b32 s12, s14
87 ; GCN-NEXT: s_getpc_b64 s[14:15]
88 ; GCN-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
89 ; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
90 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
91 ; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
92 ; GCN-NEXT: s_add_u32 s8, s8, 8
93 ; GCN-NEXT: s_addc_u32 s9, s9, 0
94 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
95 ; GCN-NEXT: v_or_b32_e32 v0, v0, v1
96 ; GCN-NEXT: v_or_b32_e32 v31, v0, v2
97 ; GCN-NEXT: s_mov_b32 s14, s16
98 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
99 ; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
102 ; GISEL-LABEL: test_indirect_call_sgpr_ptr:
103 ; GISEL: .amd_kernel_code_t
104 ; GISEL-NEXT: amd_code_version_major = 1
105 ; GISEL-NEXT: amd_code_version_minor = 2
106 ; GISEL-NEXT: amd_machine_kind = 1
107 ; GISEL-NEXT: amd_machine_version_major = 7
108 ; GISEL-NEXT: amd_machine_version_minor = 0
109 ; GISEL-NEXT: amd_machine_version_stepping = 0
110 ; GISEL-NEXT: kernel_code_entry_byte_offset = 256
111 ; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
112 ; GISEL-NEXT: granulated_workitem_vgpr_count = 10
113 ; GISEL-NEXT: granulated_wavefront_sgpr_count = 8
114 ; GISEL-NEXT: priority = 0
115 ; GISEL-NEXT: float_mode = 240
116 ; GISEL-NEXT: priv = 0
117 ; GISEL-NEXT: enable_dx10_clamp = 1
118 ; GISEL-NEXT: debug_mode = 0
119 ; GISEL-NEXT: enable_ieee_mode = 1
120 ; GISEL-NEXT: enable_wgp_mode = 0
121 ; GISEL-NEXT: enable_mem_ordered = 0
122 ; GISEL-NEXT: enable_fwd_progress = 0
123 ; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
124 ; GISEL-NEXT: user_sgpr_count = 14
125 ; GISEL-NEXT: enable_trap_handler = 0
126 ; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
127 ; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
128 ; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
129 ; GISEL-NEXT: enable_sgpr_workgroup_info = 0
130 ; GISEL-NEXT: enable_vgpr_workitem_id = 2
131 ; GISEL-NEXT: enable_exception_msb = 0
132 ; GISEL-NEXT: granulated_lds_size = 0
133 ; GISEL-NEXT: enable_exception = 0
134 ; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
135 ; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
136 ; GISEL-NEXT: enable_sgpr_queue_ptr = 1
137 ; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
138 ; GISEL-NEXT: enable_sgpr_dispatch_id = 1
139 ; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1
140 ; GISEL-NEXT: enable_sgpr_private_segment_size = 0
141 ; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
142 ; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
143 ; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
144 ; GISEL-NEXT: enable_wavefront_size32 = 0
145 ; GISEL-NEXT: enable_ordered_append_gds = 0
146 ; GISEL-NEXT: private_element_size = 1
147 ; GISEL-NEXT: is_ptr64 = 1
148 ; GISEL-NEXT: is_dynamic_callstack = 1
149 ; GISEL-NEXT: is_debug_enabled = 0
150 ; GISEL-NEXT: is_xnack_enabled = 0
151 ; GISEL-NEXT: workitem_private_segment_byte_size = 16384
152 ; GISEL-NEXT: workgroup_group_segment_byte_size = 0
153 ; GISEL-NEXT: gds_segment_byte_size = 0
154 ; GISEL-NEXT: kernarg_segment_byte_size = 64
155 ; GISEL-NEXT: workgroup_fbarrier_count = 0
156 ; GISEL-NEXT: wavefront_sgpr_count = 68
157 ; GISEL-NEXT: workitem_vgpr_count = 42
158 ; GISEL-NEXT: reserved_vgpr_first = 0
159 ; GISEL-NEXT: reserved_vgpr_count = 0
160 ; GISEL-NEXT: reserved_sgpr_first = 0
161 ; GISEL-NEXT: reserved_sgpr_count = 0
162 ; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
163 ; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
164 ; GISEL-NEXT: kernarg_segment_alignment = 4
165 ; GISEL-NEXT: group_segment_alignment = 4
166 ; GISEL-NEXT: private_segment_alignment = 4
167 ; GISEL-NEXT: wavefront_size = 6
168 ; GISEL-NEXT: call_convention = -1
169 ; GISEL-NEXT: runtime_loader_kernel_symbol = 0
170 ; GISEL-NEXT: .end_amd_kernel_code_t
171 ; GISEL-NEXT: ; %bb.0:
172 ; GISEL-NEXT: s_mov_b32 s32, 0
173 ; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
174 ; GISEL-NEXT: s_add_i32 s12, s12, s17
175 ; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
176 ; GISEL-NEXT: s_add_u32 s0, s0, s17
177 ; GISEL-NEXT: s_addc_u32 s1, s1, 0
178 ; GISEL-NEXT: s_mov_b32 s13, s15
179 ; GISEL-NEXT: s_mov_b32 s12, s14
180 ; GISEL-NEXT: s_getpc_b64 s[14:15]
181 ; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
182 ; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
183 ; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
184 ; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
185 ; GISEL-NEXT: s_add_u32 s8, s8, 8
186 ; GISEL-NEXT: s_addc_u32 s9, s9, 0
187 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
188 ; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
189 ; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
190 ; GISEL-NEXT: s_mov_b32 s14, s16
191 ; GISEL-NEXT: s_waitcnt lgkmcnt(0)
192 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
193 ; GISEL-NEXT: s_endpgm
194 %fptr = load void()*, void()* addrspace(4)* @gv.fptr0
; amdgpu_kernel with one unnamed i8 parameter that loads a void(i32) function
; pointer from @gv.fptr1 and calls it with the constant 123. Both expected
; sequences fetch the pointer with s_load_dwordx2 into s[18:19], place 0x7b
; (123) in v0 as the argument, and branch with s_swappc_b64.
; NOTE(review): no return or closing brace is visible after the call in this
; view -- confirm the IR body is intact.
; NOTE(review): the sequence for the run without -global-isel has no
; "priv = 0", "%bb.0" or s_endpgm entries, while the -global-isel sequence
; does -- confirm by regenerating with utils/update_llc_test_checks.py.
199 define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
200 ; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
201 ; GCN: .amd_kernel_code_t
202 ; GCN-NEXT: amd_code_version_major = 1
203 ; GCN-NEXT: amd_code_version_minor = 2
204 ; GCN-NEXT: amd_machine_kind = 1
205 ; GCN-NEXT: amd_machine_version_major = 7
206 ; GCN-NEXT: amd_machine_version_minor = 0
207 ; GCN-NEXT: amd_machine_version_stepping = 0
208 ; GCN-NEXT: kernel_code_entry_byte_offset = 256
209 ; GCN-NEXT: kernel_code_prefetch_byte_size = 0
210 ; GCN-NEXT: granulated_workitem_vgpr_count = 10
211 ; GCN-NEXT: granulated_wavefront_sgpr_count = 8
212 ; GCN-NEXT: priority = 0
213 ; GCN-NEXT: float_mode = 240
215 ; GCN-NEXT: enable_dx10_clamp = 1
216 ; GCN-NEXT: debug_mode = 0
217 ; GCN-NEXT: enable_ieee_mode = 1
218 ; GCN-NEXT: enable_wgp_mode = 0
219 ; GCN-NEXT: enable_mem_ordered = 0
220 ; GCN-NEXT: enable_fwd_progress = 0
221 ; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
222 ; GCN-NEXT: user_sgpr_count = 14
223 ; GCN-NEXT: enable_trap_handler = 0
224 ; GCN-NEXT: enable_sgpr_workgroup_id_x = 1
225 ; GCN-NEXT: enable_sgpr_workgroup_id_y = 1
226 ; GCN-NEXT: enable_sgpr_workgroup_id_z = 1
227 ; GCN-NEXT: enable_sgpr_workgroup_info = 0
228 ; GCN-NEXT: enable_vgpr_workitem_id = 2
229 ; GCN-NEXT: enable_exception_msb = 0
230 ; GCN-NEXT: granulated_lds_size = 0
231 ; GCN-NEXT: enable_exception = 0
232 ; GCN-NEXT: enable_sgpr_private_segment_buffer = 1
233 ; GCN-NEXT: enable_sgpr_dispatch_ptr = 1
234 ; GCN-NEXT: enable_sgpr_queue_ptr = 1
235 ; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1
236 ; GCN-NEXT: enable_sgpr_dispatch_id = 1
237 ; GCN-NEXT: enable_sgpr_flat_scratch_init = 1
238 ; GCN-NEXT: enable_sgpr_private_segment_size = 0
239 ; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0
240 ; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0
241 ; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0
242 ; GCN-NEXT: enable_wavefront_size32 = 0
243 ; GCN-NEXT: enable_ordered_append_gds = 0
244 ; GCN-NEXT: private_element_size = 1
245 ; GCN-NEXT: is_ptr64 = 1
246 ; GCN-NEXT: is_dynamic_callstack = 1
247 ; GCN-NEXT: is_debug_enabled = 0
248 ; GCN-NEXT: is_xnack_enabled = 0
249 ; GCN-NEXT: workitem_private_segment_byte_size = 16384
250 ; GCN-NEXT: workgroup_group_segment_byte_size = 0
251 ; GCN-NEXT: gds_segment_byte_size = 0
252 ; GCN-NEXT: kernarg_segment_byte_size = 64
253 ; GCN-NEXT: workgroup_fbarrier_count = 0
254 ; GCN-NEXT: wavefront_sgpr_count = 68
255 ; GCN-NEXT: workitem_vgpr_count = 42
256 ; GCN-NEXT: reserved_vgpr_first = 0
257 ; GCN-NEXT: reserved_vgpr_count = 0
258 ; GCN-NEXT: reserved_sgpr_first = 0
259 ; GCN-NEXT: reserved_sgpr_count = 0
260 ; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
261 ; GCN-NEXT: debug_private_segment_buffer_sgpr = 0
262 ; GCN-NEXT: kernarg_segment_alignment = 4
263 ; GCN-NEXT: group_segment_alignment = 4
264 ; GCN-NEXT: private_segment_alignment = 4
265 ; GCN-NEXT: wavefront_size = 6
266 ; GCN-NEXT: call_convention = -1
267 ; GCN-NEXT: runtime_loader_kernel_symbol = 0
268 ; GCN-NEXT: .end_amd_kernel_code_t
270 ; GCN-NEXT: s_mov_b32 s32, 0
271 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
272 ; GCN-NEXT: s_add_i32 s12, s12, s17
273 ; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
274 ; GCN-NEXT: s_add_u32 s0, s0, s17
275 ; GCN-NEXT: s_addc_u32 s1, s1, 0
276 ; GCN-NEXT: s_mov_b32 s13, s15
277 ; GCN-NEXT: s_mov_b32 s12, s14
278 ; GCN-NEXT: s_getpc_b64 s[14:15]
279 ; GCN-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
280 ; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
281 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
282 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
283 ; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
284 ; GCN-NEXT: s_add_u32 s8, s8, 8
285 ; GCN-NEXT: s_addc_u32 s9, s9, 0
286 ; GCN-NEXT: v_or_b32_e32 v0, v0, v1
287 ; GCN-NEXT: v_or_b32_e32 v31, v0, v2
288 ; GCN-NEXT: v_mov_b32_e32 v0, 0x7b
289 ; GCN-NEXT: s_mov_b32 s14, s16
290 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
291 ; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
294 ; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
295 ; GISEL: .amd_kernel_code_t
296 ; GISEL-NEXT: amd_code_version_major = 1
297 ; GISEL-NEXT: amd_code_version_minor = 2
298 ; GISEL-NEXT: amd_machine_kind = 1
299 ; GISEL-NEXT: amd_machine_version_major = 7
300 ; GISEL-NEXT: amd_machine_version_minor = 0
301 ; GISEL-NEXT: amd_machine_version_stepping = 0
302 ; GISEL-NEXT: kernel_code_entry_byte_offset = 256
303 ; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
304 ; GISEL-NEXT: granulated_workitem_vgpr_count = 10
305 ; GISEL-NEXT: granulated_wavefront_sgpr_count = 8
306 ; GISEL-NEXT: priority = 0
307 ; GISEL-NEXT: float_mode = 240
308 ; GISEL-NEXT: priv = 0
309 ; GISEL-NEXT: enable_dx10_clamp = 1
310 ; GISEL-NEXT: debug_mode = 0
311 ; GISEL-NEXT: enable_ieee_mode = 1
312 ; GISEL-NEXT: enable_wgp_mode = 0
313 ; GISEL-NEXT: enable_mem_ordered = 0
314 ; GISEL-NEXT: enable_fwd_progress = 0
315 ; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
316 ; GISEL-NEXT: user_sgpr_count = 14
317 ; GISEL-NEXT: enable_trap_handler = 0
318 ; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
319 ; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
320 ; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
321 ; GISEL-NEXT: enable_sgpr_workgroup_info = 0
322 ; GISEL-NEXT: enable_vgpr_workitem_id = 2
323 ; GISEL-NEXT: enable_exception_msb = 0
324 ; GISEL-NEXT: granulated_lds_size = 0
325 ; GISEL-NEXT: enable_exception = 0
326 ; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
327 ; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
328 ; GISEL-NEXT: enable_sgpr_queue_ptr = 1
329 ; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
330 ; GISEL-NEXT: enable_sgpr_dispatch_id = 1
331 ; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1
332 ; GISEL-NEXT: enable_sgpr_private_segment_size = 0
333 ; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
334 ; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
335 ; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
336 ; GISEL-NEXT: enable_wavefront_size32 = 0
337 ; GISEL-NEXT: enable_ordered_append_gds = 0
338 ; GISEL-NEXT: private_element_size = 1
339 ; GISEL-NEXT: is_ptr64 = 1
340 ; GISEL-NEXT: is_dynamic_callstack = 1
341 ; GISEL-NEXT: is_debug_enabled = 0
342 ; GISEL-NEXT: is_xnack_enabled = 0
343 ; GISEL-NEXT: workitem_private_segment_byte_size = 16384
344 ; GISEL-NEXT: workgroup_group_segment_byte_size = 0
345 ; GISEL-NEXT: gds_segment_byte_size = 0
346 ; GISEL-NEXT: kernarg_segment_byte_size = 64
347 ; GISEL-NEXT: workgroup_fbarrier_count = 0
348 ; GISEL-NEXT: wavefront_sgpr_count = 68
349 ; GISEL-NEXT: workitem_vgpr_count = 42
350 ; GISEL-NEXT: reserved_vgpr_first = 0
351 ; GISEL-NEXT: reserved_vgpr_count = 0
352 ; GISEL-NEXT: reserved_sgpr_first = 0
353 ; GISEL-NEXT: reserved_sgpr_count = 0
354 ; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
355 ; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
356 ; GISEL-NEXT: kernarg_segment_alignment = 4
357 ; GISEL-NEXT: group_segment_alignment = 4
358 ; GISEL-NEXT: private_segment_alignment = 4
359 ; GISEL-NEXT: wavefront_size = 6
360 ; GISEL-NEXT: call_convention = -1
361 ; GISEL-NEXT: runtime_loader_kernel_symbol = 0
362 ; GISEL-NEXT: .end_amd_kernel_code_t
363 ; GISEL-NEXT: ; %bb.0:
364 ; GISEL-NEXT: s_mov_b32 s32, 0
365 ; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
366 ; GISEL-NEXT: s_add_i32 s12, s12, s17
367 ; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
368 ; GISEL-NEXT: s_add_u32 s0, s0, s17
369 ; GISEL-NEXT: s_addc_u32 s1, s1, 0
370 ; GISEL-NEXT: s_mov_b32 s13, s15
371 ; GISEL-NEXT: s_mov_b32 s12, s14
372 ; GISEL-NEXT: s_getpc_b64 s[14:15]
373 ; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
374 ; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
375 ; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
376 ; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
377 ; GISEL-NEXT: s_add_u32 s8, s8, 8
378 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
379 ; GISEL-NEXT: s_addc_u32 s9, s9, 0
380 ; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
381 ; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
382 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
383 ; GISEL-NEXT: s_mov_b32 s14, s16
384 ; GISEL-NEXT: s_waitcnt lgkmcnt(0)
385 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
386 ; GISEL-NEXT: s_endpgm
387 %fptr = load void(i32)*, void(i32)* addrspace(4)* @gv.fptr1
388 call void %fptr(i32 123)
; Non-kernel function that receives a void() function pointer %fptr as its
; argument. The expected code saves s30-s49 and s33 into lanes of v40 (itself
; spilled to the stack), copies the incoming s[4:15] values aside, and then
; loops at .LBB2_1: v_readfirstlane reads the pointer from v[0:1] into
; s[16:17], exec is narrowed to the lanes whose pointer compares equal,
; s_swappc_b64 calls it, and s_xor_b64 removes those lanes from exec; the loop
; repeats while exec is non-zero. Afterwards exec and the saved SGPRs are
; restored and the function returns with s_setpc_b64.
; NOTE(review): no IR body (call, return, closing brace) is visible for this
; function in this view -- confirm the body is intact.
; NOTE(review): the sequence for the run without -global-isel has no "%bb.2"
; entry before the exec restore, while the -global-isel sequence does --
; confirm by regenerating with utils/update_llc_test_checks.py.
392 define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
393 ; GCN-LABEL: test_indirect_call_vgpr_ptr:
395 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
397 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
398 ; GCN-NEXT: s_mov_b64 exec, s[16:17]
399 ; GCN-NEXT: v_writelane_b32 v40, s33, 18
400 ; GCN-NEXT: s_mov_b32 s33, s32
401 ; GCN-NEXT: s_addk_i32 s32, 0x400
402 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
403 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
404 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
405 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
406 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
407 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
408 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
409 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
410 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
411 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
412 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
413 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
414 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
415 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
416 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
417 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
418 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
419 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
420 ; GCN-NEXT: s_mov_b32 s42, s15
421 ; GCN-NEXT: s_mov_b32 s43, s14
422 ; GCN-NEXT: s_mov_b32 s44, s13
423 ; GCN-NEXT: s_mov_b32 s45, s12
424 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
425 ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
426 ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
427 ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
428 ; GCN-NEXT: s_mov_b64 s[46:47], exec
429 ; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
430 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
431 ; GCN-NEXT: v_readfirstlane_b32 s17, v1
432 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
433 ; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
434 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
435 ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
436 ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
437 ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
438 ; GCN-NEXT: s_mov_b32 s12, s45
439 ; GCN-NEXT: s_mov_b32 s13, s44
440 ; GCN-NEXT: s_mov_b32 s14, s43
441 ; GCN-NEXT: s_mov_b32 s15, s42
442 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
443 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
444 ; GCN-NEXT: ; implicit-def: $vgpr31
445 ; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
446 ; GCN-NEXT: s_cbranch_execnz .LBB2_1
448 ; GCN-NEXT: s_mov_b64 exec, s[46:47]
449 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
450 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
451 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
452 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
453 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
454 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
455 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
456 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
457 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
458 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
459 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
460 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
461 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
462 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
463 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
464 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
465 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
466 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
467 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
468 ; GCN-NEXT: v_readlane_b32 s33, v40, 18
469 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
470 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
471 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
472 ; GCN-NEXT: s_waitcnt vmcnt(0)
473 ; GCN-NEXT: s_setpc_b64 s[30:31]
475 ; GISEL-LABEL: test_indirect_call_vgpr_ptr:
477 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478 ; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
479 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
480 ; GISEL-NEXT: s_mov_b64 exec, s[16:17]
481 ; GISEL-NEXT: v_writelane_b32 v40, s33, 18
482 ; GISEL-NEXT: s_mov_b32 s33, s32
483 ; GISEL-NEXT: s_addk_i32 s32, 0x400
484 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
485 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
486 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
487 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
488 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
489 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
490 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
491 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
492 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
493 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
494 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
495 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
496 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
497 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
498 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
499 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
500 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
501 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
502 ; GISEL-NEXT: s_mov_b32 s42, s15
503 ; GISEL-NEXT: s_mov_b32 s43, s14
504 ; GISEL-NEXT: s_mov_b32 s44, s13
505 ; GISEL-NEXT: s_mov_b32 s45, s12
506 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
507 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
508 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
509 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
510 ; GISEL-NEXT: s_mov_b64 s[46:47], exec
511 ; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
512 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
513 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1
514 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
515 ; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
516 ; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
517 ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
518 ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
519 ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
520 ; GISEL-NEXT: s_mov_b32 s12, s45
521 ; GISEL-NEXT: s_mov_b32 s13, s44
522 ; GISEL-NEXT: s_mov_b32 s14, s43
523 ; GISEL-NEXT: s_mov_b32 s15, s42
524 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
525 ; GISEL-NEXT: ; implicit-def: $vgpr0
526 ; GISEL-NEXT: ; implicit-def: $vgpr31
527 ; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
528 ; GISEL-NEXT: s_cbranch_execnz .LBB2_1
529 ; GISEL-NEXT: ; %bb.2:
530 ; GISEL-NEXT: s_mov_b64 exec, s[46:47]
531 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
532 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
533 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
534 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
535 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
536 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
537 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
538 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
539 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
540 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
541 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
542 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
543 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
544 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
545 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
546 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
547 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
548 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
549 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
550 ; GISEL-NEXT: v_readlane_b32 s33, v40, 18
551 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
552 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
553 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
554 ; GISEL-NEXT: s_waitcnt vmcnt(0)
555 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; Non-kernel function that receives a void(i32) function pointer %fptr and
; calls it with the constant 123. The expected code has the same shape as the
; no-argument variant: SGPRs are saved into lanes of v40, then a loop at
; .LBB3_1 reads one pointer value from v[0:1] into s[16:17], narrows exec to
; the lanes holding that value, calls it with s_swappc_b64, and removes those
; lanes from exec until none remain. The two runs differ in how the argument
; reaches v0: the run without -global-isel materializes 0x7b in v2 before the
; loop and copies v2 to v0 inside it, while the -global-isel run moves 0x7b
; directly into v0 inside the loop.
; NOTE(review): no return or closing brace is visible after the call in this
; view -- confirm the IR body is intact.
; NOTE(review): the sequence for the run without -global-isel has no "%bb.2"
; entry before the exec restore, while the -global-isel sequence does --
; confirm by regenerating with utils/update_llc_test_checks.py.
560 define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
561 ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg:
563 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
565 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
566 ; GCN-NEXT: s_mov_b64 exec, s[16:17]
567 ; GCN-NEXT: v_writelane_b32 v40, s33, 18
568 ; GCN-NEXT: s_mov_b32 s33, s32
569 ; GCN-NEXT: s_addk_i32 s32, 0x400
570 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
571 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
572 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
573 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
574 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
575 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
576 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
577 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
578 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
579 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
580 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
581 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
582 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
583 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
584 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
585 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
586 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
587 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
588 ; GCN-NEXT: s_mov_b32 s42, s15
589 ; GCN-NEXT: s_mov_b32 s43, s14
590 ; GCN-NEXT: s_mov_b32 s44, s13
591 ; GCN-NEXT: s_mov_b32 s45, s12
592 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
593 ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
594 ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
595 ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
596 ; GCN-NEXT: s_mov_b64 s[46:47], exec
597 ; GCN-NEXT: v_mov_b32_e32 v2, 0x7b
598 ; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
599 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
600 ; GCN-NEXT: v_readfirstlane_b32 s17, v1
601 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
602 ; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
603 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
604 ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
605 ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
606 ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
607 ; GCN-NEXT: s_mov_b32 s12, s45
608 ; GCN-NEXT: s_mov_b32 s13, s44
609 ; GCN-NEXT: s_mov_b32 s14, s43
610 ; GCN-NEXT: s_mov_b32 s15, s42
611 ; GCN-NEXT: v_mov_b32_e32 v0, v2
612 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
613 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
614 ; GCN-NEXT: ; implicit-def: $vgpr31
615 ; GCN-NEXT: ; implicit-def: $vgpr2
616 ; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
617 ; GCN-NEXT: s_cbranch_execnz .LBB3_1
619 ; GCN-NEXT: s_mov_b64 exec, s[46:47]
620 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
621 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
622 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
623 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
624 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
625 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
626 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
627 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
628 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
629 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
630 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
631 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
632 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
633 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
634 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
635 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
636 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
637 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
638 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
639 ; GCN-NEXT: v_readlane_b32 s33, v40, 18
640 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
641 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
642 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
643 ; GCN-NEXT: s_waitcnt vmcnt(0)
644 ; GCN-NEXT: s_setpc_b64 s[30:31]
646 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
648 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649 ; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
650 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
651 ; GISEL-NEXT: s_mov_b64 exec, s[16:17]
652 ; GISEL-NEXT: v_writelane_b32 v40, s33, 18
653 ; GISEL-NEXT: s_mov_b32 s33, s32
654 ; GISEL-NEXT: s_addk_i32 s32, 0x400
655 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
656 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
657 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
658 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
659 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
660 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
661 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
662 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
663 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
664 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
665 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
666 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
667 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
668 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
669 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
670 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
671 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
672 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
673 ; GISEL-NEXT: s_mov_b32 s42, s15
674 ; GISEL-NEXT: s_mov_b32 s43, s14
675 ; GISEL-NEXT: s_mov_b32 s44, s13
676 ; GISEL-NEXT: s_mov_b32 s45, s12
677 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
678 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
679 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
680 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
681 ; GISEL-NEXT: s_mov_b64 s[46:47], exec
682 ; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
683 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
684 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1
685 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
686 ; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
687 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
688 ; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
689 ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
690 ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
691 ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
692 ; GISEL-NEXT: s_mov_b32 s12, s45
693 ; GISEL-NEXT: s_mov_b32 s13, s44
694 ; GISEL-NEXT: s_mov_b32 s14, s43
695 ; GISEL-NEXT: s_mov_b32 s15, s42
696 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
697 ; GISEL-NEXT: ; implicit-def: $vgpr0
698 ; GISEL-NEXT: ; implicit-def: $vgpr31
699 ; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
700 ; GISEL-NEXT: s_cbranch_execnz .LBB3_1
701 ; GISEL-NEXT: ; %bb.2:
702 ; GISEL-NEXT: s_mov_b64 exec, s[46:47]
703 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
704 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
705 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
706 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
707 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
708 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
709 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
710 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
711 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
712 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
713 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
714 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
715 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
716 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
717 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
718 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
719 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
720 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
721 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
722 ; GISEL-NEXT: v_readlane_b32 s33, v40, 18
723 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
724 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
725 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
726 ; GISEL-NEXT: s_waitcnt vmcnt(0)
727 ; GISEL-NEXT: s_setpc_b64 s[30:31]
728 call void %fptr(i32 123)
; Indirect call through a function pointer held in VGPRs (v[0:1]) whose i32
; result is used after the call. Both check prefixes expect a waterfall loop
; (.LBB4_1): v_readfirstlane_b32 picks one lane's pointer into s[16:17],
; v_cmp_eq_u64 + s_and_saveexec_b64 restrict exec to the lanes sharing that
; pointer, s_swappc_b64 performs the call, and s_xor_b64 / s_cbranch_execnz
; repeat until no lanes remain. The incoming s[4:15] values are parked in
; s[34:45] on entry and copied back before every call. The returned v0 is
; copied aside inside the loop (to v2 without -global-isel, to v1 with it) and
; the v_add_i32 of 1 after the loop reads that copy.
732 define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
733 ; GCN-LABEL: test_indirect_call_vgpr_ptr_ret:
735 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
736 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
737 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
738 ; GCN-NEXT: s_mov_b64 exec, s[16:17]
739 ; GCN-NEXT: v_writelane_b32 v40, s33, 18
740 ; GCN-NEXT: s_mov_b32 s33, s32
741 ; GCN-NEXT: s_addk_i32 s32, 0x400
742 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
743 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
744 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
745 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
746 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
747 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
748 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
749 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
750 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
751 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
752 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
753 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
754 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
755 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
756 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
757 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
758 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
759 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
760 ; GCN-NEXT: s_mov_b32 s42, s15
761 ; GCN-NEXT: s_mov_b32 s43, s14
762 ; GCN-NEXT: s_mov_b32 s44, s13
763 ; GCN-NEXT: s_mov_b32 s45, s12
764 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
765 ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
766 ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
767 ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
768 ; GCN-NEXT: s_mov_b64 s[46:47], exec
769 ; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
770 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
771 ; GCN-NEXT: v_readfirstlane_b32 s17, v1
772 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
773 ; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
774 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
775 ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
776 ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
777 ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
778 ; GCN-NEXT: s_mov_b32 s12, s45
779 ; GCN-NEXT: s_mov_b32 s13, s44
780 ; GCN-NEXT: s_mov_b32 s14, s43
781 ; GCN-NEXT: s_mov_b32 s15, s42
782 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
783 ; GCN-NEXT: v_mov_b32_e32 v2, v0
784 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
785 ; GCN-NEXT: ; implicit-def: $vgpr31
786 ; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
787 ; GCN-NEXT: s_cbranch_execnz .LBB4_1
789 ; GCN-NEXT: s_mov_b64 exec, s[46:47]
790 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2
791 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
792 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
793 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
794 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
795 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
796 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
797 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
798 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
799 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
800 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
801 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
802 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
803 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
804 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
805 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
806 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
807 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
808 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
809 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
810 ; GCN-NEXT: v_readlane_b32 s33, v40, 18
811 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
812 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
813 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
814 ; GCN-NEXT: s_waitcnt vmcnt(0)
815 ; GCN-NEXT: s_setpc_b64 s[30:31]
817 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
819 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
820 ; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
821 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
822 ; GISEL-NEXT: s_mov_b64 exec, s[16:17]
823 ; GISEL-NEXT: v_writelane_b32 v40, s33, 18
824 ; GISEL-NEXT: s_mov_b32 s33, s32
825 ; GISEL-NEXT: s_addk_i32 s32, 0x400
826 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
827 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
828 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
829 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
830 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
831 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
832 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
833 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
834 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
835 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
836 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
837 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
838 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
839 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
840 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
841 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
842 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
843 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
844 ; GISEL-NEXT: s_mov_b32 s42, s15
845 ; GISEL-NEXT: s_mov_b32 s43, s14
846 ; GISEL-NEXT: s_mov_b32 s44, s13
847 ; GISEL-NEXT: s_mov_b32 s45, s12
848 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
849 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
850 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
851 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
852 ; GISEL-NEXT: s_mov_b64 s[46:47], exec
853 ; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
854 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
855 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1
856 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
857 ; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
858 ; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
859 ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
860 ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
861 ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
862 ; GISEL-NEXT: s_mov_b32 s12, s45
863 ; GISEL-NEXT: s_mov_b32 s13, s44
864 ; GISEL-NEXT: s_mov_b32 s14, s43
865 ; GISEL-NEXT: s_mov_b32 s15, s42
866 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
867 ; GISEL-NEXT: v_mov_b32_e32 v1, v0
868 ; GISEL-NEXT: ; implicit-def: $vgpr0
869 ; GISEL-NEXT: ; implicit-def: $vgpr31
870 ; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
871 ; GISEL-NEXT: s_cbranch_execnz .LBB4_1
872 ; GISEL-NEXT: ; %bb.2:
873 ; GISEL-NEXT: s_mov_b64 exec, s[46:47]
874 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1
875 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
876 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
877 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
878 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
879 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
880 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
881 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
882 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
883 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
884 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
885 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
886 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
887 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
888 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
889 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
890 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
891 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
892 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
893 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
894 ; GISEL-NEXT: v_readlane_b32 s33, v40, 18
895 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
896 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
897 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
898 ; GISEL-NEXT: s_waitcnt vmcnt(0)
899 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; The indirect call under test; its i32 result is %a.
900 %a = call i32 %fptr()
; Indirect call through a VGPR function pointer (v[0:1]) that is only reached
; when %cond is true. The checks expect %cond (in v2) to be masked to bit 0
; with v_and_b32 and compared (v_cmp_eq_u32 against 1 without -global-isel,
; v_cmp_ne_u32 against 0 with it); s_and_saveexec_b64 s[46:47] then narrows
; exec and s_cbranch_execz jumps to .LBB5_4 (%bb2) when no lane takes the call.
; The waterfall loop (.LBB5_2) runs inside that region with its own exec save
; in s[48:49] and a per-iteration mask in s[50:51]; exec is restored from
; s[48:49] after the loop and re-merged with s[46:47] at %bb2. s30-s31 and
; s34-s51 are saved in v40 lanes 0-19, with s33 in lane 20.
905 define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
906 ; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch:
907 ; GCN: ; %bb.0: ; %bb0
908 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
909 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
910 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
911 ; GCN-NEXT: s_mov_b64 exec, s[16:17]
912 ; GCN-NEXT: v_writelane_b32 v40, s33, 20
913 ; GCN-NEXT: s_mov_b32 s33, s32
914 ; GCN-NEXT: s_addk_i32 s32, 0x400
915 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
916 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
917 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
918 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
919 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
920 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
921 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
922 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
923 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
924 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
925 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
926 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
927 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
928 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
929 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
930 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
931 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
932 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
933 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
934 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
935 ; GCN-NEXT: s_mov_b32 s42, s15
936 ; GCN-NEXT: s_mov_b32 s43, s14
937 ; GCN-NEXT: s_mov_b32 s44, s13
938 ; GCN-NEXT: s_mov_b32 s45, s12
939 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
940 ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
941 ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
942 ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
943 ; GCN-NEXT: v_and_b32_e32 v2, 1, v2
944 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
945 ; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc
946 ; GCN-NEXT: s_cbranch_execz .LBB5_4
947 ; GCN-NEXT: ; %bb.1: ; %bb1
948 ; GCN-NEXT: s_mov_b64 s[48:49], exec
949 ; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
950 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
951 ; GCN-NEXT: v_readfirstlane_b32 s17, v1
952 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
953 ; GCN-NEXT: s_and_saveexec_b64 s[50:51], vcc
954 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
955 ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
956 ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
957 ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
958 ; GCN-NEXT: s_mov_b32 s12, s45
959 ; GCN-NEXT: s_mov_b32 s13, s44
960 ; GCN-NEXT: s_mov_b32 s14, s43
961 ; GCN-NEXT: s_mov_b32 s15, s42
962 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
963 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
964 ; GCN-NEXT: ; implicit-def: $vgpr31
965 ; GCN-NEXT: s_xor_b64 exec, exec, s[50:51]
966 ; GCN-NEXT: s_cbranch_execnz .LBB5_2
968 ; GCN-NEXT: s_mov_b64 exec, s[48:49]
969 ; GCN-NEXT: .LBB5_4: ; %bb2
970 ; GCN-NEXT: s_or_b64 exec, exec, s[46:47]
971 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
972 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
973 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
974 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
975 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
976 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
977 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
978 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
979 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
980 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
981 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
982 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
983 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
984 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
985 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
986 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
987 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
988 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
989 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
990 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
991 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
992 ; GCN-NEXT: v_readlane_b32 s33, v40, 20
993 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
994 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
995 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
996 ; GCN-NEXT: s_waitcnt vmcnt(0)
997 ; GCN-NEXT: s_setpc_b64 s[30:31]
999 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch:
1000 ; GISEL: ; %bb.0: ; %bb0
1001 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1002 ; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
1003 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1004 ; GISEL-NEXT: s_mov_b64 exec, s[16:17]
1005 ; GISEL-NEXT: v_writelane_b32 v40, s33, 20
1006 ; GISEL-NEXT: s_mov_b32 s33, s32
1007 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1008 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1009 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1010 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1011 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1012 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1013 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1014 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1015 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1016 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1017 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1018 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1019 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1020 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1021 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1022 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1023 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1024 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1025 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1026 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1027 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1028 ; GISEL-NEXT: s_mov_b32 s42, s15
1029 ; GISEL-NEXT: s_mov_b32 s43, s14
1030 ; GISEL-NEXT: s_mov_b32 s44, s13
1031 ; GISEL-NEXT: s_mov_b32 s45, s12
1032 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
1033 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
1034 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
1035 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
1036 ; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
1037 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1038 ; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc
1039 ; GISEL-NEXT: s_cbranch_execz .LBB5_4
1040 ; GISEL-NEXT: ; %bb.1: ; %bb1
1041 ; GISEL-NEXT: s_mov_b64 s[48:49], exec
1042 ; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
1043 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
1044 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1
1045 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
1046 ; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc
1047 ; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
1048 ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
1049 ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
1050 ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
1051 ; GISEL-NEXT: s_mov_b32 s12, s45
1052 ; GISEL-NEXT: s_mov_b32 s13, s44
1053 ; GISEL-NEXT: s_mov_b32 s14, s43
1054 ; GISEL-NEXT: s_mov_b32 s15, s42
1055 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
1056 ; GISEL-NEXT: ; implicit-def: $vgpr0
1057 ; GISEL-NEXT: ; implicit-def: $vgpr31
1058 ; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51]
1059 ; GISEL-NEXT: s_cbranch_execnz .LBB5_2
1060 ; GISEL-NEXT: ; %bb.3:
1061 ; GISEL-NEXT: s_mov_b64 exec, s[48:49]
1062 ; GISEL-NEXT: .LBB5_4: ; %bb2
1063 ; GISEL-NEXT: s_or_b64 exec, exec, s[46:47]
1064 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1065 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1066 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1067 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1068 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1069 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1070 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1071 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1072 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1073 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1074 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1075 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1076 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1077 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1078 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1079 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1080 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1081 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1082 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1083 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1084 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1085 ; GISEL-NEXT: v_readlane_b32 s33, v40, 20
1086 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1087 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1088 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1089 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1090 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; Entry-block branch on %cond; per the checks above, %bb1 holds the call loop
; and %bb2 is the join block.
1092 br i1 %cond, label %bb1, label %bb2
; Indirect amdgpu_gfx call through a VGPR function pointer (v[0:1]) with a
; constant inreg argument. The checks expect 0x7b (123) to be loaded into s4
; once, before the waterfall loop (.LBB6_1), and the per-iteration uniform
; pointer to be read into s[8:9]; nothing is re-copied into SGPRs inside the
; loop before s_swappc_b64. s30-s31 and s34-s63 are saved in v40 lanes 0-31,
; with s33 in lane 32. Both check prefixes expect the same sequence apart from
; the implicit-def operand after the call ($vgpr0_vgpr1 vs $vgpr0).
1102 define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
1103 ; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
1105 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1107 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1108 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1109 ; GCN-NEXT: v_writelane_b32 v40, s33, 32
1110 ; GCN-NEXT: s_mov_b32 s33, s32
1111 ; GCN-NEXT: s_addk_i32 s32, 0x400
1112 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
1113 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
1114 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
1115 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
1116 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
1117 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
1118 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
1119 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
1120 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
1121 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
1122 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
1123 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
1124 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
1125 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
1126 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
1127 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
1128 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
1129 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
1130 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
1131 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
1132 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
1133 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
1134 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
1135 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
1136 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
1137 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
1138 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
1139 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
1140 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
1141 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
1142 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
1143 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
1144 ; GCN-NEXT: s_mov_b64 s[6:7], exec
1145 ; GCN-NEXT: s_movk_i32 s4, 0x7b
1146 ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
1147 ; GCN-NEXT: v_readfirstlane_b32 s8, v0
1148 ; GCN-NEXT: v_readfirstlane_b32 s9, v1
1149 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
1150 ; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc
1151 ; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
1152 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
1153 ; GCN-NEXT: s_xor_b64 exec, exec, s[10:11]
1154 ; GCN-NEXT: s_cbranch_execnz .LBB6_1
1155 ; GCN-NEXT: ; %bb.2:
1156 ; GCN-NEXT: s_mov_b64 exec, s[6:7]
1157 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
1158 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
1159 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
1160 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
1161 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
1162 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
1163 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
1164 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
1165 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
1166 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
1167 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
1168 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
1169 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
1170 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
1171 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
1172 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
1173 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
1174 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
1175 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
1176 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
1177 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
1178 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
1179 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
1180 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
1181 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
1182 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
1183 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
1184 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
1185 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
1186 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
1187 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
1188 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
1189 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1190 ; GCN-NEXT: v_readlane_b32 s33, v40, 32
1191 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1192 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1193 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1194 ; GCN-NEXT: s_waitcnt vmcnt(0)
1195 ; GCN-NEXT: s_setpc_b64 s[30:31]
1197 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
1199 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1200 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1201 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1202 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1203 ; GISEL-NEXT: v_writelane_b32 v40, s33, 32
1204 ; GISEL-NEXT: s_mov_b32 s33, s32
1205 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1206 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1207 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1208 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1209 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1210 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1211 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1212 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1213 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1214 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1215 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1216 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1217 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1218 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1219 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1220 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1221 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1222 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1223 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1224 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1225 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1226 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
1227 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
1228 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
1229 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
1230 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
1231 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
1232 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
1233 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
1234 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
1235 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
1236 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
1237 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
1238 ; GISEL-NEXT: s_mov_b64 s[6:7], exec
1239 ; GISEL-NEXT: s_movk_i32 s4, 0x7b
1240 ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
1241 ; GISEL-NEXT: v_readfirstlane_b32 s8, v0
1242 ; GISEL-NEXT: v_readfirstlane_b32 s9, v1
1243 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
1244 ; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
1245 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
1246 ; GISEL-NEXT: ; implicit-def: $vgpr0
1247 ; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
1248 ; GISEL-NEXT: s_cbranch_execnz .LBB6_1
1249 ; GISEL-NEXT: ; %bb.2:
1250 ; GISEL-NEXT: s_mov_b64 exec, s[6:7]
1251 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
1252 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
1253 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
1254 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
1255 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1256 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1257 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1258 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1259 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1260 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1261 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1262 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1263 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1264 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1265 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1266 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1267 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1268 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1269 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1270 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1271 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1272 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1273 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1274 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1275 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1276 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1277 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1278 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1279 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1280 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1281 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1282 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1283 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1284 ; GISEL-NEXT: v_readlane_b32 s33, v40, 32
1285 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1286 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1287 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1288 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1289 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; The indirect call under test: amdgpu_gfx calling convention, constant 123
; passed as an inreg argument.
1290 call amdgpu_gfx void %fptr(i32 inreg 123)
; Passes %i to an indirect amdgpu_gfx call and expects it to still be
; available after the call. The checks expect %i (arriving in v0) to be copied
; to v41 before the waterfall loop (.LBB7_1), copied back into v0 ahead of
; every s_swappc_b64, and copied into v0 once more after the loop. v41 is
; stored to the stack at s33 on entry and reloaded on exit, while the v40 SGPR
; save register uses the slot at s32 offset:4. The function pointer is taken
; from v[1:2] and its per-iteration uniform value is read into s[6:7]. Both
; check prefixes expect the same sequence apart from the implicit-def operand
; after the call ($vgpr1_vgpr2 vs $vgpr1).
1294 define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) {
1295 ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1297 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1299 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1300 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1301 ; GCN-NEXT: v_writelane_b32 v40, s33, 32
1302 ; GCN-NEXT: s_mov_b32 s33, s32
1303 ; GCN-NEXT: s_addk_i32 s32, 0x400
1304 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
1305 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
1306 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
1307 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
1308 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
1309 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
1310 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
1311 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
1312 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
1313 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
1314 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
1315 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
1316 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
1317 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
1318 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
1319 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
1320 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
1321 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
1322 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
1323 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
1324 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
1325 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
1326 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
1327 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
1328 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
1329 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
1330 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
1331 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
1332 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
1333 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
1334 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
1335 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
1336 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
1337 ; GCN-NEXT: v_mov_b32_e32 v41, v0
1338 ; GCN-NEXT: s_mov_b64 s[4:5], exec
1339 ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
1340 ; GCN-NEXT: v_readfirstlane_b32 s6, v1
1341 ; GCN-NEXT: v_readfirstlane_b32 s7, v2
1342 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1343 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
1344 ; GCN-NEXT: v_mov_b32_e32 v0, v41
1345 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
1346 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
1347 ; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
1348 ; GCN-NEXT: s_cbranch_execnz .LBB7_1
1349 ; GCN-NEXT: ; %bb.2:
1350 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1351 ; GCN-NEXT: v_mov_b32_e32 v0, v41
1352 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
1353 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
1354 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
1355 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
1356 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
1357 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
1358 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
1359 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
1360 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
1361 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
1362 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
1363 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
1364 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
1365 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
1366 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
1367 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
1368 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
1369 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
1370 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
1371 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
1372 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
1373 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
1374 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
1375 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
1376 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
1377 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
1378 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
1379 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
1380 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
1381 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
1382 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
1383 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
1384 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
1385 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1386 ; GCN-NEXT: v_readlane_b32 s33, v40, 32
1387 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1388 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1389 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1390 ; GCN-NEXT: s_waitcnt vmcnt(0)
1391 ; GCN-NEXT: s_setpc_b64 s[30:31]
1393 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1395 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1396 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1397 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1398 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1399 ; GISEL-NEXT: v_writelane_b32 v40, s33, 32
1400 ; GISEL-NEXT: s_mov_b32 s33, s32
1401 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1402 ; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
1403 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1404 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1405 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1406 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1407 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1408 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1409 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1410 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1411 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1412 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1413 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1414 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1415 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1416 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1417 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1418 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1419 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1420 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1421 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1422 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1423 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
1424 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
1425 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
1426 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
1427 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
1428 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
1429 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
1430 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
1431 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
1432 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
1433 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
1434 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
1435 ; GISEL-NEXT: v_mov_b32_e32 v41, v0
1436 ; GISEL-NEXT: s_mov_b64 s[4:5], exec
1437 ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
1438 ; GISEL-NEXT: v_readfirstlane_b32 s6, v1
1439 ; GISEL-NEXT: v_readfirstlane_b32 s7, v2
1440 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1441 ; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
1442 ; GISEL-NEXT: v_mov_b32_e32 v0, v41
1443 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
1444 ; GISEL-NEXT: ; implicit-def: $vgpr1
1445 ; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
1446 ; GISEL-NEXT: s_cbranch_execnz .LBB7_1
1447 ; GISEL-NEXT: ; %bb.2:
1448 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1449 ; GISEL-NEXT: v_mov_b32_e32 v0, v41
1450 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
1451 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
1452 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
1453 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
1454 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1455 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1456 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1457 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1458 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1459 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1460 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1461 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1462 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1463 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1464 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1465 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1466 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1467 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1468 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1469 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1470 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1471 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1472 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1473 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1474 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1475 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1476 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1477 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1478 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1479 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1480 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1481 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1482 ; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
1483 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1484 ; GISEL-NEXT: v_readlane_b32 s33, v40, 32
1485 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1486 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1487 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1488 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1489 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; The indirect call under test: amdgpu_gfx calling convention, %i forwarded
; as the only argument.
1490 call amdgpu_gfx void %fptr(i32 %i)
1494 ; Use a variable inside a waterfall loop and use the returned value after the loop.
1495 ; TODO: The argument and the return value could share the same physical register, but the register
1496 ; allocator is not able to do that because the return value clashes with the live range of an
1497 ; IMPLICIT_DEF of the argument.
1498 define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) {
1499 ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1501 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1502 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1503 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1504 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1505 ; GCN-NEXT: v_writelane_b32 v40, s33, 32
1506 ; GCN-NEXT: s_mov_b32 s33, s32
1507 ; GCN-NEXT: s_addk_i32 s32, 0x400
1508 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
1509 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
1510 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
1511 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
1512 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
1513 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
1514 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
1515 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
1516 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
1517 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
1518 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
1519 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
1520 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
1521 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
1522 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
1523 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
1524 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
1525 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
1526 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
1527 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
1528 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
1529 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
1530 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
1531 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
1532 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
1533 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
1534 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
1535 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
1536 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
1537 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
1538 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
1539 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
1540 ; GCN-NEXT: s_mov_b64 s[4:5], exec
1541 ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1542 ; GCN-NEXT: v_readfirstlane_b32 s6, v1
1543 ; GCN-NEXT: v_readfirstlane_b32 s7, v2
1544 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1545 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
1546 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
1547 ; GCN-NEXT: v_mov_b32_e32 v3, v0
1548 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
1549 ; GCN-NEXT: ; implicit-def: $vgpr0
1550 ; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
1551 ; GCN-NEXT: s_cbranch_execnz .LBB8_1
1552 ; GCN-NEXT: ; %bb.2:
1553 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1554 ; GCN-NEXT: v_mov_b32_e32 v0, v3
1555 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
1556 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
1557 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
1558 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
1559 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
1560 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
1561 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
1562 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
1563 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
1564 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
1565 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
1566 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
1567 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
1568 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
1569 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
1570 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
1571 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
1572 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
1573 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
1574 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
1575 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
1576 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
1577 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
1578 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
1579 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
1580 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
1581 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
1582 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
1583 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
1584 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
1585 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
1586 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
1587 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1588 ; GCN-NEXT: v_readlane_b32 s33, v40, 32
1589 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1590 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1591 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1592 ; GCN-NEXT: s_waitcnt vmcnt(0)
1593 ; GCN-NEXT: s_setpc_b64 s[30:31]
1595 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1597 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1598 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1599 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1600 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1601 ; GISEL-NEXT: v_writelane_b32 v40, s33, 32
1602 ; GISEL-NEXT: s_mov_b32 s33, s32
1603 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1604 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1605 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1606 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1607 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1608 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1609 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1610 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1611 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1612 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1613 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1614 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1615 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1616 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1617 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1618 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1619 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1620 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1621 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1622 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1623 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1624 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
1625 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
1626 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
1627 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
1628 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
1629 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
1630 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
1631 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
1632 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
1633 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
1634 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
1635 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
1636 ; GISEL-NEXT: s_mov_b64 s[4:5], exec
1637 ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1638 ; GISEL-NEXT: v_readfirstlane_b32 s8, v1
1639 ; GISEL-NEXT: v_readfirstlane_b32 s9, v2
1640 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1641 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
1642 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
1643 ; GISEL-NEXT: v_mov_b32_e32 v2, v0
1644 ; GISEL-NEXT: ; implicit-def: $vgpr1
1645 ; GISEL-NEXT: ; implicit-def: $vgpr0
1646 ; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7]
1647 ; GISEL-NEXT: s_cbranch_execnz .LBB8_1
1648 ; GISEL-NEXT: ; %bb.2:
1649 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1650 ; GISEL-NEXT: v_mov_b32_e32 v0, v2
1651 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
1652 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
1653 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
1654 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
1655 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1656 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1657 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1658 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1659 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1660 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1661 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1662 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1663 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1664 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1665 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1666 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1667 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1668 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1669 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1670 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1671 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1672 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1673 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1674 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1675 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1676 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1677 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1678 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1679 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1680 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1681 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1682 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1683 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1684 ; GISEL-NEXT: v_readlane_b32 s33, v40, 32
1685 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1686 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1687 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1688 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1689 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1690 %ret = call amdgpu_gfx i32 %fptr(i32 %i)
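1694 ; A call through a function pointer held in VGPRs can never be a tail call: it is emitted inside a waterfall loop that continues after the callee returns.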
1695 define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
1696 ; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
1698 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1699 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1700 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1701 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1702 ; GCN-NEXT: v_writelane_b32 v40, s33, 32
1703 ; GCN-NEXT: s_mov_b32 s33, s32
1704 ; GCN-NEXT: s_addk_i32 s32, 0x400
1705 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
1706 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
1707 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
1708 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
1709 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
1710 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
1711 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
1712 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
1713 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
1714 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
1715 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
1716 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
1717 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
1718 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
1719 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
1720 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
1721 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
1722 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
1723 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
1724 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
1725 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
1726 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
1727 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
1728 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
1729 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
1730 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
1731 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
1732 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
1733 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
1734 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
1735 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
1736 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
1737 ; GCN-NEXT: s_mov_b64 s[4:5], exec
1738 ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
1739 ; GCN-NEXT: v_readfirstlane_b32 s6, v0
1740 ; GCN-NEXT: v_readfirstlane_b32 s7, v1
1741 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
1742 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
1743 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
1744 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
1745 ; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
1746 ; GCN-NEXT: s_cbranch_execnz .LBB9_1
1747 ; GCN-NEXT: ; %bb.2:
1748 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1749 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
1750 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
1751 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
1752 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
1753 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
1754 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
1755 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
1756 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
1757 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
1758 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
1759 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
1760 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
1761 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
1762 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
1763 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
1764 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
1765 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
1766 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
1767 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
1768 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
1769 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
1770 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
1771 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
1772 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
1773 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
1774 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
1775 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
1776 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
1777 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
1778 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
1779 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
1780 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
1781 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1782 ; GCN-NEXT: v_readlane_b32 s33, v40, 32
1783 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1784 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1785 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1786 ; GCN-NEXT: s_waitcnt vmcnt(0)
1787 ; GCN-NEXT: s_setpc_b64 s[30:31]
1789 ; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
1791 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1792 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1793 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1794 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1795 ; GISEL-NEXT: v_writelane_b32 v40, s33, 32
1796 ; GISEL-NEXT: s_mov_b32 s33, s32
1797 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1798 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1799 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1800 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1801 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1802 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1803 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1804 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1805 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1806 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1807 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1808 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1809 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1810 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1811 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1812 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1813 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1814 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1815 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1816 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1817 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1818 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
1819 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
1820 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
1821 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
1822 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
1823 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
1824 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
1825 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
1826 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
1827 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
1828 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
1829 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
1830 ; GISEL-NEXT: s_mov_b64 s[4:5], exec
1831 ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
1832 ; GISEL-NEXT: v_readfirstlane_b32 s6, v0
1833 ; GISEL-NEXT: v_readfirstlane_b32 s7, v1
1834 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
1835 ; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
1836 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
1837 ; GISEL-NEXT: ; implicit-def: $vgpr0
1838 ; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
1839 ; GISEL-NEXT: s_cbranch_execnz .LBB9_1
1840 ; GISEL-NEXT: ; %bb.2:
1841 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1842 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
1843 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
1844 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
1845 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
1846 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1847 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1848 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1849 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1850 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1851 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1852 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1853 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1854 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1855 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1856 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1857 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1858 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1859 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1860 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1861 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1862 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1863 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1864 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1865 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1866 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1867 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1868 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1869 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1870 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1871 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1872 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1873 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1874 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1875 ; GISEL-NEXT: v_readlane_b32 s33, v40, 32
1876 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1877 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1878 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1879 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1880 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1881 tail call amdgpu_gfx void %fptr()