1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
; Externally defined globals in addrspace(4), each holding a function pointer.
; The kernel tests in this file load their indirect-call target from them.
5 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
6 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
; Kernel that loads a function pointer from @gv.fptr0. For both llc runs
; (default and -global-isel) the expected code forms the address of the global
; with s_getpc_b64 plus rel32 fixups, fetches the pointer into s[18:19] with
; s_load_dwordx2, waits on lgkmcnt(0), and then calls through s_swappc_b64.
; NOTE(review): the shift-by-10 / shift-by-20 / or sequence that fills v31 from
; v0, v1 and v2 looks like the packed workitem-ID argument; confirm against the
; AMDGPU calling convention.
8 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
9 ; GCN-LABEL: test_indirect_call_sgpr_ptr:
11 ; GCN-NEXT: s_mov_b32 s32, 0
12 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
13 ; GCN-NEXT: s_add_i32 s12, s12, s17
14 ; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
15 ; GCN-NEXT: s_add_u32 s0, s0, s17
16 ; GCN-NEXT: s_addc_u32 s1, s1, 0
17 ; GCN-NEXT: s_mov_b32 s13, s15
18 ; GCN-NEXT: s_mov_b32 s12, s14
19 ; GCN-NEXT: s_getpc_b64 s[14:15]
20 ; GCN-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
21 ; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
22 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
23 ; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
24 ; GCN-NEXT: s_add_u32 s8, s8, 8
25 ; GCN-NEXT: s_addc_u32 s9, s9, 0
26 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
27 ; GCN-NEXT: v_or_b32_e32 v0, v0, v1
28 ; GCN-NEXT: v_or_b32_e32 v31, v0, v2
29 ; GCN-NEXT: s_mov_b32 s14, s16
30 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
31 ; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
34 ; GISEL-LABEL: test_indirect_call_sgpr_ptr:
36 ; GISEL-NEXT: s_mov_b32 s32, 0
37 ; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
38 ; GISEL-NEXT: s_add_i32 s12, s12, s17
39 ; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
40 ; GISEL-NEXT: s_add_u32 s0, s0, s17
41 ; GISEL-NEXT: s_addc_u32 s1, s1, 0
42 ; GISEL-NEXT: s_mov_b32 s13, s15
43 ; GISEL-NEXT: s_mov_b32 s12, s14
44 ; GISEL-NEXT: s_getpc_b64 s[14:15]
45 ; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
46 ; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
47 ; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
48 ; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
49 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
50 ; GISEL-NEXT: s_add_u32 s8, s8, 8
51 ; GISEL-NEXT: s_addc_u32 s9, s9, 0
52 ; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
53 ; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
54 ; GISEL-NEXT: s_mov_b32 s14, s16
55 ; GISEL-NEXT: s_waitcnt lgkmcnt(0)
56 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
57 ; GISEL-NEXT: s_endpgm
58 %fptr = load ptr, ptr addrspace(4) @gv.fptr0
; Kernel that loads a function pointer from @gv.fptr1 and calls it with the
; constant i32 123. The expected code matches the no-argument kernel test
; (s_load_dwordx2 into s[18:19], s_waitcnt lgkmcnt(0), s_swappc_b64) and, for
; both llc runs, also places the argument in v0 (v_mov_b32 v0, 0x7b) after v31
; has been formed and before the call.
63 define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
64 ; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
66 ; GCN-NEXT: s_mov_b32 s32, 0
67 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
68 ; GCN-NEXT: s_add_i32 s12, s12, s17
69 ; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
70 ; GCN-NEXT: s_add_u32 s0, s0, s17
71 ; GCN-NEXT: s_addc_u32 s1, s1, 0
72 ; GCN-NEXT: s_mov_b32 s13, s15
73 ; GCN-NEXT: s_mov_b32 s12, s14
74 ; GCN-NEXT: s_getpc_b64 s[14:15]
75 ; GCN-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
76 ; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
77 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
78 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
79 ; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
80 ; GCN-NEXT: s_add_u32 s8, s8, 8
81 ; GCN-NEXT: s_addc_u32 s9, s9, 0
82 ; GCN-NEXT: v_or_b32_e32 v0, v0, v1
83 ; GCN-NEXT: v_or_b32_e32 v31, v0, v2
84 ; GCN-NEXT: v_mov_b32_e32 v0, 0x7b
85 ; GCN-NEXT: s_mov_b32 s14, s16
86 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
87 ; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
90 ; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
92 ; GISEL-NEXT: s_mov_b32 s32, 0
93 ; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
94 ; GISEL-NEXT: s_add_i32 s12, s12, s17
95 ; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
96 ; GISEL-NEXT: s_add_u32 s0, s0, s17
97 ; GISEL-NEXT: s_addc_u32 s1, s1, 0
98 ; GISEL-NEXT: s_mov_b32 s13, s15
99 ; GISEL-NEXT: s_mov_b32 s12, s14
100 ; GISEL-NEXT: s_getpc_b64 s[14:15]
101 ; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
102 ; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
103 ; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
104 ; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
105 ; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
106 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
107 ; GISEL-NEXT: s_add_u32 s8, s8, 8
108 ; GISEL-NEXT: s_addc_u32 s9, s9, 0
109 ; GISEL-NEXT: v_or_b32_e32 v31, v0, v2
110 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
111 ; GISEL-NEXT: s_mov_b32 s14, s16
112 ; GISEL-NEXT: s_waitcnt lgkmcnt(0)
113 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
114 ; GISEL-NEXT: s_endpgm
115 %fptr = load ptr, ptr addrspace(4) @gv.fptr1
116 call void %fptr(i32 123)
; Non-kernel function whose callee pointer arrives as an ordinary argument; the
; expected code reads it from v[0:1]. Because the pointer is held per lane, the
; checks expect a loop (.LBB2_1) around the call: v_readfirstlane copies one
; lane's pointer into s[16:17], v_cmp_eq_u64 + s_and_saveexec restrict exec to
; the lanes holding that same pointer, s_swappc_b64 performs the call, and
; s_xor_b64 on exec drops the handled lanes; the loop repeats while any lanes
; remain (s_cbranch_execnz). The original exec mask is kept in s[46:47] and
; restored after the loop.
; Around the loop, s[4:15] are copied to s[34:45] once and copied back before
; every call, and s30-s49 plus the old s33 are saved to and restored from lanes
; of v40 with v_writelane/v_readlane; v40 itself is spilled to the stack at s33.
120 define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
121 ; GCN-LABEL: test_indirect_call_vgpr_ptr:
123 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; GCN-NEXT: s_mov_b32 s16, s33
125 ; GCN-NEXT: s_mov_b32 s33, s32
126 ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1
127 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
128 ; GCN-NEXT: s_mov_b64 exec, s[18:19]
129 ; GCN-NEXT: v_writelane_b32 v40, s16, 18
130 ; GCN-NEXT: s_addk_i32 s32, 0x400
131 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
132 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
133 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
134 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
135 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
136 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
137 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
138 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
139 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
140 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
141 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
142 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
143 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
144 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
145 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
146 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
147 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
148 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
149 ; GCN-NEXT: s_mov_b32 s42, s15
150 ; GCN-NEXT: s_mov_b32 s43, s14
151 ; GCN-NEXT: s_mov_b32 s44, s13
152 ; GCN-NEXT: s_mov_b32 s45, s12
153 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
154 ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
155 ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
156 ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
157 ; GCN-NEXT: s_mov_b64 s[46:47], exec
158 ; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
159 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
160 ; GCN-NEXT: v_readfirstlane_b32 s17, v1
161 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
162 ; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
163 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
164 ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
165 ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
166 ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
167 ; GCN-NEXT: s_mov_b32 s12, s45
168 ; GCN-NEXT: s_mov_b32 s13, s44
169 ; GCN-NEXT: s_mov_b32 s14, s43
170 ; GCN-NEXT: s_mov_b32 s15, s42
171 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
172 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
173 ; GCN-NEXT: ; implicit-def: $vgpr31
174 ; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
175 ; GCN-NEXT: s_cbranch_execnz .LBB2_1
177 ; GCN-NEXT: s_mov_b64 exec, s[46:47]
178 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
179 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
180 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
181 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
182 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
183 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
184 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
185 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
186 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
187 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
188 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
189 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
190 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
191 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
192 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
193 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
194 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
195 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
196 ; GCN-NEXT: v_readlane_b32 s4, v40, 18
197 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
198 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
199 ; GCN-NEXT: s_mov_b64 exec, s[6:7]
200 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
201 ; GCN-NEXT: s_mov_b32 s33, s4
202 ; GCN-NEXT: s_waitcnt vmcnt(0)
203 ; GCN-NEXT: s_setpc_b64 s[30:31]
205 ; GISEL-LABEL: test_indirect_call_vgpr_ptr:
207 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208 ; GISEL-NEXT: s_mov_b32 s16, s33
209 ; GISEL-NEXT: s_mov_b32 s33, s32
210 ; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1
211 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
212 ; GISEL-NEXT: s_mov_b64 exec, s[18:19]
213 ; GISEL-NEXT: v_writelane_b32 v40, s16, 18
214 ; GISEL-NEXT: s_addk_i32 s32, 0x400
215 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
216 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
217 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
218 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
219 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
220 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
221 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
222 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
223 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
224 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
225 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
226 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
227 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
228 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
229 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
230 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
231 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
232 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
233 ; GISEL-NEXT: s_mov_b32 s42, s15
234 ; GISEL-NEXT: s_mov_b32 s43, s14
235 ; GISEL-NEXT: s_mov_b32 s44, s13
236 ; GISEL-NEXT: s_mov_b32 s45, s12
237 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
238 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
239 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
240 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
241 ; GISEL-NEXT: s_mov_b64 s[46:47], exec
242 ; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
243 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
244 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1
245 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
246 ; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
247 ; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
248 ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
249 ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
250 ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
251 ; GISEL-NEXT: s_mov_b32 s12, s45
252 ; GISEL-NEXT: s_mov_b32 s13, s44
253 ; GISEL-NEXT: s_mov_b32 s14, s43
254 ; GISEL-NEXT: s_mov_b32 s15, s42
255 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
256 ; GISEL-NEXT: ; implicit-def: $vgpr0
257 ; GISEL-NEXT: ; implicit-def: $vgpr31
258 ; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
259 ; GISEL-NEXT: s_cbranch_execnz .LBB2_1
260 ; GISEL-NEXT: ; %bb.2:
261 ; GISEL-NEXT: s_mov_b64 exec, s[46:47]
262 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
263 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
264 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
265 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
266 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
267 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
268 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
269 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
270 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
271 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
272 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
273 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
274 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
275 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
276 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
277 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
278 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
279 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
280 ; GISEL-NEXT: v_readlane_b32 s4, v40, 18
281 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
282 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
283 ; GISEL-NEXT: s_mov_b64 exec, s[6:7]
284 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
285 ; GISEL-NEXT: s_mov_b32 s33, s4
286 ; GISEL-NEXT: s_waitcnt vmcnt(0)
287 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; Non-kernel function that calls its pointer argument with the constant
; i32 123. The expected code uses the per-lane call loop (.LBB3_1):
; v_readfirstlane of v0/v1 into s[16:17], v_cmp_eq_u64 + s_and_saveexec to
; select the matching lanes, s_swappc_b64, then s_xor_b64 on exec and
; s_cbranch_execnz to repeat for the remaining lanes.
; The two llc runs differ in where the argument is materialized: the default
; run writes 0x7b to v2 once before the loop and copies v2 to v0 right before
; each call, while the -global-isel run writes 0x7b directly to v0 inside the
; loop, after the pointer in v[0:1] has been read and compared.
292 define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
293 ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg:
295 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GCN-NEXT: s_mov_b32 s16, s33
297 ; GCN-NEXT: s_mov_b32 s33, s32
298 ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1
299 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
300 ; GCN-NEXT: s_mov_b64 exec, s[18:19]
301 ; GCN-NEXT: v_writelane_b32 v40, s16, 18
302 ; GCN-NEXT: s_addk_i32 s32, 0x400
303 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
304 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
305 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
306 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
307 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
308 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
309 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
310 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
311 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
312 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
313 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
314 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
315 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
316 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
317 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
318 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
319 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
320 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
321 ; GCN-NEXT: s_mov_b32 s42, s15
322 ; GCN-NEXT: s_mov_b32 s43, s14
323 ; GCN-NEXT: s_mov_b32 s44, s13
324 ; GCN-NEXT: s_mov_b32 s45, s12
325 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
326 ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
327 ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
328 ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
329 ; GCN-NEXT: s_mov_b64 s[46:47], exec
330 ; GCN-NEXT: v_mov_b32_e32 v2, 0x7b
331 ; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
332 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
333 ; GCN-NEXT: v_readfirstlane_b32 s17, v1
334 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
335 ; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
336 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
337 ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
338 ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
339 ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
340 ; GCN-NEXT: s_mov_b32 s12, s45
341 ; GCN-NEXT: s_mov_b32 s13, s44
342 ; GCN-NEXT: s_mov_b32 s14, s43
343 ; GCN-NEXT: s_mov_b32 s15, s42
344 ; GCN-NEXT: v_mov_b32_e32 v0, v2
345 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
346 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
347 ; GCN-NEXT: ; implicit-def: $vgpr31
348 ; GCN-NEXT: ; implicit-def: $vgpr2
349 ; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
350 ; GCN-NEXT: s_cbranch_execnz .LBB3_1
352 ; GCN-NEXT: s_mov_b64 exec, s[46:47]
353 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
354 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
355 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
356 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
357 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
358 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
359 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
360 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
361 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
362 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
363 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
364 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
365 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
366 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
367 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
368 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
369 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
370 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
371 ; GCN-NEXT: v_readlane_b32 s4, v40, 18
372 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
373 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
374 ; GCN-NEXT: s_mov_b64 exec, s[6:7]
375 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
376 ; GCN-NEXT: s_mov_b32 s33, s4
377 ; GCN-NEXT: s_waitcnt vmcnt(0)
378 ; GCN-NEXT: s_setpc_b64 s[30:31]
380 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
382 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383 ; GISEL-NEXT: s_mov_b32 s16, s33
384 ; GISEL-NEXT: s_mov_b32 s33, s32
385 ; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1
386 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
387 ; GISEL-NEXT: s_mov_b64 exec, s[18:19]
388 ; GISEL-NEXT: v_writelane_b32 v40, s16, 18
389 ; GISEL-NEXT: s_addk_i32 s32, 0x400
390 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
391 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
392 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
393 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
394 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
395 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
396 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
397 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
398 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
399 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
400 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
401 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
402 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
403 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
404 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
405 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
406 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
407 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
408 ; GISEL-NEXT: s_mov_b32 s42, s15
409 ; GISEL-NEXT: s_mov_b32 s43, s14
410 ; GISEL-NEXT: s_mov_b32 s44, s13
411 ; GISEL-NEXT: s_mov_b32 s45, s12
412 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
413 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
414 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
415 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
416 ; GISEL-NEXT: s_mov_b64 s[46:47], exec
417 ; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
418 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
419 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1
420 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
421 ; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
422 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
423 ; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
424 ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
425 ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
426 ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
427 ; GISEL-NEXT: s_mov_b32 s12, s45
428 ; GISEL-NEXT: s_mov_b32 s13, s44
429 ; GISEL-NEXT: s_mov_b32 s14, s43
430 ; GISEL-NEXT: s_mov_b32 s15, s42
431 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
432 ; GISEL-NEXT: ; implicit-def: $vgpr0
433 ; GISEL-NEXT: ; implicit-def: $vgpr31
434 ; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
435 ; GISEL-NEXT: s_cbranch_execnz .LBB3_1
436 ; GISEL-NEXT: ; %bb.2:
437 ; GISEL-NEXT: s_mov_b64 exec, s[46:47]
438 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
439 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
440 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
441 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
442 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
443 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
444 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
445 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
446 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
447 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
448 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
449 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
450 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
451 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
452 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
453 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
454 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
455 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
456 ; GISEL-NEXT: v_readlane_b32 s4, v40, 18
457 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
458 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
459 ; GISEL-NEXT: s_mov_b64 exec, s[6:7]
460 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
461 ; GISEL-NEXT: s_mov_b32 s33, s4
462 ; GISEL-NEXT: s_waitcnt vmcnt(0)
463 ; GISEL-NEXT: s_setpc_b64 s[30:31]
464 call void %fptr(i32 123)
; Non-kernel function that calls its pointer argument and uses the i32 result.
; The expected code uses the per-lane call loop (.LBB4_1): v_readfirstlane of
; v0/v1 into s[16:17], v_cmp_eq_u64 + s_and_saveexec to select the matching
; lanes, s_swappc_b64, then s_xor_b64 on exec and s_cbranch_execnz to repeat.
; Inside the loop the value the callee leaves in v0 is copied to another VGPR
; right after the call (v2 in the default run, v1 in the -global-isel run).
; After the loop, once exec is restored from s[46:47], the checks expect 1 to
; be added to that copy with the sum placed in v0 (v_add_i32); the IR
; performing that addition is not visible in this excerpt.
468 define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
469 ; GCN-LABEL: test_indirect_call_vgpr_ptr_ret:
471 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
472 ; GCN-NEXT: s_mov_b32 s16, s33
473 ; GCN-NEXT: s_mov_b32 s33, s32
474 ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1
475 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
476 ; GCN-NEXT: s_mov_b64 exec, s[18:19]
477 ; GCN-NEXT: v_writelane_b32 v40, s16, 18
478 ; GCN-NEXT: s_addk_i32 s32, 0x400
479 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
480 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
481 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
482 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
483 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
484 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
485 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
486 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
487 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
488 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
489 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
490 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
491 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
492 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
493 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
494 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
495 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
496 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
497 ; GCN-NEXT: s_mov_b32 s42, s15
498 ; GCN-NEXT: s_mov_b32 s43, s14
499 ; GCN-NEXT: s_mov_b32 s44, s13
500 ; GCN-NEXT: s_mov_b32 s45, s12
501 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
502 ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
503 ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
504 ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
505 ; GCN-NEXT: s_mov_b64 s[46:47], exec
506 ; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
507 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
508 ; GCN-NEXT: v_readfirstlane_b32 s17, v1
509 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
510 ; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
511 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
512 ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
513 ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
514 ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
515 ; GCN-NEXT: s_mov_b32 s12, s45
516 ; GCN-NEXT: s_mov_b32 s13, s44
517 ; GCN-NEXT: s_mov_b32 s14, s43
518 ; GCN-NEXT: s_mov_b32 s15, s42
519 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
520 ; GCN-NEXT: v_mov_b32_e32 v2, v0
521 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
522 ; GCN-NEXT: ; implicit-def: $vgpr31
523 ; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
524 ; GCN-NEXT: s_cbranch_execnz .LBB4_1
526 ; GCN-NEXT: s_mov_b64 exec, s[46:47]
527 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2
528 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
529 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
530 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
531 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
532 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
533 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
534 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
535 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
536 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
537 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
538 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
539 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
540 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
541 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
542 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
543 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
544 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
545 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
546 ; GCN-NEXT: v_readlane_b32 s4, v40, 18
547 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
548 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
549 ; GCN-NEXT: s_mov_b64 exec, s[6:7]
550 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
551 ; GCN-NEXT: s_mov_b32 s33, s4
552 ; GCN-NEXT: s_waitcnt vmcnt(0)
553 ; GCN-NEXT: s_setpc_b64 s[30:31]
555 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
557 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558 ; GISEL-NEXT: s_mov_b32 s16, s33
559 ; GISEL-NEXT: s_mov_b32 s33, s32
560 ; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1
561 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
562 ; GISEL-NEXT: s_mov_b64 exec, s[18:19]
563 ; GISEL-NEXT: v_writelane_b32 v40, s16, 18
564 ; GISEL-NEXT: s_addk_i32 s32, 0x400
565 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
566 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
567 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
568 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
569 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
570 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
571 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
572 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
573 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
574 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
575 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
576 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
577 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
578 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
579 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
580 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
581 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
582 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
583 ; GISEL-NEXT: s_mov_b32 s42, s15
584 ; GISEL-NEXT: s_mov_b32 s43, s14
585 ; GISEL-NEXT: s_mov_b32 s44, s13
586 ; GISEL-NEXT: s_mov_b32 s45, s12
587 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
588 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
589 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
590 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
591 ; GISEL-NEXT: s_mov_b64 s[46:47], exec
592 ; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
593 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
594 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1
595 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
596 ; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
597 ; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
598 ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
599 ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
600 ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
601 ; GISEL-NEXT: s_mov_b32 s12, s45
602 ; GISEL-NEXT: s_mov_b32 s13, s44
603 ; GISEL-NEXT: s_mov_b32 s14, s43
604 ; GISEL-NEXT: s_mov_b32 s15, s42
605 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
606 ; GISEL-NEXT: v_mov_b32_e32 v1, v0
607 ; GISEL-NEXT: ; implicit-def: $vgpr0
608 ; GISEL-NEXT: ; implicit-def: $vgpr31
609 ; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
610 ; GISEL-NEXT: s_cbranch_execnz .LBB4_1
611 ; GISEL-NEXT: ; %bb.2:
612 ; GISEL-NEXT: s_mov_b64 exec, s[46:47]
613 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1
614 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
615 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
616 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
617 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
618 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
619 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
620 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
621 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
622 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
623 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
624 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
625 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
626 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
627 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
628 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
629 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
630 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
631 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
632 ; GISEL-NEXT: v_readlane_b32 s4, v40, 18
633 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
634 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
635 ; GISEL-NEXT: s_mov_b64 exec, s[6:7]
636 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
637 ; GISEL-NEXT: s_mov_b32 s33, s4
638 ; GISEL-NEXT: s_waitcnt vmcnt(0)
639 ; GISEL-NEXT: s_setpc_b64 s[30:31]
640 %a = call i32 %fptr()
645 define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
646 ; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch:
647 ; GCN: ; %bb.0: ; %bb0
648 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649 ; GCN-NEXT: s_mov_b32 s16, s33
650 ; GCN-NEXT: s_mov_b32 s33, s32
651 ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1
652 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
653 ; GCN-NEXT: s_mov_b64 exec, s[18:19]
654 ; GCN-NEXT: v_writelane_b32 v40, s16, 20
655 ; GCN-NEXT: s_addk_i32 s32, 0x400
656 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
657 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
658 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
659 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
660 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
661 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
662 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
663 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
664 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
665 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
666 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
667 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
668 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
669 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
670 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
671 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
672 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
673 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
674 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
675 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
676 ; GCN-NEXT: s_mov_b32 s42, s15
677 ; GCN-NEXT: s_mov_b32 s43, s14
678 ; GCN-NEXT: s_mov_b32 s44, s13
679 ; GCN-NEXT: s_mov_b32 s45, s12
680 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
681 ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
682 ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
683 ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
684 ; GCN-NEXT: v_and_b32_e32 v2, 1, v2
685 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
686 ; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc
687 ; GCN-NEXT: s_cbranch_execz .LBB5_4
688 ; GCN-NEXT: ; %bb.1: ; %bb1
689 ; GCN-NEXT: s_mov_b64 s[48:49], exec
690 ; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
691 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
692 ; GCN-NEXT: v_readfirstlane_b32 s17, v1
693 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
694 ; GCN-NEXT: s_and_saveexec_b64 s[50:51], vcc
695 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
696 ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
697 ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
698 ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
699 ; GCN-NEXT: s_mov_b32 s12, s45
700 ; GCN-NEXT: s_mov_b32 s13, s44
701 ; GCN-NEXT: s_mov_b32 s14, s43
702 ; GCN-NEXT: s_mov_b32 s15, s42
703 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
704 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
705 ; GCN-NEXT: ; implicit-def: $vgpr31
706 ; GCN-NEXT: s_xor_b64 exec, exec, s[50:51]
707 ; GCN-NEXT: s_cbranch_execnz .LBB5_2
709 ; GCN-NEXT: s_mov_b64 exec, s[48:49]
710 ; GCN-NEXT: .LBB5_4: ; %bb2
711 ; GCN-NEXT: s_or_b64 exec, exec, s[46:47]
712 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
713 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
714 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
715 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
716 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
717 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
718 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
719 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
720 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
721 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
722 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
723 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
724 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
725 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
726 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
727 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
728 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
729 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
730 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
731 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
732 ; GCN-NEXT: v_readlane_b32 s4, v40, 20
733 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
734 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
735 ; GCN-NEXT: s_mov_b64 exec, s[6:7]
736 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
737 ; GCN-NEXT: s_mov_b32 s33, s4
738 ; GCN-NEXT: s_waitcnt vmcnt(0)
739 ; GCN-NEXT: s_setpc_b64 s[30:31]
741 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch:
742 ; GISEL: ; %bb.0: ; %bb0
743 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
744 ; GISEL-NEXT: s_mov_b32 s16, s33
745 ; GISEL-NEXT: s_mov_b32 s33, s32
746 ; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1
747 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
748 ; GISEL-NEXT: s_mov_b64 exec, s[18:19]
749 ; GISEL-NEXT: v_writelane_b32 v40, s16, 20
750 ; GISEL-NEXT: s_addk_i32 s32, 0x400
751 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
752 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
753 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
754 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
755 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
756 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
757 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
758 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
759 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
760 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
761 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
762 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
763 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
764 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
765 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
766 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
767 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
768 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
769 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
770 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
771 ; GISEL-NEXT: s_mov_b32 s42, s15
772 ; GISEL-NEXT: s_mov_b32 s43, s14
773 ; GISEL-NEXT: s_mov_b32 s44, s13
774 ; GISEL-NEXT: s_mov_b32 s45, s12
775 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
776 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
777 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
778 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
779 ; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
780 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
781 ; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc
782 ; GISEL-NEXT: s_cbranch_execz .LBB5_4
783 ; GISEL-NEXT: ; %bb.1: ; %bb1
784 ; GISEL-NEXT: s_mov_b64 s[48:49], exec
785 ; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
786 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
787 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1
788 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
789 ; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc
790 ; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
791 ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
792 ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
793 ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
794 ; GISEL-NEXT: s_mov_b32 s12, s45
795 ; GISEL-NEXT: s_mov_b32 s13, s44
796 ; GISEL-NEXT: s_mov_b32 s14, s43
797 ; GISEL-NEXT: s_mov_b32 s15, s42
798 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
799 ; GISEL-NEXT: ; implicit-def: $vgpr0
800 ; GISEL-NEXT: ; implicit-def: $vgpr31
801 ; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51]
802 ; GISEL-NEXT: s_cbranch_execnz .LBB5_2
803 ; GISEL-NEXT: ; %bb.3:
804 ; GISEL-NEXT: s_mov_b64 exec, s[48:49]
805 ; GISEL-NEXT: .LBB5_4: ; %bb2
806 ; GISEL-NEXT: s_or_b64 exec, exec, s[46:47]
807 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
808 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
809 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
810 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
811 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
812 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
813 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
814 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
815 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
816 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
817 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
818 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
819 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
820 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
821 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
822 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
823 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
824 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
825 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
826 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
827 ; GISEL-NEXT: v_readlane_b32 s4, v40, 20
828 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
829 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
830 ; GISEL-NEXT: s_mov_b64 exec, s[6:7]
831 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
832 ; GISEL-NEXT: s_mov_b32 s33, s4
833 ; GISEL-NEXT: s_waitcnt vmcnt(0)
834 ; GISEL-NEXT: s_setpc_b64 s[30:31]
836 br i1 %cond, label %bb1, label %bb2
; Indirect call through the function pointer %fptr, which the checked code
; reads from v[0:1], passing the constant 123 as an `inreg` i32 argument.
; Both check prefixes expect a loop that picks one pointer value with
; v_readfirstlane, limits exec to the lanes whose pointer matches it, and
; writes 123 (0x7b) to s4 right before the s_swappc_b64; the loop repeats
; until exec is empty.
846 define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
847 ; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
849 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
850 ; GCN-NEXT: s_mov_b32 s5, s33
851 ; GCN-NEXT: s_mov_b32 s33, s32
852 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
853 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
854 ; GCN-NEXT: s_mov_b64 exec, s[6:7]
855 ; GCN-NEXT: s_addk_i32 s32, 0x400
856 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
857 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
858 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
859 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
860 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
861 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
862 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
863 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
864 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
865 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
866 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
867 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
868 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
869 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
870 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
871 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
872 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
873 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
874 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
875 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
876 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
877 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
878 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
879 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
880 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
881 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
882 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
883 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
884 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
885 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
886 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
887 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
888 ; GCN-NEXT: s_mov_b64 s[6:7], exec
889 ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
890 ; GCN-NEXT: v_readfirstlane_b32 s8, v0
891 ; GCN-NEXT: v_readfirstlane_b32 s9, v1
892 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
893 ; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc
894 ; GCN-NEXT: s_movk_i32 s4, 0x7b
895 ; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
896 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
897 ; GCN-NEXT: s_xor_b64 exec, exec, s[10:11]
898 ; GCN-NEXT: s_cbranch_execnz .LBB6_1
900 ; GCN-NEXT: s_mov_b64 exec, s[6:7]
901 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
902 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
903 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
904 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
905 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
906 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
907 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
908 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
909 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
910 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
911 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
912 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
913 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
914 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
915 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
916 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
917 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
918 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
919 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
920 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
921 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
922 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
923 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
924 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
925 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
926 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
927 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
928 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
929 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
930 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
931 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
932 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
933 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
934 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
935 ; GCN-NEXT: s_mov_b64 exec, s[6:7]
936 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
937 ; GCN-NEXT: s_mov_b32 s33, s5
938 ; GCN-NEXT: s_waitcnt vmcnt(0)
939 ; GCN-NEXT: s_setpc_b64 s[30:31]
941 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
943 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
944 ; GISEL-NEXT: s_mov_b32 s5, s33
945 ; GISEL-NEXT: s_mov_b32 s33, s32
946 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
947 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
948 ; GISEL-NEXT: s_mov_b64 exec, s[6:7]
949 ; GISEL-NEXT: s_addk_i32 s32, 0x400
950 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
951 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
952 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
953 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
954 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
955 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
956 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
957 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
958 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
959 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
960 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
961 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
962 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
963 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
964 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
965 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
966 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
967 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
968 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
969 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
970 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
971 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
972 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
973 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
974 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
975 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
976 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
977 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
978 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
979 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
980 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
981 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
982 ; GISEL-NEXT: s_mov_b64 s[6:7], exec
983 ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
984 ; GISEL-NEXT: v_readfirstlane_b32 s8, v0
985 ; GISEL-NEXT: v_readfirstlane_b32 s9, v1
986 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
987 ; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
988 ; GISEL-NEXT: s_movk_i32 s4, 0x7b
989 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
990 ; GISEL-NEXT: ; implicit-def: $vgpr0
991 ; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
992 ; GISEL-NEXT: s_cbranch_execnz .LBB6_1
993 ; GISEL-NEXT: ; %bb.2:
994 ; GISEL-NEXT: s_mov_b64 exec, s[6:7]
995 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
996 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
997 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
998 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
999 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1000 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1001 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1002 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1003 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1004 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1005 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1006 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1007 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1008 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1009 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1010 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1011 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1012 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1013 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1014 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1015 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1016 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1017 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1018 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1019 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1020 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1021 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1022 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1023 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1024 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1025 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1026 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1027 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
1028 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1029 ; GISEL-NEXT: s_mov_b64 exec, s[6:7]
1030 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1031 ; GISEL-NEXT: s_mov_b32 s33, s5
1032 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1033 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1034 call amdgpu_gfx void %fptr(i32 inreg 123)
; %i is passed to an indirect call through %fptr (read from v[1:2] in the
; checked code). Both check prefixes expect %i to be copied from v0 into v40
; ahead of the call loop, moved back into v0 before every s_swappc_b64, and
; moved into v0 once more after the loop; v40 itself is spilled in the
; prologue and reloaded in the epilogue.
1038 define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
1039 ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1041 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1042 ; GCN-NEXT: s_mov_b32 s10, s33
1043 ; GCN-NEXT: s_mov_b32 s33, s32
1044 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1045 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
1046 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1047 ; GCN-NEXT: s_addk_i32 s32, 0x400
1048 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1049 ; GCN-NEXT: v_writelane_b32 v41, s30, 0
1050 ; GCN-NEXT: v_writelane_b32 v41, s31, 1
1051 ; GCN-NEXT: v_writelane_b32 v41, s34, 2
1052 ; GCN-NEXT: v_writelane_b32 v41, s35, 3
1053 ; GCN-NEXT: v_writelane_b32 v41, s36, 4
1054 ; GCN-NEXT: v_writelane_b32 v41, s37, 5
1055 ; GCN-NEXT: v_writelane_b32 v41, s38, 6
1056 ; GCN-NEXT: v_writelane_b32 v41, s39, 7
1057 ; GCN-NEXT: v_writelane_b32 v41, s40, 8
1058 ; GCN-NEXT: v_writelane_b32 v41, s41, 9
1059 ; GCN-NEXT: v_writelane_b32 v41, s42, 10
1060 ; GCN-NEXT: v_writelane_b32 v41, s43, 11
1061 ; GCN-NEXT: v_writelane_b32 v41, s44, 12
1062 ; GCN-NEXT: v_writelane_b32 v41, s45, 13
1063 ; GCN-NEXT: v_writelane_b32 v41, s46, 14
1064 ; GCN-NEXT: v_writelane_b32 v41, s47, 15
1065 ; GCN-NEXT: v_writelane_b32 v41, s48, 16
1066 ; GCN-NEXT: v_writelane_b32 v41, s49, 17
1067 ; GCN-NEXT: v_writelane_b32 v41, s50, 18
1068 ; GCN-NEXT: v_writelane_b32 v41, s51, 19
1069 ; GCN-NEXT: v_writelane_b32 v41, s52, 20
1070 ; GCN-NEXT: v_writelane_b32 v41, s53, 21
1071 ; GCN-NEXT: v_writelane_b32 v41, s54, 22
1072 ; GCN-NEXT: v_writelane_b32 v41, s55, 23
1073 ; GCN-NEXT: v_writelane_b32 v41, s56, 24
1074 ; GCN-NEXT: v_writelane_b32 v41, s57, 25
1075 ; GCN-NEXT: v_writelane_b32 v41, s58, 26
1076 ; GCN-NEXT: v_writelane_b32 v41, s59, 27
1077 ; GCN-NEXT: v_writelane_b32 v41, s60, 28
1078 ; GCN-NEXT: v_writelane_b32 v41, s61, 29
1079 ; GCN-NEXT: v_writelane_b32 v41, s62, 30
1080 ; GCN-NEXT: v_writelane_b32 v41, s63, 31
1081 ; GCN-NEXT: v_mov_b32_e32 v40, v0
1082 ; GCN-NEXT: s_mov_b64 s[4:5], exec
1083 ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
1084 ; GCN-NEXT: v_readfirstlane_b32 s6, v1
1085 ; GCN-NEXT: v_readfirstlane_b32 s7, v2
1086 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1087 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
1088 ; GCN-NEXT: v_mov_b32_e32 v0, v40
1089 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
1090 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
1091 ; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
1092 ; GCN-NEXT: s_cbranch_execnz .LBB7_1
1093 ; GCN-NEXT: ; %bb.2:
1094 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1095 ; GCN-NEXT: v_mov_b32_e32 v0, v40
1096 ; GCN-NEXT: v_readlane_b32 s63, v41, 31
1097 ; GCN-NEXT: v_readlane_b32 s62, v41, 30
1098 ; GCN-NEXT: v_readlane_b32 s61, v41, 29
1099 ; GCN-NEXT: v_readlane_b32 s60, v41, 28
1100 ; GCN-NEXT: v_readlane_b32 s59, v41, 27
1101 ; GCN-NEXT: v_readlane_b32 s58, v41, 26
1102 ; GCN-NEXT: v_readlane_b32 s57, v41, 25
1103 ; GCN-NEXT: v_readlane_b32 s56, v41, 24
1104 ; GCN-NEXT: v_readlane_b32 s55, v41, 23
1105 ; GCN-NEXT: v_readlane_b32 s54, v41, 22
1106 ; GCN-NEXT: v_readlane_b32 s53, v41, 21
1107 ; GCN-NEXT: v_readlane_b32 s52, v41, 20
1108 ; GCN-NEXT: v_readlane_b32 s51, v41, 19
1109 ; GCN-NEXT: v_readlane_b32 s50, v41, 18
1110 ; GCN-NEXT: v_readlane_b32 s49, v41, 17
1111 ; GCN-NEXT: v_readlane_b32 s48, v41, 16
1112 ; GCN-NEXT: v_readlane_b32 s47, v41, 15
1113 ; GCN-NEXT: v_readlane_b32 s46, v41, 14
1114 ; GCN-NEXT: v_readlane_b32 s45, v41, 13
1115 ; GCN-NEXT: v_readlane_b32 s44, v41, 12
1116 ; GCN-NEXT: v_readlane_b32 s43, v41, 11
1117 ; GCN-NEXT: v_readlane_b32 s42, v41, 10
1118 ; GCN-NEXT: v_readlane_b32 s41, v41, 9
1119 ; GCN-NEXT: v_readlane_b32 s40, v41, 8
1120 ; GCN-NEXT: v_readlane_b32 s39, v41, 7
1121 ; GCN-NEXT: v_readlane_b32 s38, v41, 6
1122 ; GCN-NEXT: v_readlane_b32 s37, v41, 5
1123 ; GCN-NEXT: v_readlane_b32 s36, v41, 4
1124 ; GCN-NEXT: v_readlane_b32 s35, v41, 3
1125 ; GCN-NEXT: v_readlane_b32 s34, v41, 2
1126 ; GCN-NEXT: v_readlane_b32 s31, v41, 1
1127 ; GCN-NEXT: v_readlane_b32 s30, v41, 0
1128 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1129 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1130 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
1131 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1132 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1133 ; GCN-NEXT: s_mov_b32 s33, s10
1134 ; GCN-NEXT: s_waitcnt vmcnt(0)
1135 ; GCN-NEXT: s_setpc_b64 s[30:31]
1137 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1139 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1140 ; GISEL-NEXT: s_mov_b32 s10, s33
1141 ; GISEL-NEXT: s_mov_b32 s33, s32
1142 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1143 ; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
1144 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1145 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1146 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1147 ; GISEL-NEXT: v_writelane_b32 v41, s30, 0
1148 ; GISEL-NEXT: v_writelane_b32 v41, s31, 1
1149 ; GISEL-NEXT: v_writelane_b32 v41, s34, 2
1150 ; GISEL-NEXT: v_writelane_b32 v41, s35, 3
1151 ; GISEL-NEXT: v_writelane_b32 v41, s36, 4
1152 ; GISEL-NEXT: v_writelane_b32 v41, s37, 5
1153 ; GISEL-NEXT: v_writelane_b32 v41, s38, 6
1154 ; GISEL-NEXT: v_writelane_b32 v41, s39, 7
1155 ; GISEL-NEXT: v_writelane_b32 v41, s40, 8
1156 ; GISEL-NEXT: v_writelane_b32 v41, s41, 9
1157 ; GISEL-NEXT: v_writelane_b32 v41, s42, 10
1158 ; GISEL-NEXT: v_writelane_b32 v41, s43, 11
1159 ; GISEL-NEXT: v_writelane_b32 v41, s44, 12
1160 ; GISEL-NEXT: v_writelane_b32 v41, s45, 13
1161 ; GISEL-NEXT: v_writelane_b32 v41, s46, 14
1162 ; GISEL-NEXT: v_writelane_b32 v41, s47, 15
1163 ; GISEL-NEXT: v_writelane_b32 v41, s48, 16
1164 ; GISEL-NEXT: v_writelane_b32 v41, s49, 17
1165 ; GISEL-NEXT: v_writelane_b32 v41, s50, 18
1166 ; GISEL-NEXT: v_writelane_b32 v41, s51, 19
1167 ; GISEL-NEXT: v_writelane_b32 v41, s52, 20
1168 ; GISEL-NEXT: v_writelane_b32 v41, s53, 21
1169 ; GISEL-NEXT: v_writelane_b32 v41, s54, 22
1170 ; GISEL-NEXT: v_writelane_b32 v41, s55, 23
1171 ; GISEL-NEXT: v_writelane_b32 v41, s56, 24
1172 ; GISEL-NEXT: v_writelane_b32 v41, s57, 25
1173 ; GISEL-NEXT: v_writelane_b32 v41, s58, 26
1174 ; GISEL-NEXT: v_writelane_b32 v41, s59, 27
1175 ; GISEL-NEXT: v_writelane_b32 v41, s60, 28
1176 ; GISEL-NEXT: v_writelane_b32 v41, s61, 29
1177 ; GISEL-NEXT: v_writelane_b32 v41, s62, 30
1178 ; GISEL-NEXT: v_writelane_b32 v41, s63, 31
1179 ; GISEL-NEXT: v_mov_b32_e32 v40, v0
1180 ; GISEL-NEXT: s_mov_b64 s[4:5], exec
1181 ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
1182 ; GISEL-NEXT: v_readfirstlane_b32 s6, v1
1183 ; GISEL-NEXT: v_readfirstlane_b32 s7, v2
1184 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1185 ; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
1186 ; GISEL-NEXT: v_mov_b32_e32 v0, v40
1187 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
1188 ; GISEL-NEXT: ; implicit-def: $vgpr1
1189 ; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
1190 ; GISEL-NEXT: s_cbranch_execnz .LBB7_1
1191 ; GISEL-NEXT: ; %bb.2:
1192 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1193 ; GISEL-NEXT: v_mov_b32_e32 v0, v40
1194 ; GISEL-NEXT: v_readlane_b32 s63, v41, 31
1195 ; GISEL-NEXT: v_readlane_b32 s62, v41, 30
1196 ; GISEL-NEXT: v_readlane_b32 s61, v41, 29
1197 ; GISEL-NEXT: v_readlane_b32 s60, v41, 28
1198 ; GISEL-NEXT: v_readlane_b32 s59, v41, 27
1199 ; GISEL-NEXT: v_readlane_b32 s58, v41, 26
1200 ; GISEL-NEXT: v_readlane_b32 s57, v41, 25
1201 ; GISEL-NEXT: v_readlane_b32 s56, v41, 24
1202 ; GISEL-NEXT: v_readlane_b32 s55, v41, 23
1203 ; GISEL-NEXT: v_readlane_b32 s54, v41, 22
1204 ; GISEL-NEXT: v_readlane_b32 s53, v41, 21
1205 ; GISEL-NEXT: v_readlane_b32 s52, v41, 20
1206 ; GISEL-NEXT: v_readlane_b32 s51, v41, 19
1207 ; GISEL-NEXT: v_readlane_b32 s50, v41, 18
1208 ; GISEL-NEXT: v_readlane_b32 s49, v41, 17
1209 ; GISEL-NEXT: v_readlane_b32 s48, v41, 16
1210 ; GISEL-NEXT: v_readlane_b32 s47, v41, 15
1211 ; GISEL-NEXT: v_readlane_b32 s46, v41, 14
1212 ; GISEL-NEXT: v_readlane_b32 s45, v41, 13
1213 ; GISEL-NEXT: v_readlane_b32 s44, v41, 12
1214 ; GISEL-NEXT: v_readlane_b32 s43, v41, 11
1215 ; GISEL-NEXT: v_readlane_b32 s42, v41, 10
1216 ; GISEL-NEXT: v_readlane_b32 s41, v41, 9
1217 ; GISEL-NEXT: v_readlane_b32 s40, v41, 8
1218 ; GISEL-NEXT: v_readlane_b32 s39, v41, 7
1219 ; GISEL-NEXT: v_readlane_b32 s38, v41, 6
1220 ; GISEL-NEXT: v_readlane_b32 s37, v41, 5
1221 ; GISEL-NEXT: v_readlane_b32 s36, v41, 4
1222 ; GISEL-NEXT: v_readlane_b32 s35, v41, 3
1223 ; GISEL-NEXT: v_readlane_b32 s34, v41, 2
1224 ; GISEL-NEXT: v_readlane_b32 s31, v41, 1
1225 ; GISEL-NEXT: v_readlane_b32 s30, v41, 0
1226 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1227 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1228 ; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
1229 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1230 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1231 ; GISEL-NEXT: s_mov_b32 s33, s10
1232 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1233 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1234 call amdgpu_gfx void %fptr(i32 %i)
1238 ; Use a variable inside a waterfall loop and use the call's return value after the loop.
1239 ; TODO: The argument and the return value could share one physical register, but the
1240 ; register allocator cannot do that because the return value clashes with the live range
1241 ; of an IMPLICIT_DEF of the argument.
1242 define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
1243 ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1245 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1246 ; GCN-NEXT: s_mov_b32 s10, s33
1247 ; GCN-NEXT: s_mov_b32 s33, s32
1248 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1249 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1250 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1251 ; GCN-NEXT: s_addk_i32 s32, 0x400
1252 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
1253 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
1254 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
1255 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
1256 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
1257 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
1258 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
1259 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
1260 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
1261 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
1262 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
1263 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
1264 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
1265 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
1266 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
1267 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
1268 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
1269 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
1270 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
1271 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
1272 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
1273 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
1274 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
1275 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
1276 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
1277 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
1278 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
1279 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
1280 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
1281 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
1282 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
1283 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
1284 ; GCN-NEXT: s_mov_b64 s[4:5], exec
1285 ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1286 ; GCN-NEXT: v_readfirstlane_b32 s8, v1
1287 ; GCN-NEXT: v_readfirstlane_b32 s9, v2
1288 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1289 ; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
1290 ; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
1291 ; GCN-NEXT: v_mov_b32_e32 v3, v0
1292 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
1293 ; GCN-NEXT: ; implicit-def: $vgpr0
1294 ; GCN-NEXT: s_xor_b64 exec, exec, s[6:7]
1295 ; GCN-NEXT: s_cbranch_execnz .LBB8_1
1296 ; GCN-NEXT: ; %bb.2:
1297 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1298 ; GCN-NEXT: v_mov_b32_e32 v0, v3
1299 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
1300 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
1301 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
1302 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
1303 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
1304 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
1305 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
1306 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
1307 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
1308 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
1309 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
1310 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
1311 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
1312 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
1313 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
1314 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
1315 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
1316 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
1317 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
1318 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
1319 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
1320 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
1321 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
1322 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
1323 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
1324 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
1325 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
1326 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
1327 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
1328 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
1329 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
1330 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
1331 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1332 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1333 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1334 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1335 ; GCN-NEXT: s_mov_b32 s33, s10
1336 ; GCN-NEXT: s_waitcnt vmcnt(0)
1337 ; GCN-NEXT: s_setpc_b64 s[30:31]
1339 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1341 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342 ; GISEL-NEXT: s_mov_b32 s10, s33
1343 ; GISEL-NEXT: s_mov_b32 s33, s32
1344 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1345 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1346 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1347 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1348 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1349 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1350 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1351 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1352 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1353 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1354 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1355 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1356 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1357 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1358 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1359 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1360 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1361 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1362 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1363 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1364 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1365 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1366 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1367 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1368 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
1369 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
1370 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
1371 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
1372 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
1373 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
1374 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
1375 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
1376 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
1377 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
1378 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
1379 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
1380 ; GISEL-NEXT: s_mov_b64 s[4:5], exec
1381 ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1382 ; GISEL-NEXT: v_readfirstlane_b32 s8, v1
1383 ; GISEL-NEXT: v_readfirstlane_b32 s9, v2
1384 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1385 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
1386 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
1387 ; GISEL-NEXT: v_mov_b32_e32 v2, v0
1388 ; GISEL-NEXT: ; implicit-def: $vgpr1
1389 ; GISEL-NEXT: ; implicit-def: $vgpr0
1390 ; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7]
1391 ; GISEL-NEXT: s_cbranch_execnz .LBB8_1
1392 ; GISEL-NEXT: ; %bb.2:
1393 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1394 ; GISEL-NEXT: v_mov_b32_e32 v0, v2
1395 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
1396 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
1397 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
1398 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
1399 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1400 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1401 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1402 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1403 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1404 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1405 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1406 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1407 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1408 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1409 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1410 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1411 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1412 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1413 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1414 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1415 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1416 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1417 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1418 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1419 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1420 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1421 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1422 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1423 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1424 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1425 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1426 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1427 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1428 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1429 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1430 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1431 ; GISEL-NEXT: s_mov_b32 s33, s10
1432 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1433 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1434 %ret = call amdgpu_gfx i32 %fptr(i32 %i)
1438 ; A call through a function pointer held in VGPRs can never be a tail call.
1439 define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
1440 ; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
1442 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443 ; GCN-NEXT: s_mov_b32 s10, s33
1444 ; GCN-NEXT: s_mov_b32 s33, s32
1445 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1446 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1447 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1448 ; GCN-NEXT: s_addk_i32 s32, 0x400
1449 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
1450 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
1451 ; GCN-NEXT: v_writelane_b32 v40, s34, 2
1452 ; GCN-NEXT: v_writelane_b32 v40, s35, 3
1453 ; GCN-NEXT: v_writelane_b32 v40, s36, 4
1454 ; GCN-NEXT: v_writelane_b32 v40, s37, 5
1455 ; GCN-NEXT: v_writelane_b32 v40, s38, 6
1456 ; GCN-NEXT: v_writelane_b32 v40, s39, 7
1457 ; GCN-NEXT: v_writelane_b32 v40, s40, 8
1458 ; GCN-NEXT: v_writelane_b32 v40, s41, 9
1459 ; GCN-NEXT: v_writelane_b32 v40, s42, 10
1460 ; GCN-NEXT: v_writelane_b32 v40, s43, 11
1461 ; GCN-NEXT: v_writelane_b32 v40, s44, 12
1462 ; GCN-NEXT: v_writelane_b32 v40, s45, 13
1463 ; GCN-NEXT: v_writelane_b32 v40, s46, 14
1464 ; GCN-NEXT: v_writelane_b32 v40, s47, 15
1465 ; GCN-NEXT: v_writelane_b32 v40, s48, 16
1466 ; GCN-NEXT: v_writelane_b32 v40, s49, 17
1467 ; GCN-NEXT: v_writelane_b32 v40, s50, 18
1468 ; GCN-NEXT: v_writelane_b32 v40, s51, 19
1469 ; GCN-NEXT: v_writelane_b32 v40, s52, 20
1470 ; GCN-NEXT: v_writelane_b32 v40, s53, 21
1471 ; GCN-NEXT: v_writelane_b32 v40, s54, 22
1472 ; GCN-NEXT: v_writelane_b32 v40, s55, 23
1473 ; GCN-NEXT: v_writelane_b32 v40, s56, 24
1474 ; GCN-NEXT: v_writelane_b32 v40, s57, 25
1475 ; GCN-NEXT: v_writelane_b32 v40, s58, 26
1476 ; GCN-NEXT: v_writelane_b32 v40, s59, 27
1477 ; GCN-NEXT: v_writelane_b32 v40, s60, 28
1478 ; GCN-NEXT: v_writelane_b32 v40, s61, 29
1479 ; GCN-NEXT: v_writelane_b32 v40, s62, 30
1480 ; GCN-NEXT: v_writelane_b32 v40, s63, 31
1481 ; GCN-NEXT: s_mov_b64 s[4:5], exec
1482 ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
1483 ; GCN-NEXT: v_readfirstlane_b32 s6, v0
1484 ; GCN-NEXT: v_readfirstlane_b32 s7, v1
1485 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
1486 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
1487 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
1488 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
1489 ; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
1490 ; GCN-NEXT: s_cbranch_execnz .LBB9_1
1491 ; GCN-NEXT: ; %bb.2:
1492 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1493 ; GCN-NEXT: v_readlane_b32 s63, v40, 31
1494 ; GCN-NEXT: v_readlane_b32 s62, v40, 30
1495 ; GCN-NEXT: v_readlane_b32 s61, v40, 29
1496 ; GCN-NEXT: v_readlane_b32 s60, v40, 28
1497 ; GCN-NEXT: v_readlane_b32 s59, v40, 27
1498 ; GCN-NEXT: v_readlane_b32 s58, v40, 26
1499 ; GCN-NEXT: v_readlane_b32 s57, v40, 25
1500 ; GCN-NEXT: v_readlane_b32 s56, v40, 24
1501 ; GCN-NEXT: v_readlane_b32 s55, v40, 23
1502 ; GCN-NEXT: v_readlane_b32 s54, v40, 22
1503 ; GCN-NEXT: v_readlane_b32 s53, v40, 21
1504 ; GCN-NEXT: v_readlane_b32 s52, v40, 20
1505 ; GCN-NEXT: v_readlane_b32 s51, v40, 19
1506 ; GCN-NEXT: v_readlane_b32 s50, v40, 18
1507 ; GCN-NEXT: v_readlane_b32 s49, v40, 17
1508 ; GCN-NEXT: v_readlane_b32 s48, v40, 16
1509 ; GCN-NEXT: v_readlane_b32 s47, v40, 15
1510 ; GCN-NEXT: v_readlane_b32 s46, v40, 14
1511 ; GCN-NEXT: v_readlane_b32 s45, v40, 13
1512 ; GCN-NEXT: v_readlane_b32 s44, v40, 12
1513 ; GCN-NEXT: v_readlane_b32 s43, v40, 11
1514 ; GCN-NEXT: v_readlane_b32 s42, v40, 10
1515 ; GCN-NEXT: v_readlane_b32 s41, v40, 9
1516 ; GCN-NEXT: v_readlane_b32 s40, v40, 8
1517 ; GCN-NEXT: v_readlane_b32 s39, v40, 7
1518 ; GCN-NEXT: v_readlane_b32 s38, v40, 6
1519 ; GCN-NEXT: v_readlane_b32 s37, v40, 5
1520 ; GCN-NEXT: v_readlane_b32 s36, v40, 4
1521 ; GCN-NEXT: v_readlane_b32 s35, v40, 3
1522 ; GCN-NEXT: v_readlane_b32 s34, v40, 2
1523 ; GCN-NEXT: v_readlane_b32 s31, v40, 1
1524 ; GCN-NEXT: v_readlane_b32 s30, v40, 0
1525 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
1526 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1527 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1528 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
1529 ; GCN-NEXT: s_mov_b32 s33, s10
1530 ; GCN-NEXT: s_waitcnt vmcnt(0)
1531 ; GCN-NEXT: s_setpc_b64 s[30:31]
1533 ; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
1535 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1536 ; GISEL-NEXT: s_mov_b32 s10, s33
1537 ; GISEL-NEXT: s_mov_b32 s33, s32
1538 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1539 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1540 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1541 ; GISEL-NEXT: s_addk_i32 s32, 0x400
1542 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0
1543 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1
1544 ; GISEL-NEXT: v_writelane_b32 v40, s34, 2
1545 ; GISEL-NEXT: v_writelane_b32 v40, s35, 3
1546 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4
1547 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5
1548 ; GISEL-NEXT: v_writelane_b32 v40, s38, 6
1549 ; GISEL-NEXT: v_writelane_b32 v40, s39, 7
1550 ; GISEL-NEXT: v_writelane_b32 v40, s40, 8
1551 ; GISEL-NEXT: v_writelane_b32 v40, s41, 9
1552 ; GISEL-NEXT: v_writelane_b32 v40, s42, 10
1553 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11
1554 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12
1555 ; GISEL-NEXT: v_writelane_b32 v40, s45, 13
1556 ; GISEL-NEXT: v_writelane_b32 v40, s46, 14
1557 ; GISEL-NEXT: v_writelane_b32 v40, s47, 15
1558 ; GISEL-NEXT: v_writelane_b32 v40, s48, 16
1559 ; GISEL-NEXT: v_writelane_b32 v40, s49, 17
1560 ; GISEL-NEXT: v_writelane_b32 v40, s50, 18
1561 ; GISEL-NEXT: v_writelane_b32 v40, s51, 19
1562 ; GISEL-NEXT: v_writelane_b32 v40, s52, 20
1563 ; GISEL-NEXT: v_writelane_b32 v40, s53, 21
1564 ; GISEL-NEXT: v_writelane_b32 v40, s54, 22
1565 ; GISEL-NEXT: v_writelane_b32 v40, s55, 23
1566 ; GISEL-NEXT: v_writelane_b32 v40, s56, 24
1567 ; GISEL-NEXT: v_writelane_b32 v40, s57, 25
1568 ; GISEL-NEXT: v_writelane_b32 v40, s58, 26
1569 ; GISEL-NEXT: v_writelane_b32 v40, s59, 27
1570 ; GISEL-NEXT: v_writelane_b32 v40, s60, 28
1571 ; GISEL-NEXT: v_writelane_b32 v40, s61, 29
1572 ; GISEL-NEXT: v_writelane_b32 v40, s62, 30
1573 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31
1574 ; GISEL-NEXT: s_mov_b64 s[4:5], exec
1575 ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
1576 ; GISEL-NEXT: v_readfirstlane_b32 s6, v0
1577 ; GISEL-NEXT: v_readfirstlane_b32 s7, v1
1578 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
1579 ; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
1580 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
1581 ; GISEL-NEXT: ; implicit-def: $vgpr0
1582 ; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
1583 ; GISEL-NEXT: s_cbranch_execnz .LBB9_1
1584 ; GISEL-NEXT: ; %bb.2:
1585 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1586 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31
1587 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30
1588 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29
1589 ; GISEL-NEXT: v_readlane_b32 s60, v40, 28
1590 ; GISEL-NEXT: v_readlane_b32 s59, v40, 27
1591 ; GISEL-NEXT: v_readlane_b32 s58, v40, 26
1592 ; GISEL-NEXT: v_readlane_b32 s57, v40, 25
1593 ; GISEL-NEXT: v_readlane_b32 s56, v40, 24
1594 ; GISEL-NEXT: v_readlane_b32 s55, v40, 23
1595 ; GISEL-NEXT: v_readlane_b32 s54, v40, 22
1596 ; GISEL-NEXT: v_readlane_b32 s53, v40, 21
1597 ; GISEL-NEXT: v_readlane_b32 s52, v40, 20
1598 ; GISEL-NEXT: v_readlane_b32 s51, v40, 19
1599 ; GISEL-NEXT: v_readlane_b32 s50, v40, 18
1600 ; GISEL-NEXT: v_readlane_b32 s49, v40, 17
1601 ; GISEL-NEXT: v_readlane_b32 s48, v40, 16
1602 ; GISEL-NEXT: v_readlane_b32 s47, v40, 15
1603 ; GISEL-NEXT: v_readlane_b32 s46, v40, 14
1604 ; GISEL-NEXT: v_readlane_b32 s45, v40, 13
1605 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12
1606 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11
1607 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10
1608 ; GISEL-NEXT: v_readlane_b32 s41, v40, 9
1609 ; GISEL-NEXT: v_readlane_b32 s40, v40, 8
1610 ; GISEL-NEXT: v_readlane_b32 s39, v40, 7
1611 ; GISEL-NEXT: v_readlane_b32 s38, v40, 6
1612 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5
1613 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4
1614 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3
1615 ; GISEL-NEXT: v_readlane_b32 s34, v40, 2
1616 ; GISEL-NEXT: v_readlane_b32 s31, v40, 1
1617 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0
1618 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
1619 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1620 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
1621 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
1622 ; GISEL-NEXT: s_mov_b32 s33, s10
1623 ; GISEL-NEXT: s_waitcnt vmcnt(0)
1624 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1625 tail call amdgpu_gfx void %fptr()
; Module-level metadata: !0 is the only entry in !llvm.module.flags and carries
; the key "amdhsa_code_object_version" with the value 400.
; NOTE(review): 400 presumably selects AMDHSA code object v4 for the whole
; module; confirm against the AMDGPU backend documentation.
1629 !llvm.module.flags = !{!0}
1630 !0 = !{i32 1, !"amdhsa_code_object_version", i32 400}