1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CI %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=HSA %s
; External callees for the call tests below. Every declaration is `hidden` and
; carries attribute group #0; all return void except external_i32_func_i32.
; The groups below follow the argument type being passed.
;
; i1 argument: plain, signext, zeroext.
8 declare hidden void @external_void_func_i1(i1) #0
9 declare hidden void @external_void_func_i1_signext(i1 signext) #0
10 declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0
; i8 argument: plain, signext, zeroext.
12 declare hidden void @external_void_func_i8(i8) #0
13 declare hidden void @external_void_func_i8_signext(i8 signext) #0
14 declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0
; i16 argument: plain, signext, zeroext.
16 declare hidden void @external_void_func_i16(i16) #0
17 declare hidden void @external_void_func_i16_signext(i16 signext) #0
18 declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0
; i32 / i64 scalars and i64 vectors.
20 declare hidden void @external_void_func_i32(i32) #0
21 declare hidden void @external_void_func_i64(i64) #0
22 declare hidden void @external_void_func_v2i64(<2 x i64>) #0
23 declare hidden void @external_void_func_v3i64(<3 x i64>) #0
24 declare hidden void @external_void_func_v4i64(<4 x i64>) #0
; Floating-point scalars and float/double vectors.
26 declare hidden void @external_void_func_f16(half) #0
27 declare hidden void @external_void_func_f32(float) #0
28 declare hidden void @external_void_func_f64(double) #0
29 declare hidden void @external_void_func_v2f32(<2 x float>) #0
30 declare hidden void @external_void_func_v2f64(<2 x double>) #0
31 declare hidden void @external_void_func_v3f32(<3 x float>) #0
32 declare hidden void @external_void_func_v3f64(<3 x double>) #0
33 declare hidden void @external_void_func_v5f32(<5 x float>) #0
; Vectors of 16-bit elements (i16 and half).
35 declare hidden void @external_void_func_v2i16(<2 x i16>) #0
36 declare hidden void @external_void_func_v2f16(<2 x half>) #0
37 declare hidden void @external_void_func_v3i16(<3 x i16>) #0
38 declare hidden void @external_void_func_v3f16(<3 x half>) #0
39 declare hidden void @external_void_func_v4i16(<4 x i16>) #0
40 declare hidden void @external_void_func_v4f16(<4 x half>) #0
; i32 vectors of 2 to 32 elements; the *_i32 variants take an extra trailing i32.
42 declare hidden void @external_void_func_v2i32(<2 x i32>) #0
43 declare hidden void @external_void_func_v3i32(<3 x i32>) #0
44 declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
45 declare hidden void @external_void_func_v4i32(<4 x i32>) #0
46 declare hidden void @external_void_func_v5i32(<5 x i32>) #0
47 declare hidden void @external_void_func_v8i32(<8 x i32>) #0
48 declare hidden void @external_void_func_v16i32(<16 x i32>) #0
49 declare hidden void @external_void_func_v32i32(<32 x i32>) #0
50 declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
52 ; return value and argument
53 declare hidden i32 @external_i32_func_i32(i32) #0
; Struct arguments: by value, and via byval / sret pointers in addrspace(5).
56 declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0
57 declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0
58 declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
; <16 x i8> vector argument.
60 declare hidden void @external_void_func_v16i8(<16 x i8>) #0
; Kernel that calls @external_void_func_i1 with the constant `i1 true`.
; Every checked target materializes the argument as 1 in v0
; (v_mov_b32_e32 v0, 1), sets s32 to 0 and calls through s_swappc_b64.
; NOTE(review): the FIXME below presumably refers to that v0 value being 1
; rather than -1 -- confirm.
62 ; FIXME: Should be passing -1
63 define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
64 ; VI-LABEL: test_call_external_void_func_i1_imm:
66 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
67 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
68 ; VI-NEXT: s_mov_b32 s38, -1
69 ; VI-NEXT: s_mov_b32 s39, 0xe80000
70 ; VI-NEXT: s_add_u32 s36, s36, s3
71 ; VI-NEXT: s_addc_u32 s37, s37, 0
72 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
73 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
74 ; VI-NEXT: s_getpc_b64 s[4:5]
75 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
76 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
77 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
78 ; VI-NEXT: v_mov_b32_e32 v0, 1
79 ; VI-NEXT: s_mov_b32 s32, 0
80 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
83 ; CI-LABEL: test_call_external_void_func_i1_imm:
85 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
86 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
87 ; CI-NEXT: s_mov_b32 s38, -1
88 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
89 ; CI-NEXT: s_add_u32 s36, s36, s3
90 ; CI-NEXT: s_addc_u32 s37, s37, 0
91 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
92 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
93 ; CI-NEXT: s_getpc_b64 s[4:5]
94 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
95 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
96 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
97 ; CI-NEXT: v_mov_b32_e32 v0, 1
98 ; CI-NEXT: s_mov_b32 s32, 0
99 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
102 ; GFX9-LABEL: test_call_external_void_func_i1_imm:
104 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
105 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
106 ; GFX9-NEXT: s_mov_b32 s38, -1
107 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
108 ; GFX9-NEXT: s_add_u32 s36, s36, s3
109 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
110 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
111 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
112 ; GFX9-NEXT: s_getpc_b64 s[4:5]
113 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
114 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
115 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
116 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
117 ; GFX9-NEXT: s_mov_b32 s32, 0
118 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
119 ; GFX9-NEXT: s_endpgm
121 ; GFX11-LABEL: test_call_external_void_func_i1_imm:
123 ; GFX11-NEXT: v_mov_b32_e32 v0, 1
124 ; GFX11-NEXT: s_getpc_b64 s[2:3]
125 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1@rel32@lo+4
126 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1@rel32@hi+12
127 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
128 ; GFX11-NEXT: s_mov_b32 s32, 0
129 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
130 ; GFX11-NEXT: s_endpgm
132 ; HSA-LABEL: test_call_external_void_func_i1_imm:
134 ; HSA-NEXT: s_add_i32 s6, s6, s9
135 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
136 ; HSA-NEXT: s_add_u32 s0, s0, s9
137 ; HSA-NEXT: s_addc_u32 s1, s1, 0
138 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
139 ; HSA-NEXT: s_getpc_b64 s[8:9]
140 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1@rel32@lo+4
141 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1@rel32@hi+12
142 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
143 ; HSA-NEXT: v_mov_b32_e32 v0, 1
144 ; HSA-NEXT: s_mov_b32 s32, 0
145 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
147 call void @external_void_func_i1(i1 true)
; Kernel that does a volatile i1 load from an undef addrspace(1) pointer and
; passes the value to @external_void_func_i1_signext with the `signext`
; attribute. The checks expect an unsigned byte load (buffer_load_ubyte, or
; buffer_load_u8 on GFX11) followed by `v_bfe_i32 v0, v0, 0, 1`, which
; sign-extends bit 0 into v0 before the s_swappc_b64 call.
; The unnamed i32 kernel argument is not referenced in the visible body.
151 define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
152 ; VI-LABEL: test_call_external_void_func_i1_signext:
154 ; VI-NEXT: s_mov_b32 s3, 0xf000
155 ; VI-NEXT: s_mov_b32 s2, -1
156 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
157 ; VI-NEXT: s_waitcnt vmcnt(0)
158 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
159 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
160 ; VI-NEXT: s_mov_b32 s38, -1
161 ; VI-NEXT: s_mov_b32 s39, 0xe80000
162 ; VI-NEXT: s_add_u32 s36, s36, s5
163 ; VI-NEXT: s_addc_u32 s37, s37, 0
164 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
165 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
166 ; VI-NEXT: s_getpc_b64 s[4:5]
167 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
168 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
169 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
170 ; VI-NEXT: s_mov_b32 s32, 0
171 ; VI-NEXT: v_bfe_i32 v0, v0, 0, 1
172 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
175 ; CI-LABEL: test_call_external_void_func_i1_signext:
177 ; CI-NEXT: s_mov_b32 s3, 0xf000
178 ; CI-NEXT: s_mov_b32 s2, -1
179 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
180 ; CI-NEXT: s_waitcnt vmcnt(0)
181 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
182 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
183 ; CI-NEXT: s_mov_b32 s38, -1
184 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
185 ; CI-NEXT: s_add_u32 s36, s36, s5
186 ; CI-NEXT: s_addc_u32 s37, s37, 0
187 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
188 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
189 ; CI-NEXT: s_getpc_b64 s[4:5]
190 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
191 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
192 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
193 ; CI-NEXT: s_mov_b32 s32, 0
194 ; CI-NEXT: v_bfe_i32 v0, v0, 0, 1
195 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
198 ; GFX9-LABEL: test_call_external_void_func_i1_signext:
200 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
201 ; GFX9-NEXT: s_mov_b32 s2, -1
202 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
203 ; GFX9-NEXT: s_waitcnt vmcnt(0)
204 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
205 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
206 ; GFX9-NEXT: s_mov_b32 s38, -1
207 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
208 ; GFX9-NEXT: s_add_u32 s36, s36, s5
209 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
210 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
211 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
212 ; GFX9-NEXT: s_getpc_b64 s[4:5]
213 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
214 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
215 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
216 ; GFX9-NEXT: s_mov_b32 s32, 0
217 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1
218 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
219 ; GFX9-NEXT: s_endpgm
221 ; GFX11-LABEL: test_call_external_void_func_i1_signext:
223 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
224 ; GFX11-NEXT: s_mov_b32 s2, -1
225 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
226 ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc
227 ; GFX11-NEXT: s_waitcnt vmcnt(0)
228 ; GFX11-NEXT: s_getpc_b64 s[2:3]
229 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1_signext@rel32@lo+4
230 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1_signext@rel32@hi+12
231 ; GFX11-NEXT: s_mov_b32 s32, 0
232 ; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 1
233 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
234 ; GFX11-NEXT: s_endpgm
236 ; HSA-LABEL: test_call_external_void_func_i1_signext:
238 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
239 ; HSA-NEXT: s_mov_b32 s6, -1
240 ; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc
241 ; HSA-NEXT: s_waitcnt vmcnt(0)
242 ; HSA-NEXT: s_add_i32 s8, s8, s11
243 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
244 ; HSA-NEXT: s_add_u32 s0, s0, s11
245 ; HSA-NEXT: s_addc_u32 s1, s1, 0
246 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
247 ; HSA-NEXT: s_getpc_b64 s[8:9]
248 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1_signext@rel32@lo+4
249 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1_signext@rel32@hi+12
250 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
251 ; HSA-NEXT: s_mov_b32 s32, 0
252 ; HSA-NEXT: v_bfe_i32 v0, v0, 0, 1
253 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
255 %var = load volatile i1, ptr addrspace(1) undef
256 call void @external_void_func_i1_signext(i1 signext %var)
; Kernel that does a volatile i1 load from an undef addrspace(1) pointer and
; passes the value to @external_void_func_i1_zeroext with the `zeroext`
; attribute. The checks expect an unsigned byte load (buffer_load_ubyte, or
; buffer_load_u8 on GFX11) followed by `v_and_b32_e32 v0, 1, v0`, which masks
; v0 down to bit 0 before the s_swappc_b64 call.
; NOTE(review): in the checks below the buffer load already precedes
; s_getpc_b64 on every target, so the FIXME may be stale -- confirm.
260 ; FIXME: load should be scheduled before getpc
261 define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
262 ; VI-LABEL: test_call_external_void_func_i1_zeroext:
264 ; VI-NEXT: s_mov_b32 s3, 0xf000
265 ; VI-NEXT: s_mov_b32 s2, -1
266 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
267 ; VI-NEXT: s_waitcnt vmcnt(0)
268 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
269 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
270 ; VI-NEXT: s_mov_b32 s38, -1
271 ; VI-NEXT: s_mov_b32 s39, 0xe80000
272 ; VI-NEXT: s_add_u32 s36, s36, s5
273 ; VI-NEXT: s_addc_u32 s37, s37, 0
274 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
275 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
276 ; VI-NEXT: s_getpc_b64 s[4:5]
277 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
278 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
279 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
280 ; VI-NEXT: s_mov_b32 s32, 0
281 ; VI-NEXT: v_and_b32_e32 v0, 1, v0
282 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
285 ; CI-LABEL: test_call_external_void_func_i1_zeroext:
287 ; CI-NEXT: s_mov_b32 s3, 0xf000
288 ; CI-NEXT: s_mov_b32 s2, -1
289 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
290 ; CI-NEXT: s_waitcnt vmcnt(0)
291 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
292 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
293 ; CI-NEXT: s_mov_b32 s38, -1
294 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
295 ; CI-NEXT: s_add_u32 s36, s36, s5
296 ; CI-NEXT: s_addc_u32 s37, s37, 0
297 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
298 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
299 ; CI-NEXT: s_getpc_b64 s[4:5]
300 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
301 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
302 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
303 ; CI-NEXT: s_mov_b32 s32, 0
304 ; CI-NEXT: v_and_b32_e32 v0, 1, v0
305 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
308 ; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
310 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
311 ; GFX9-NEXT: s_mov_b32 s2, -1
312 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
313 ; GFX9-NEXT: s_waitcnt vmcnt(0)
314 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
315 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
316 ; GFX9-NEXT: s_mov_b32 s38, -1
317 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
318 ; GFX9-NEXT: s_add_u32 s36, s36, s5
319 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
320 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
321 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
322 ; GFX9-NEXT: s_getpc_b64 s[4:5]
323 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
324 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
325 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
326 ; GFX9-NEXT: s_mov_b32 s32, 0
327 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
328 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
329 ; GFX9-NEXT: s_endpgm
331 ; GFX11-LABEL: test_call_external_void_func_i1_zeroext:
333 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
334 ; GFX11-NEXT: s_mov_b32 s2, -1
335 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
336 ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc
337 ; GFX11-NEXT: s_waitcnt vmcnt(0)
338 ; GFX11-NEXT: s_getpc_b64 s[2:3]
339 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1_zeroext@rel32@lo+4
340 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1_zeroext@rel32@hi+12
341 ; GFX11-NEXT: s_mov_b32 s32, 0
342 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
343 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
344 ; GFX11-NEXT: s_endpgm
346 ; HSA-LABEL: test_call_external_void_func_i1_zeroext:
348 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
349 ; HSA-NEXT: s_mov_b32 s6, -1
350 ; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc
351 ; HSA-NEXT: s_waitcnt vmcnt(0)
352 ; HSA-NEXT: s_add_i32 s8, s8, s11
353 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
354 ; HSA-NEXT: s_add_u32 s0, s0, s11
355 ; HSA-NEXT: s_addc_u32 s1, s1, 0
356 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
357 ; HSA-NEXT: s_getpc_b64 s[8:9]
358 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1_zeroext@rel32@lo+4
359 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1_zeroext@rel32@hi+12
360 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
361 ; HSA-NEXT: s_mov_b32 s32, 0
362 ; HSA-NEXT: v_and_b32_e32 v0, 1, v0
363 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
365 %var = load volatile i1, ptr addrspace(1) undef
366 call void @external_void_func_i1_zeroext(i1 zeroext %var)
; Kernel that calls @external_void_func_i8 with the constant `i8 123`.
; Every checked target materializes the argument as 0x7b in v0
; (v_mov_b32_e32 v0, 0x7b), sets s32 to 0 and calls through s_swappc_b64.
; The unnamed i32 kernel argument is not referenced in the visible body.
370 define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
371 ; VI-LABEL: test_call_external_void_func_i8_imm:
373 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
374 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
375 ; VI-NEXT: s_mov_b32 s38, -1
376 ; VI-NEXT: s_mov_b32 s39, 0xe80000
377 ; VI-NEXT: s_add_u32 s36, s36, s5
378 ; VI-NEXT: s_addc_u32 s37, s37, 0
379 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
380 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
381 ; VI-NEXT: s_getpc_b64 s[4:5]
382 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
383 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
384 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
385 ; VI-NEXT: v_mov_b32_e32 v0, 0x7b
386 ; VI-NEXT: s_mov_b32 s32, 0
387 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
390 ; CI-LABEL: test_call_external_void_func_i8_imm:
392 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
393 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
394 ; CI-NEXT: s_mov_b32 s38, -1
395 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
396 ; CI-NEXT: s_add_u32 s36, s36, s5
397 ; CI-NEXT: s_addc_u32 s37, s37, 0
398 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
399 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
400 ; CI-NEXT: s_getpc_b64 s[4:5]
401 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
402 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
403 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
404 ; CI-NEXT: v_mov_b32_e32 v0, 0x7b
405 ; CI-NEXT: s_mov_b32 s32, 0
406 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
409 ; GFX9-LABEL: test_call_external_void_func_i8_imm:
411 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
412 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
413 ; GFX9-NEXT: s_mov_b32 s38, -1
414 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
415 ; GFX9-NEXT: s_add_u32 s36, s36, s5
416 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
417 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
418 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
419 ; GFX9-NEXT: s_getpc_b64 s[4:5]
420 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
421 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
422 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
423 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
424 ; GFX9-NEXT: s_mov_b32 s32, 0
425 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
426 ; GFX9-NEXT: s_endpgm
428 ; GFX11-LABEL: test_call_external_void_func_i8_imm:
430 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
431 ; GFX11-NEXT: s_getpc_b64 s[2:3]
432 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4
433 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12
434 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
435 ; GFX11-NEXT: s_mov_b32 s32, 0
436 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
437 ; GFX11-NEXT: s_endpgm
439 ; HSA-LABEL: test_call_external_void_func_i8_imm:
441 ; HSA-NEXT: s_add_i32 s8, s8, s11
442 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
443 ; HSA-NEXT: s_add_u32 s0, s0, s11
444 ; HSA-NEXT: s_addc_u32 s1, s1, 0
445 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
446 ; HSA-NEXT: s_getpc_b64 s[8:9]
447 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8@rel32@lo+4
448 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8@rel32@hi+12
449 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
450 ; HSA-NEXT: v_mov_b32_e32 v0, 0x7b
451 ; HSA-NEXT: s_mov_b32 s32, 0
452 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
454 call void @external_void_func_i8(i8 123)
; Kernel that does a volatile i8 load from an undef addrspace(1) pointer and
; passes the value to @external_void_func_i8_signext with the `signext`
; attribute. The checks expect a signed byte load (buffer_load_sbyte, or
; buffer_load_i8 on GFX11) and no separate extension instruction before the
; s_swappc_b64 call.
; NOTE(review): the FIXME below presumably refers to the s_waitcnt vmcnt(0)
; that the checks show right after the load -- confirm.
458 ; FIXME: don't wait before call
459 define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
460 ; VI-LABEL: test_call_external_void_func_i8_signext:
462 ; VI-NEXT: s_mov_b32 s3, 0xf000
463 ; VI-NEXT: s_mov_b32 s2, -1
464 ; VI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
465 ; VI-NEXT: s_waitcnt vmcnt(0)
466 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
467 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
468 ; VI-NEXT: s_mov_b32 s38, -1
469 ; VI-NEXT: s_mov_b32 s39, 0xe80000
470 ; VI-NEXT: s_add_u32 s36, s36, s5
471 ; VI-NEXT: s_addc_u32 s37, s37, 0
472 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
473 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
474 ; VI-NEXT: s_getpc_b64 s[4:5]
475 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
476 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
477 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
478 ; VI-NEXT: s_mov_b32 s32, 0
479 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
482 ; CI-LABEL: test_call_external_void_func_i8_signext:
484 ; CI-NEXT: s_mov_b32 s3, 0xf000
485 ; CI-NEXT: s_mov_b32 s2, -1
486 ; CI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
487 ; CI-NEXT: s_waitcnt vmcnt(0)
488 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
489 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
490 ; CI-NEXT: s_mov_b32 s38, -1
491 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
492 ; CI-NEXT: s_add_u32 s36, s36, s5
493 ; CI-NEXT: s_addc_u32 s37, s37, 0
494 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
495 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
496 ; CI-NEXT: s_getpc_b64 s[4:5]
497 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
498 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
499 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
500 ; CI-NEXT: s_mov_b32 s32, 0
501 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
504 ; GFX9-LABEL: test_call_external_void_func_i8_signext:
506 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
507 ; GFX9-NEXT: s_mov_b32 s2, -1
508 ; GFX9-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
509 ; GFX9-NEXT: s_waitcnt vmcnt(0)
510 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
511 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
512 ; GFX9-NEXT: s_mov_b32 s38, -1
513 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
514 ; GFX9-NEXT: s_add_u32 s36, s36, s5
515 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
516 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
517 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
518 ; GFX9-NEXT: s_getpc_b64 s[4:5]
519 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
520 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
521 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
522 ; GFX9-NEXT: s_mov_b32 s32, 0
523 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
524 ; GFX9-NEXT: s_endpgm
526 ; GFX11-LABEL: test_call_external_void_func_i8_signext:
528 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
529 ; GFX11-NEXT: s_mov_b32 s2, -1
530 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
531 ; GFX11-NEXT: buffer_load_i8 v0, off, s[0:3], 0 glc dlc
532 ; GFX11-NEXT: s_waitcnt vmcnt(0)
533 ; GFX11-NEXT: s_getpc_b64 s[2:3]
534 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_signext@rel32@lo+4
535 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_signext@rel32@hi+12
536 ; GFX11-NEXT: s_mov_b32 s32, 0
537 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
538 ; GFX11-NEXT: s_endpgm
540 ; HSA-LABEL: test_call_external_void_func_i8_signext:
542 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
543 ; HSA-NEXT: s_mov_b32 s6, -1
544 ; HSA-NEXT: buffer_load_sbyte v0, off, s[4:7], 0 glc
545 ; HSA-NEXT: s_waitcnt vmcnt(0)
546 ; HSA-NEXT: s_add_i32 s8, s8, s11
547 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
548 ; HSA-NEXT: s_add_u32 s0, s0, s11
549 ; HSA-NEXT: s_addc_u32 s1, s1, 0
550 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
551 ; HSA-NEXT: s_getpc_b64 s[8:9]
552 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8_signext@rel32@lo+4
553 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8_signext@rel32@hi+12
554 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
555 ; HSA-NEXT: s_mov_b32 s32, 0
556 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
558 %var = load volatile i8, ptr addrspace(1) undef
559 call void @external_void_func_i8_signext(i8 signext %var)
; Kernel that does a volatile i8 load from an undef addrspace(1) pointer and
; passes the value to @external_void_func_i8_zeroext with the `zeroext`
; attribute. The checks expect an unsigned byte load (buffer_load_ubyte, or
; buffer_load_u8 on GFX11) and no separate extension instruction before the
; s_swappc_b64 call.
; The unnamed i32 kernel argument is not referenced in the visible body.
563 define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
564 ; VI-LABEL: test_call_external_void_func_i8_zeroext:
566 ; VI-NEXT: s_mov_b32 s3, 0xf000
567 ; VI-NEXT: s_mov_b32 s2, -1
568 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
569 ; VI-NEXT: s_waitcnt vmcnt(0)
570 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
571 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
572 ; VI-NEXT: s_mov_b32 s38, -1
573 ; VI-NEXT: s_mov_b32 s39, 0xe80000
574 ; VI-NEXT: s_add_u32 s36, s36, s5
575 ; VI-NEXT: s_addc_u32 s37, s37, 0
576 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
577 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
578 ; VI-NEXT: s_getpc_b64 s[4:5]
579 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
580 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
581 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
582 ; VI-NEXT: s_mov_b32 s32, 0
583 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
586 ; CI-LABEL: test_call_external_void_func_i8_zeroext:
588 ; CI-NEXT: s_mov_b32 s3, 0xf000
589 ; CI-NEXT: s_mov_b32 s2, -1
590 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
591 ; CI-NEXT: s_waitcnt vmcnt(0)
592 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
593 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
594 ; CI-NEXT: s_mov_b32 s38, -1
595 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
596 ; CI-NEXT: s_add_u32 s36, s36, s5
597 ; CI-NEXT: s_addc_u32 s37, s37, 0
598 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
599 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
600 ; CI-NEXT: s_getpc_b64 s[4:5]
601 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
602 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
603 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
604 ; CI-NEXT: s_mov_b32 s32, 0
605 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
608 ; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
610 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
611 ; GFX9-NEXT: s_mov_b32 s2, -1
612 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
613 ; GFX9-NEXT: s_waitcnt vmcnt(0)
614 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
615 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
616 ; GFX9-NEXT: s_mov_b32 s38, -1
617 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
618 ; GFX9-NEXT: s_add_u32 s36, s36, s5
619 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
620 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
621 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
622 ; GFX9-NEXT: s_getpc_b64 s[4:5]
623 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
624 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
625 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
626 ; GFX9-NEXT: s_mov_b32 s32, 0
627 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
628 ; GFX9-NEXT: s_endpgm
630 ; GFX11-LABEL: test_call_external_void_func_i8_zeroext:
632 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
633 ; GFX11-NEXT: s_mov_b32 s2, -1
634 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
635 ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc
636 ; GFX11-NEXT: s_waitcnt vmcnt(0)
637 ; GFX11-NEXT: s_getpc_b64 s[2:3]
638 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_zeroext@rel32@lo+4
639 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_zeroext@rel32@hi+12
640 ; GFX11-NEXT: s_mov_b32 s32, 0
641 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
642 ; GFX11-NEXT: s_endpgm
644 ; HSA-LABEL: test_call_external_void_func_i8_zeroext:
646 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
647 ; HSA-NEXT: s_mov_b32 s6, -1
648 ; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc
649 ; HSA-NEXT: s_waitcnt vmcnt(0)
650 ; HSA-NEXT: s_add_i32 s8, s8, s11
651 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
652 ; HSA-NEXT: s_add_u32 s0, s0, s11
653 ; HSA-NEXT: s_addc_u32 s1, s1, 0
654 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
655 ; HSA-NEXT: s_getpc_b64 s[8:9]
656 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8_zeroext@rel32@lo+4
657 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8_zeroext@rel32@hi+12
658 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
659 ; HSA-NEXT: s_mov_b32 s32, 0
660 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
662 %var = load volatile i8, ptr addrspace(1) undef
663 call void @external_void_func_i8_zeroext(i8 zeroext %var)
; Kernel that calls @external_void_func_i16 with the constant `i16 123`.
; Every checked target materializes the argument as 0x7b in v0
; (v_mov_b32_e32 v0, 0x7b), sets s32 to 0 and calls through s_swappc_b64.
667 define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
668 ; VI-LABEL: test_call_external_void_func_i16_imm:
670 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
671 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
672 ; VI-NEXT: s_mov_b32 s38, -1
673 ; VI-NEXT: s_mov_b32 s39, 0xe80000
674 ; VI-NEXT: s_add_u32 s36, s36, s3
675 ; VI-NEXT: s_addc_u32 s37, s37, 0
676 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
677 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
678 ; VI-NEXT: s_getpc_b64 s[4:5]
679 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
680 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
681 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
682 ; VI-NEXT: v_mov_b32_e32 v0, 0x7b
683 ; VI-NEXT: s_mov_b32 s32, 0
684 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
687 ; CI-LABEL: test_call_external_void_func_i16_imm:
689 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
690 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
691 ; CI-NEXT: s_mov_b32 s38, -1
692 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
693 ; CI-NEXT: s_add_u32 s36, s36, s3
694 ; CI-NEXT: s_addc_u32 s37, s37, 0
695 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
696 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
697 ; CI-NEXT: s_getpc_b64 s[4:5]
698 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
699 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
700 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
701 ; CI-NEXT: v_mov_b32_e32 v0, 0x7b
702 ; CI-NEXT: s_mov_b32 s32, 0
703 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
706 ; GFX9-LABEL: test_call_external_void_func_i16_imm:
708 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
709 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
710 ; GFX9-NEXT: s_mov_b32 s38, -1
711 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
712 ; GFX9-NEXT: s_add_u32 s36, s36, s3
713 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
714 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
715 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
716 ; GFX9-NEXT: s_getpc_b64 s[4:5]
717 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
718 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
719 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
720 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
721 ; GFX9-NEXT: s_mov_b32 s32, 0
722 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
723 ; GFX9-NEXT: s_endpgm
725 ; GFX11-LABEL: test_call_external_void_func_i16_imm:
727 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
728 ; GFX11-NEXT: s_getpc_b64 s[2:3]
729 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4
730 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12
731 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
732 ; GFX11-NEXT: s_mov_b32 s32, 0
733 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
734 ; GFX11-NEXT: s_endpgm
736 ; HSA-LABEL: test_call_external_void_func_i16_imm:
738 ; HSA-NEXT: s_add_i32 s6, s6, s9
739 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
740 ; HSA-NEXT: s_add_u32 s0, s0, s9
741 ; HSA-NEXT: s_addc_u32 s1, s1, 0
742 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
743 ; HSA-NEXT: s_getpc_b64 s[8:9]
744 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16@rel32@lo+4
745 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16@rel32@hi+12
746 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
747 ; HSA-NEXT: v_mov_b32_e32 v0, 0x7b
748 ; HSA-NEXT: s_mov_b32 s32, 0
749 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
751 call void @external_void_func_i16(i16 123)
; Kernel that performs a volatile i16 load through an undef addrspace(1)
; pointer and passes the loaded value to external_void_func_i16_signext as a
; `signext` argument. The kernel's own unnamed i32 parameter is not used by
; the body.
; For every run line the expected code places the value in v0 directly from
; the signed-short form of the buffer load (buffer_load_sshort, or
; buffer_load_i16 in the gfx1100 run); no separate extension instruction is
; expected between the load and the s_swappc_b64 call.
755 define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
756 ; VI-LABEL: test_call_external_void_func_i16_signext:
758 ; VI-NEXT: s_mov_b32 s3, 0xf000
759 ; VI-NEXT: s_mov_b32 s2, -1
760 ; VI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
761 ; VI-NEXT: s_waitcnt vmcnt(0)
762 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
763 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
764 ; VI-NEXT: s_mov_b32 s38, -1
765 ; VI-NEXT: s_mov_b32 s39, 0xe80000
766 ; VI-NEXT: s_add_u32 s36, s36, s5
767 ; VI-NEXT: s_addc_u32 s37, s37, 0
768 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
769 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
770 ; VI-NEXT: s_getpc_b64 s[4:5]
771 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
772 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
773 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
774 ; VI-NEXT: s_mov_b32 s32, 0
775 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
778 ; CI-LABEL: test_call_external_void_func_i16_signext:
780 ; CI-NEXT: s_mov_b32 s3, 0xf000
781 ; CI-NEXT: s_mov_b32 s2, -1
782 ; CI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
783 ; CI-NEXT: s_waitcnt vmcnt(0)
784 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
785 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
786 ; CI-NEXT: s_mov_b32 s38, -1
787 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
788 ; CI-NEXT: s_add_u32 s36, s36, s5
789 ; CI-NEXT: s_addc_u32 s37, s37, 0
790 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
791 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
792 ; CI-NEXT: s_getpc_b64 s[4:5]
793 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
794 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
795 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
796 ; CI-NEXT: s_mov_b32 s32, 0
797 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
800 ; GFX9-LABEL: test_call_external_void_func_i16_signext:
802 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
803 ; GFX9-NEXT: s_mov_b32 s2, -1
804 ; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
805 ; GFX9-NEXT: s_waitcnt vmcnt(0)
806 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
807 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
808 ; GFX9-NEXT: s_mov_b32 s38, -1
809 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
810 ; GFX9-NEXT: s_add_u32 s36, s36, s5
811 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
812 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
813 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
814 ; GFX9-NEXT: s_getpc_b64 s[4:5]
815 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
816 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
817 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
818 ; GFX9-NEXT: s_mov_b32 s32, 0
819 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
820 ; GFX9-NEXT: s_endpgm
822 ; GFX11-LABEL: test_call_external_void_func_i16_signext:
824 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
825 ; GFX11-NEXT: s_mov_b32 s2, -1
826 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
827 ; GFX11-NEXT: buffer_load_i16 v0, off, s[0:3], 0 glc dlc
828 ; GFX11-NEXT: s_waitcnt vmcnt(0)
829 ; GFX11-NEXT: s_getpc_b64 s[2:3]
830 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16_signext@rel32@lo+4
831 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_signext@rel32@hi+12
832 ; GFX11-NEXT: s_mov_b32 s32, 0
833 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
834 ; GFX11-NEXT: s_endpgm
836 ; HSA-LABEL: test_call_external_void_func_i16_signext:
838 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
839 ; HSA-NEXT: s_mov_b32 s6, -1
840 ; HSA-NEXT: buffer_load_sshort v0, off, s[4:7], 0 glc
841 ; HSA-NEXT: s_waitcnt vmcnt(0)
842 ; HSA-NEXT: s_add_i32 s8, s8, s11
843 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
844 ; HSA-NEXT: s_add_u32 s0, s0, s11
845 ; HSA-NEXT: s_addc_u32 s1, s1, 0
846 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
847 ; HSA-NEXT: s_getpc_b64 s[8:9]
848 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16_signext@rel32@lo+4
849 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16_signext@rel32@hi+12
850 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
851 ; HSA-NEXT: s_mov_b32 s32, 0
852 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; IR under test. The assertions above are autogenerated (see the note on the
; first line of this file); if this IR changes, regenerate them with
; update_llc_test_checks.py instead of editing them by hand.
854 %var = load volatile i16, ptr addrspace(1) undef
855 call void @external_void_func_i16_signext(i16 signext %var)
; Kernel that performs a volatile i16 load through an undef addrspace(1)
; pointer and passes the loaded value to external_void_func_i16_zeroext as a
; `zeroext` argument. The kernel's own unnamed i32 parameter is not used by
; the body.
; For every run line the expected code places the value in v0 directly from
; the unsigned-short form of the buffer load (buffer_load_ushort, or
; buffer_load_u16 in the gfx1100 run); no separate extension or masking
; instruction is expected between the load and the s_swappc_b64 call.
859 define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
860 ; VI-LABEL: test_call_external_void_func_i16_zeroext:
862 ; VI-NEXT: s_mov_b32 s3, 0xf000
863 ; VI-NEXT: s_mov_b32 s2, -1
864 ; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
865 ; VI-NEXT: s_waitcnt vmcnt(0)
866 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
867 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
868 ; VI-NEXT: s_mov_b32 s38, -1
869 ; VI-NEXT: s_mov_b32 s39, 0xe80000
870 ; VI-NEXT: s_add_u32 s36, s36, s5
871 ; VI-NEXT: s_addc_u32 s37, s37, 0
872 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
873 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
874 ; VI-NEXT: s_getpc_b64 s[4:5]
875 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
876 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
877 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
878 ; VI-NEXT: s_mov_b32 s32, 0
879 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
882 ; CI-LABEL: test_call_external_void_func_i16_zeroext:
884 ; CI-NEXT: s_mov_b32 s3, 0xf000
885 ; CI-NEXT: s_mov_b32 s2, -1
886 ; CI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
887 ; CI-NEXT: s_waitcnt vmcnt(0)
888 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
889 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
890 ; CI-NEXT: s_mov_b32 s38, -1
891 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
892 ; CI-NEXT: s_add_u32 s36, s36, s5
893 ; CI-NEXT: s_addc_u32 s37, s37, 0
894 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
895 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
896 ; CI-NEXT: s_getpc_b64 s[4:5]
897 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
898 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
899 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
900 ; CI-NEXT: s_mov_b32 s32, 0
901 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
904 ; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
906 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
907 ; GFX9-NEXT: s_mov_b32 s2, -1
908 ; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
909 ; GFX9-NEXT: s_waitcnt vmcnt(0)
910 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
911 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
912 ; GFX9-NEXT: s_mov_b32 s38, -1
913 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
914 ; GFX9-NEXT: s_add_u32 s36, s36, s5
915 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
916 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
917 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
918 ; GFX9-NEXT: s_getpc_b64 s[4:5]
919 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
920 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
921 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
922 ; GFX9-NEXT: s_mov_b32 s32, 0
923 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
924 ; GFX9-NEXT: s_endpgm
926 ; GFX11-LABEL: test_call_external_void_func_i16_zeroext:
928 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
929 ; GFX11-NEXT: s_mov_b32 s2, -1
930 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
931 ; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0 glc dlc
932 ; GFX11-NEXT: s_waitcnt vmcnt(0)
933 ; GFX11-NEXT: s_getpc_b64 s[2:3]
934 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16_zeroext@rel32@lo+4
935 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_zeroext@rel32@hi+12
936 ; GFX11-NEXT: s_mov_b32 s32, 0
937 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
938 ; GFX11-NEXT: s_endpgm
940 ; HSA-LABEL: test_call_external_void_func_i16_zeroext:
942 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
943 ; HSA-NEXT: s_mov_b32 s6, -1
944 ; HSA-NEXT: buffer_load_ushort v0, off, s[4:7], 0 glc
945 ; HSA-NEXT: s_waitcnt vmcnt(0)
946 ; HSA-NEXT: s_add_i32 s8, s8, s11
947 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
948 ; HSA-NEXT: s_add_u32 s0, s0, s11
949 ; HSA-NEXT: s_addc_u32 s1, s1, 0
950 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
951 ; HSA-NEXT: s_getpc_b64 s[8:9]
952 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16_zeroext@rel32@lo+4
953 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16_zeroext@rel32@hi+12
954 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
955 ; HSA-NEXT: s_mov_b32 s32, 0
956 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; IR under test. The assertions above are autogenerated (see the note on the
; first line of this file); if this IR changes, regenerate them with
; update_llc_test_checks.py instead of editing them by hand.
958 %var = load volatile i16, ptr addrspace(1) undef
959 call void @external_void_func_i16_zeroext(i16 zeroext %var)
; Kernel that calls external_void_func_i32 with the constant i32 42. The
; kernel's own unnamed i32 parameter is not used by the body.
; For every run line the expected code materializes the argument with a
; single `v_mov_b32_e32 v0, 42` before the s_swappc_b64 call.
963 define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
964 ; VI-LABEL: test_call_external_void_func_i32_imm:
966 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
967 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
968 ; VI-NEXT: s_mov_b32 s38, -1
969 ; VI-NEXT: s_mov_b32 s39, 0xe80000
970 ; VI-NEXT: s_add_u32 s36, s36, s5
971 ; VI-NEXT: s_addc_u32 s37, s37, 0
972 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
973 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
974 ; VI-NEXT: s_getpc_b64 s[4:5]
975 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
976 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
977 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
978 ; VI-NEXT: v_mov_b32_e32 v0, 42
979 ; VI-NEXT: s_mov_b32 s32, 0
980 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
983 ; CI-LABEL: test_call_external_void_func_i32_imm:
985 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
986 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
987 ; CI-NEXT: s_mov_b32 s38, -1
988 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
989 ; CI-NEXT: s_add_u32 s36, s36, s5
990 ; CI-NEXT: s_addc_u32 s37, s37, 0
991 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
992 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
993 ; CI-NEXT: s_getpc_b64 s[4:5]
994 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
995 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
996 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
997 ; CI-NEXT: v_mov_b32_e32 v0, 42
998 ; CI-NEXT: s_mov_b32 s32, 0
999 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1002 ; GFX9-LABEL: test_call_external_void_func_i32_imm:
1004 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1005 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1006 ; GFX9-NEXT: s_mov_b32 s38, -1
1007 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1008 ; GFX9-NEXT: s_add_u32 s36, s36, s5
1009 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1010 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1011 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1012 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1013 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
1014 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
1015 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1016 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
1017 ; GFX9-NEXT: s_mov_b32 s32, 0
1018 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1019 ; GFX9-NEXT: s_endpgm
1021 ; GFX11-LABEL: test_call_external_void_func_i32_imm:
1023 ; GFX11-NEXT: v_mov_b32_e32 v0, 42
1024 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1025 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i32@rel32@lo+4
1026 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i32@rel32@hi+12
1027 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1028 ; GFX11-NEXT: s_mov_b32 s32, 0
1029 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1030 ; GFX11-NEXT: s_endpgm
1032 ; HSA-LABEL: test_call_external_void_func_i32_imm:
1034 ; HSA-NEXT: s_add_i32 s8, s8, s11
1035 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
1036 ; HSA-NEXT: s_add_u32 s0, s0, s11
1037 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1038 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
1039 ; HSA-NEXT: s_getpc_b64 s[8:9]
1040 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i32@rel32@lo+4
1041 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i32@rel32@hi+12
1042 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1043 ; HSA-NEXT: v_mov_b32_e32 v0, 42
1044 ; HSA-NEXT: s_mov_b32 s32, 0
1045 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1046 ; HSA-NEXT: s_endpgm
; IR under test. The assertions above are autogenerated; regenerate them with
; update_llc_test_checks.py if this IR changes.
1047 call void @external_void_func_i32(i32 42)
; Kernel (no parameters) that calls external_void_func_i64 with the constant
; i64 123.
; For every run line the expected code passes the value in the v0/v1 pair:
; v0 = 0x7b (123, the low 32 bits) and v1 = 0 (the high 32 bits). The gfx1100
; run expects both moves fused into one v_dual_mov_b32.
1051 define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
1052 ; VI-LABEL: test_call_external_void_func_i64_imm:
1054 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1055 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1056 ; VI-NEXT: s_mov_b32 s38, -1
1057 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1058 ; VI-NEXT: s_add_u32 s36, s36, s3
1059 ; VI-NEXT: s_addc_u32 s37, s37, 0
1060 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1061 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1062 ; VI-NEXT: s_getpc_b64 s[4:5]
1063 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1064 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1065 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1066 ; VI-NEXT: v_mov_b32_e32 v0, 0x7b
1067 ; VI-NEXT: v_mov_b32_e32 v1, 0
1068 ; VI-NEXT: s_mov_b32 s32, 0
1069 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1072 ; CI-LABEL: test_call_external_void_func_i64_imm:
1074 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1075 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1076 ; CI-NEXT: s_mov_b32 s38, -1
1077 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1078 ; CI-NEXT: s_add_u32 s36, s36, s3
1079 ; CI-NEXT: s_addc_u32 s37, s37, 0
1080 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1081 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1082 ; CI-NEXT: s_getpc_b64 s[4:5]
1083 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1084 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1085 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1086 ; CI-NEXT: v_mov_b32_e32 v0, 0x7b
1087 ; CI-NEXT: v_mov_b32_e32 v1, 0
1088 ; CI-NEXT: s_mov_b32 s32, 0
1089 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1092 ; GFX9-LABEL: test_call_external_void_func_i64_imm:
1094 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1095 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1096 ; GFX9-NEXT: s_mov_b32 s38, -1
1097 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1098 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1099 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1100 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1101 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1102 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1103 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1104 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1105 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1106 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
1107 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1108 ; GFX9-NEXT: s_mov_b32 s32, 0
1109 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1110 ; GFX9-NEXT: s_endpgm
1112 ; GFX11-LABEL: test_call_external_void_func_i64_imm:
1114 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
1115 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1116 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i64@rel32@lo+4
1117 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i64@rel32@hi+12
1118 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1119 ; GFX11-NEXT: s_mov_b32 s32, 0
1120 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1121 ; GFX11-NEXT: s_endpgm
1123 ; HSA-LABEL: test_call_external_void_func_i64_imm:
1125 ; HSA-NEXT: s_add_i32 s6, s6, s9
1126 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1127 ; HSA-NEXT: s_add_u32 s0, s0, s9
1128 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1129 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1130 ; HSA-NEXT: s_getpc_b64 s[8:9]
1131 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i64@rel32@lo+4
1132 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i64@rel32@hi+12
1133 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1134 ; HSA-NEXT: v_mov_b32_e32 v0, 0x7b
1135 ; HSA-NEXT: v_mov_b32_e32 v1, 0
1136 ; HSA-NEXT: s_mov_b32 s32, 0
1137 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1138 ; HSA-NEXT: s_endpgm
; IR under test. The assertions above are autogenerated; regenerate them with
; update_llc_test_checks.py if this IR changes.
1139 call void @external_void_func_i64(i64 123)
; Kernel (no parameters) that loads a <2 x i64> from the null addrspace(1)
; pointer (non-volatile) and passes it to external_void_func_v2i64.
; For every run line the expected code fetches the whole vector with a single
; 128-bit buffer load (buffer_load_dwordx4, or buffer_load_b128 in the gfx1100
; run) into v[0:3], using a resource descriptor whose first two dwords are
; zero, and passes v[0:3] to the callee unchanged.
1143 define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
1144 ; VI-LABEL: test_call_external_void_func_v2i64:
1146 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1147 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1148 ; VI-NEXT: s_mov_b32 s38, -1
1149 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1150 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1151 ; VI-NEXT: s_mov_b32 s0, 0
1152 ; VI-NEXT: s_add_u32 s36, s36, s3
1153 ; VI-NEXT: s_mov_b32 s3, 0xf000
1154 ; VI-NEXT: s_mov_b32 s2, -1
1155 ; VI-NEXT: s_mov_b32 s1, s0
1156 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1157 ; VI-NEXT: s_addc_u32 s37, s37, 0
1158 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1159 ; VI-NEXT: s_getpc_b64 s[4:5]
1160 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1161 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1162 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1163 ; VI-NEXT: s_mov_b32 s32, 0
1164 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1167 ; CI-LABEL: test_call_external_void_func_v2i64:
1169 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1170 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1171 ; CI-NEXT: s_mov_b32 s38, -1
1172 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1173 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1174 ; CI-NEXT: s_mov_b32 s0, 0
1175 ; CI-NEXT: s_add_u32 s36, s36, s3
1176 ; CI-NEXT: s_mov_b32 s3, 0xf000
1177 ; CI-NEXT: s_mov_b32 s2, -1
1178 ; CI-NEXT: s_mov_b32 s1, s0
1179 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1180 ; CI-NEXT: s_addc_u32 s37, s37, 0
1181 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1182 ; CI-NEXT: s_getpc_b64 s[4:5]
1183 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1184 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1185 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1186 ; CI-NEXT: s_mov_b32 s32, 0
1187 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1190 ; GFX9-LABEL: test_call_external_void_func_v2i64:
1192 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1193 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1194 ; GFX9-NEXT: s_mov_b32 s38, -1
1195 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1196 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1197 ; GFX9-NEXT: s_mov_b32 s0, 0
1198 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1199 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
1200 ; GFX9-NEXT: s_mov_b32 s2, -1
1201 ; GFX9-NEXT: s_mov_b32 s1, s0
1202 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1203 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1204 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1205 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1206 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1207 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1208 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1209 ; GFX9-NEXT: s_mov_b32 s32, 0
1210 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1211 ; GFX9-NEXT: s_endpgm
1213 ; GFX11-LABEL: test_call_external_void_func_v2i64:
1215 ; GFX11-NEXT: s_mov_b32 s4, 0
1216 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
1217 ; GFX11-NEXT: s_mov_b32 s6, -1
1218 ; GFX11-NEXT: s_mov_b32 s5, s4
1219 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1220 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4
1221 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12
1222 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
1223 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1224 ; GFX11-NEXT: s_mov_b32 s32, 0
1225 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1226 ; GFX11-NEXT: s_endpgm
1228 ; HSA-LABEL: test_call_external_void_func_v2i64:
1230 ; HSA-NEXT: s_add_i32 s6, s6, s9
1231 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1232 ; HSA-NEXT: s_mov_b32 s8, 0
1233 ; HSA-NEXT: s_add_u32 s0, s0, s9
1234 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
1235 ; HSA-NEXT: s_mov_b32 s10, -1
1236 ; HSA-NEXT: s_mov_b32 s9, s8
1237 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1238 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1239 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1240 ; HSA-NEXT: s_getpc_b64 s[8:9]
1241 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i64@rel32@lo+4
1242 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64@rel32@hi+12
1243 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1244 ; HSA-NEXT: s_mov_b32 s32, 0
1245 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1246 ; HSA-NEXT: s_endpgm
; IR under test. The assertions above are autogenerated; regenerate them with
; update_llc_test_checks.py if this IR changes.
1247 %val = load <2 x i64>, ptr addrspace(1) null
1248 call void @external_void_func_v2i64(<2 x i64> %val)
; Kernel (no parameters) that calls external_void_func_v2i64 with the constant
; vector <i64 8589934593, i64 17179869187>.
; The constants are chosen so that each 32-bit half is a distinct small value:
; 8589934593 = 0x2_00000001 and 17179869187 = 0x4_00000003. For every run
; line the expected code therefore sets v0..v3 to 1, 2, 3, 4 (low half first
; for each element). The gfx1100 run expects the moves paired into two
; v_dual_mov_b32 instructions.
1252 define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
1253 ; VI-LABEL: test_call_external_void_func_v2i64_imm:
1255 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1256 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1257 ; VI-NEXT: s_mov_b32 s38, -1
1258 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1259 ; VI-NEXT: s_add_u32 s36, s36, s3
1260 ; VI-NEXT: s_addc_u32 s37, s37, 0
1261 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1262 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1263 ; VI-NEXT: s_getpc_b64 s[4:5]
1264 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1265 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1266 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1267 ; VI-NEXT: v_mov_b32_e32 v0, 1
1268 ; VI-NEXT: v_mov_b32_e32 v1, 2
1269 ; VI-NEXT: v_mov_b32_e32 v2, 3
1270 ; VI-NEXT: v_mov_b32_e32 v3, 4
1271 ; VI-NEXT: s_mov_b32 s32, 0
1272 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1275 ; CI-LABEL: test_call_external_void_func_v2i64_imm:
1277 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1278 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1279 ; CI-NEXT: s_mov_b32 s38, -1
1280 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1281 ; CI-NEXT: s_add_u32 s36, s36, s3
1282 ; CI-NEXT: s_addc_u32 s37, s37, 0
1283 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1284 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1285 ; CI-NEXT: s_getpc_b64 s[4:5]
1286 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1287 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1288 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1289 ; CI-NEXT: v_mov_b32_e32 v0, 1
1290 ; CI-NEXT: v_mov_b32_e32 v1, 2
1291 ; CI-NEXT: v_mov_b32_e32 v2, 3
1292 ; CI-NEXT: v_mov_b32_e32 v3, 4
1293 ; CI-NEXT: s_mov_b32 s32, 0
1294 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1297 ; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
1299 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1300 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1301 ; GFX9-NEXT: s_mov_b32 s38, -1
1302 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1303 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1304 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1305 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1306 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1307 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1308 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1309 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1310 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1311 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
1312 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
1313 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
1314 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
1315 ; GFX9-NEXT: s_mov_b32 s32, 0
1316 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1317 ; GFX9-NEXT: s_endpgm
1319 ; GFX11-LABEL: test_call_external_void_func_v2i64_imm:
1321 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
1322 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
1323 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1324 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4
1325 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12
1326 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1327 ; GFX11-NEXT: s_mov_b32 s32, 0
1328 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1329 ; GFX11-NEXT: s_endpgm
1331 ; HSA-LABEL: test_call_external_void_func_v2i64_imm:
1333 ; HSA-NEXT: s_add_i32 s6, s6, s9
1334 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1335 ; HSA-NEXT: s_add_u32 s0, s0, s9
1336 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1337 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1338 ; HSA-NEXT: s_getpc_b64 s[8:9]
1339 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i64@rel32@lo+4
1340 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64@rel32@hi+12
1341 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1342 ; HSA-NEXT: v_mov_b32_e32 v0, 1
1343 ; HSA-NEXT: v_mov_b32_e32 v1, 2
1344 ; HSA-NEXT: v_mov_b32_e32 v2, 3
1345 ; HSA-NEXT: v_mov_b32_e32 v3, 4
1346 ; HSA-NEXT: s_mov_b32 s32, 0
1347 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1348 ; HSA-NEXT: s_endpgm
; IR under test. The assertions above are autogenerated; regenerate them with
; update_llc_test_checks.py if this IR changes.
1349 call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
; Kernel (no parameters) that builds a <3 x i64> argument for
; external_void_func_v3i64: elements 0 and 1 come from a <2 x i64> load
; through the null addrspace(1) pointer, element 2 is the constant 8589934593
; (0x2_00000001) taken from the second shufflevector operand. The `undef`
; lane of that operand is not selected by the shuffle mask.
; For every run line the expected code loads the first two elements with one
; 128-bit buffer load into v[0:3] and materializes the third as v4 = 1,
; v5 = 2.
1353 define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
1354 ; VI-LABEL: test_call_external_void_func_v3i64:
1356 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1357 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1358 ; VI-NEXT: s_mov_b32 s38, -1
1359 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1360 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1361 ; VI-NEXT: s_mov_b32 s0, 0
1362 ; VI-NEXT: s_add_u32 s36, s36, s3
1363 ; VI-NEXT: s_mov_b32 s3, 0xf000
1364 ; VI-NEXT: s_mov_b32 s2, -1
1365 ; VI-NEXT: s_mov_b32 s1, s0
1366 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1367 ; VI-NEXT: s_addc_u32 s37, s37, 0
1368 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1369 ; VI-NEXT: s_getpc_b64 s[4:5]
1370 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1371 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1372 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1373 ; VI-NEXT: v_mov_b32_e32 v4, 1
1374 ; VI-NEXT: v_mov_b32_e32 v5, 2
1375 ; VI-NEXT: s_mov_b32 s32, 0
1376 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1379 ; CI-LABEL: test_call_external_void_func_v3i64:
1381 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1382 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1383 ; CI-NEXT: s_mov_b32 s38, -1
1384 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1385 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1386 ; CI-NEXT: s_mov_b32 s0, 0
1387 ; CI-NEXT: s_add_u32 s36, s36, s3
1388 ; CI-NEXT: s_mov_b32 s3, 0xf000
1389 ; CI-NEXT: s_mov_b32 s2, -1
1390 ; CI-NEXT: s_mov_b32 s1, s0
1391 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1392 ; CI-NEXT: s_addc_u32 s37, s37, 0
1393 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1394 ; CI-NEXT: s_getpc_b64 s[4:5]
1395 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1396 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1397 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1398 ; CI-NEXT: v_mov_b32_e32 v4, 1
1399 ; CI-NEXT: v_mov_b32_e32 v5, 2
1400 ; CI-NEXT: s_mov_b32 s32, 0
1401 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1404 ; GFX9-LABEL: test_call_external_void_func_v3i64:
1406 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1407 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1408 ; GFX9-NEXT: s_mov_b32 s38, -1
1409 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1410 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1411 ; GFX9-NEXT: s_mov_b32 s0, 0
1412 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1413 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
1414 ; GFX9-NEXT: s_mov_b32 s2, -1
1415 ; GFX9-NEXT: s_mov_b32 s1, s0
1416 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1417 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1418 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1419 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1420 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1421 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1422 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1423 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
1424 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
1425 ; GFX9-NEXT: s_mov_b32 s32, 0
1426 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1427 ; GFX9-NEXT: s_endpgm
1429 ; GFX11-LABEL: test_call_external_void_func_v3i64:
1431 ; GFX11-NEXT: s_mov_b32 s4, 0
1432 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
1433 ; GFX11-NEXT: s_mov_b32 s6, -1
1434 ; GFX11-NEXT: s_mov_b32 s5, s4
1435 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2
1436 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
1437 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1438 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i64@rel32@lo+4
1439 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i64@rel32@hi+12
1440 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1441 ; GFX11-NEXT: s_mov_b32 s32, 0
1442 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1443 ; GFX11-NEXT: s_endpgm
1445 ; HSA-LABEL: test_call_external_void_func_v3i64:
1447 ; HSA-NEXT: s_add_i32 s6, s6, s9
1448 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1449 ; HSA-NEXT: s_mov_b32 s8, 0
1450 ; HSA-NEXT: s_add_u32 s0, s0, s9
1451 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
1452 ; HSA-NEXT: s_mov_b32 s10, -1
1453 ; HSA-NEXT: s_mov_b32 s9, s8
1454 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1455 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1456 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1457 ; HSA-NEXT: s_getpc_b64 s[8:9]
1458 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i64@rel32@lo+4
1459 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i64@rel32@hi+12
1460 ; HSA-NEXT: v_mov_b32_e32 v4, 1
1461 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1462 ; HSA-NEXT: v_mov_b32_e32 v5, 2
1463 ; HSA-NEXT: s_mov_b32 s32, 0
1464 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1465 ; HSA-NEXT: s_endpgm
; IR under test. The shuffle mask <0, 1, 2> concatenates both loaded elements
; with lane 0 of the constant operand. The assertions above are autogenerated;
; regenerate them with update_llc_test_checks.py if this IR changes.
1466 %load = load <2 x i64>, ptr addrspace(1) null
1467 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
1469 call void @external_void_func_v3i64(<3 x i64> %val)
; Kernel (no parameters) that builds a <4 x i64> argument for
; external_void_func_v4i64: elements 0 and 1 come from a <2 x i64> load
; through the null addrspace(1) pointer, elements 2 and 3 are the constants
; 8589934593 (0x2_00000001) and 17179869187 (0x4_00000003) from the second
; shufflevector operand.
; For every run line the expected code loads the first two elements with one
; 128-bit buffer load into v[0:3] and materializes the constants as
; v4..v7 = 1, 2, 3, 4.
1473 define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
1474 ; VI-LABEL: test_call_external_void_func_v4i64:
1476 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1477 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1478 ; VI-NEXT: s_mov_b32 s38, -1
1479 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1480 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1481 ; VI-NEXT: s_mov_b32 s0, 0
1482 ; VI-NEXT: s_add_u32 s36, s36, s3
1483 ; VI-NEXT: s_mov_b32 s3, 0xf000
1484 ; VI-NEXT: s_mov_b32 s2, -1
1485 ; VI-NEXT: s_mov_b32 s1, s0
1486 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1487 ; VI-NEXT: s_addc_u32 s37, s37, 0
1488 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1489 ; VI-NEXT: s_getpc_b64 s[4:5]
1490 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1491 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1492 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1493 ; VI-NEXT: v_mov_b32_e32 v4, 1
1494 ; VI-NEXT: v_mov_b32_e32 v5, 2
1495 ; VI-NEXT: v_mov_b32_e32 v6, 3
1496 ; VI-NEXT: v_mov_b32_e32 v7, 4
1497 ; VI-NEXT: s_mov_b32 s32, 0
1498 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1501 ; CI-LABEL: test_call_external_void_func_v4i64:
1503 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1504 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1505 ; CI-NEXT: s_mov_b32 s38, -1
1506 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1507 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1508 ; CI-NEXT: s_mov_b32 s0, 0
1509 ; CI-NEXT: s_add_u32 s36, s36, s3
1510 ; CI-NEXT: s_mov_b32 s3, 0xf000
1511 ; CI-NEXT: s_mov_b32 s2, -1
1512 ; CI-NEXT: s_mov_b32 s1, s0
1513 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1514 ; CI-NEXT: s_addc_u32 s37, s37, 0
1515 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1516 ; CI-NEXT: s_getpc_b64 s[4:5]
1517 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1518 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1519 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1520 ; CI-NEXT: v_mov_b32_e32 v4, 1
1521 ; CI-NEXT: v_mov_b32_e32 v5, 2
1522 ; CI-NEXT: v_mov_b32_e32 v6, 3
1523 ; CI-NEXT: v_mov_b32_e32 v7, 4
1524 ; CI-NEXT: s_mov_b32 s32, 0
1525 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1528 ; GFX9-LABEL: test_call_external_void_func_v4i64:
1530 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1531 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1532 ; GFX9-NEXT: s_mov_b32 s38, -1
1533 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1534 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1535 ; GFX9-NEXT: s_mov_b32 s0, 0
1536 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1537 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
1538 ; GFX9-NEXT: s_mov_b32 s2, -1
1539 ; GFX9-NEXT: s_mov_b32 s1, s0
1540 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1541 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1542 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1543 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1544 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1545 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1546 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1547 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
1548 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
1549 ; GFX9-NEXT: v_mov_b32_e32 v6, 3
1550 ; GFX9-NEXT: v_mov_b32_e32 v7, 4
1551 ; GFX9-NEXT: s_mov_b32 s32, 0
1552 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1553 ; GFX9-NEXT: s_endpgm
1555 ; GFX11-LABEL: test_call_external_void_func_v4i64:
1557 ; GFX11-NEXT: s_mov_b32 s4, 0
1558 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
1559 ; GFX11-NEXT: s_mov_b32 s6, -1
1560 ; GFX11-NEXT: s_mov_b32 s5, s4
1561 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2
1562 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
1563 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4
1564 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1565 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i64@rel32@lo+4
1566 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i64@rel32@hi+12
1567 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1568 ; GFX11-NEXT: s_mov_b32 s32, 0
1569 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1570 ; GFX11-NEXT: s_endpgm
1572 ; HSA-LABEL: test_call_external_void_func_v4i64:
1574 ; HSA-NEXT: s_add_i32 s6, s6, s9
1575 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1576 ; HSA-NEXT: s_mov_b32 s8, 0
1577 ; HSA-NEXT: s_add_u32 s0, s0, s9
1578 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
1579 ; HSA-NEXT: s_mov_b32 s10, -1
1580 ; HSA-NEXT: s_mov_b32 s9, s8
1581 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1582 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1583 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1584 ; HSA-NEXT: s_getpc_b64 s[8:9]
1585 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i64@rel32@lo+4
1586 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i64@rel32@hi+12
1587 ; HSA-NEXT: v_mov_b32_e32 v4, 1
1588 ; HSA-NEXT: v_mov_b32_e32 v5, 2
1589 ; HSA-NEXT: v_mov_b32_e32 v6, 3
1590 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1591 ; HSA-NEXT: v_mov_b32_e32 v7, 4
1592 ; HSA-NEXT: s_mov_b32 s32, 0
1593 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1594 ; HSA-NEXT: s_endpgm
; IR under test. The shuffle mask <0, 1, 2, 3> concatenates the loaded vector
; with the constant vector. The assertions above are autogenerated; regenerate
; them with update_llc_test_checks.py if this IR changes.
1595 %load = load <2 x i64>, ptr addrspace(1) null
1596 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1597 call void @external_void_func_v4i64(<4 x i64> %val)
; Kernel that calls @external_void_func_f16 with the constant half 4.0.
; The checks below expect the argument in v0: the VI, GFX9, GFX11 and HSA
; runs materialize 0x4400 (the IEEE half bit pattern of 4.0), while the CI
; run moves the inline constant 4.0 instead. In every run the callee address
; is formed with s_getpc_b64 plus the @rel32@lo+4 / @rel32@hi+12 additions,
; s32 is set to 0, and the call is made with s_swappc_b64.
1601 define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
1602 ; VI-LABEL: test_call_external_void_func_f16_imm:
1604 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1605 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1606 ; VI-NEXT: s_mov_b32 s38, -1
1607 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1608 ; VI-NEXT: s_add_u32 s36, s36, s3
1609 ; VI-NEXT: s_addc_u32 s37, s37, 0
1610 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1611 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1612 ; VI-NEXT: s_getpc_b64 s[4:5]
1613 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1614 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1615 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1616 ; VI-NEXT: v_mov_b32_e32 v0, 0x4400
1617 ; VI-NEXT: s_mov_b32 s32, 0
1618 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1621 ; CI-LABEL: test_call_external_void_func_f16_imm:
1623 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1624 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1625 ; CI-NEXT: s_mov_b32 s38, -1
1626 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1627 ; CI-NEXT: s_add_u32 s36, s36, s3
1628 ; CI-NEXT: s_addc_u32 s37, s37, 0
1629 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1630 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1631 ; CI-NEXT: s_getpc_b64 s[4:5]
1632 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1633 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1634 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1635 ; CI-NEXT: v_mov_b32_e32 v0, 4.0
1636 ; CI-NEXT: s_mov_b32 s32, 0
1637 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1640 ; GFX9-LABEL: test_call_external_void_func_f16_imm:
1642 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1643 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1644 ; GFX9-NEXT: s_mov_b32 s38, -1
1645 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1646 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1647 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1648 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1649 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1650 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1651 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1652 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1653 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1654 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
1655 ; GFX9-NEXT: s_mov_b32 s32, 0
1656 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1657 ; GFX9-NEXT: s_endpgm
1659 ; GFX11-LABEL: test_call_external_void_func_f16_imm:
1661 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400
1662 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1663 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4
1664 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12
1665 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1666 ; GFX11-NEXT: s_mov_b32 s32, 0
1667 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1668 ; GFX11-NEXT: s_endpgm
1670 ; HSA-LABEL: test_call_external_void_func_f16_imm:
1672 ; HSA-NEXT: s_add_i32 s6, s6, s9
1673 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1674 ; HSA-NEXT: s_add_u32 s0, s0, s9
1675 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1676 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1677 ; HSA-NEXT: s_getpc_b64 s[8:9]
1678 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f16@rel32@lo+4
1679 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f16@rel32@hi+12
1680 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1681 ; HSA-NEXT: v_mov_b32_e32 v0, 0x4400
1682 ; HSA-NEXT: s_mov_b32 s32, 0
1683 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1684 ; HSA-NEXT: s_endpgm
1685 call void @external_void_func_f16(half 4.0)
; Kernel that calls @external_void_func_f32 with the constant float 4.0.
; All five check prefixes expect the argument to be placed in v0 with a
; single v_mov_b32 of the inline constant 4.0 before the s_swappc_b64 call.
; The runs differ only in the scratch / flat_scratch setup and in which
; SGPR pair holds the callee address computed from s_getpc_b64.
1689 define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
1690 ; VI-LABEL: test_call_external_void_func_f32_imm:
1692 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1693 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1694 ; VI-NEXT: s_mov_b32 s38, -1
1695 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1696 ; VI-NEXT: s_add_u32 s36, s36, s3
1697 ; VI-NEXT: s_addc_u32 s37, s37, 0
1698 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1699 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1700 ; VI-NEXT: s_getpc_b64 s[4:5]
1701 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1702 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1703 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1704 ; VI-NEXT: v_mov_b32_e32 v0, 4.0
1705 ; VI-NEXT: s_mov_b32 s32, 0
1706 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1709 ; CI-LABEL: test_call_external_void_func_f32_imm:
1711 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1712 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1713 ; CI-NEXT: s_mov_b32 s38, -1
1714 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1715 ; CI-NEXT: s_add_u32 s36, s36, s3
1716 ; CI-NEXT: s_addc_u32 s37, s37, 0
1717 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1718 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1719 ; CI-NEXT: s_getpc_b64 s[4:5]
1720 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1721 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1722 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1723 ; CI-NEXT: v_mov_b32_e32 v0, 4.0
1724 ; CI-NEXT: s_mov_b32 s32, 0
1725 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1728 ; GFX9-LABEL: test_call_external_void_func_f32_imm:
1730 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1731 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1732 ; GFX9-NEXT: s_mov_b32 s38, -1
1733 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1734 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1735 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1736 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1737 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1738 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1739 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1740 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1741 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1742 ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
1743 ; GFX9-NEXT: s_mov_b32 s32, 0
1744 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1745 ; GFX9-NEXT: s_endpgm
1747 ; GFX11-LABEL: test_call_external_void_func_f32_imm:
1749 ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0
1750 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1751 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f32@rel32@lo+4
1752 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f32@rel32@hi+12
1753 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1754 ; GFX11-NEXT: s_mov_b32 s32, 0
1755 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1756 ; GFX11-NEXT: s_endpgm
1758 ; HSA-LABEL: test_call_external_void_func_f32_imm:
1760 ; HSA-NEXT: s_add_i32 s6, s6, s9
1761 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1762 ; HSA-NEXT: s_add_u32 s0, s0, s9
1763 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1764 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1765 ; HSA-NEXT: s_getpc_b64 s[8:9]
1766 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f32@rel32@lo+4
1767 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f32@rel32@hi+12
1768 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1769 ; HSA-NEXT: v_mov_b32_e32 v0, 4.0
1770 ; HSA-NEXT: s_mov_b32 s32, 0
1771 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1772 ; HSA-NEXT: s_endpgm
1773 call void @external_void_func_f32(float 4.0)
; Kernel that calls @external_void_func_v2f32 with the constant vector
; <1.0, 2.0>. The checks expect one 32-bit VGPR per element: v0 = 1.0 and
; v1 = 2.0. The VI, CI, GFX9 and HSA runs use two v_mov_b32 instructions;
; the GFX11 run expects both moves fused into one v_dual_mov_b32.
1777 define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
1778 ; VI-LABEL: test_call_external_void_func_v2f32_imm:
1780 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1781 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1782 ; VI-NEXT: s_mov_b32 s38, -1
1783 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1784 ; VI-NEXT: s_add_u32 s36, s36, s3
1785 ; VI-NEXT: s_addc_u32 s37, s37, 0
1786 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1787 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1788 ; VI-NEXT: s_getpc_b64 s[4:5]
1789 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1790 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1791 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1792 ; VI-NEXT: v_mov_b32_e32 v0, 1.0
1793 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
1794 ; VI-NEXT: s_mov_b32 s32, 0
1795 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1798 ; CI-LABEL: test_call_external_void_func_v2f32_imm:
1800 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1801 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1802 ; CI-NEXT: s_mov_b32 s38, -1
1803 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1804 ; CI-NEXT: s_add_u32 s36, s36, s3
1805 ; CI-NEXT: s_addc_u32 s37, s37, 0
1806 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1807 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1808 ; CI-NEXT: s_getpc_b64 s[4:5]
1809 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1810 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1811 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1812 ; CI-NEXT: v_mov_b32_e32 v0, 1.0
1813 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
1814 ; CI-NEXT: s_mov_b32 s32, 0
1815 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1818 ; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
1820 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1821 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1822 ; GFX9-NEXT: s_mov_b32 s38, -1
1823 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1824 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1825 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1826 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1827 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1828 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1829 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1830 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1831 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1832 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
1833 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
1834 ; GFX9-NEXT: s_mov_b32 s32, 0
1835 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1836 ; GFX9-NEXT: s_endpgm
1838 ; GFX11-LABEL: test_call_external_void_func_v2f32_imm:
1840 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
1841 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1842 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f32@rel32@lo+4
1843 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f32@rel32@hi+12
1844 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1845 ; GFX11-NEXT: s_mov_b32 s32, 0
1846 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1847 ; GFX11-NEXT: s_endpgm
1849 ; HSA-LABEL: test_call_external_void_func_v2f32_imm:
1851 ; HSA-NEXT: s_add_i32 s6, s6, s9
1852 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1853 ; HSA-NEXT: s_add_u32 s0, s0, s9
1854 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1855 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1856 ; HSA-NEXT: s_getpc_b64 s[8:9]
1857 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f32@rel32@lo+4
1858 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f32@rel32@hi+12
1859 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1860 ; HSA-NEXT: v_mov_b32_e32 v0, 1.0
1861 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0
1862 ; HSA-NEXT: s_mov_b32 s32, 0
1863 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1864 ; HSA-NEXT: s_endpgm
1865 call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
; Kernel that calls @external_void_func_v3f32 with the constant vector
; <1.0, 2.0, 4.0>. The checks expect the three elements in v0, v1 and v2.
; On the GFX11 run the first two moves are expected as one v_dual_mov_b32
; and the third as a plain v_mov_b32; the other runs use three v_mov_b32.
1869 define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
1870 ; VI-LABEL: test_call_external_void_func_v3f32_imm:
1872 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1873 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1874 ; VI-NEXT: s_mov_b32 s38, -1
1875 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1876 ; VI-NEXT: s_add_u32 s36, s36, s3
1877 ; VI-NEXT: s_addc_u32 s37, s37, 0
1878 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1879 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1880 ; VI-NEXT: s_getpc_b64 s[4:5]
1881 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1882 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1883 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1884 ; VI-NEXT: v_mov_b32_e32 v0, 1.0
1885 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
1886 ; VI-NEXT: v_mov_b32_e32 v2, 4.0
1887 ; VI-NEXT: s_mov_b32 s32, 0
1888 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1891 ; CI-LABEL: test_call_external_void_func_v3f32_imm:
1893 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1894 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1895 ; CI-NEXT: s_mov_b32 s38, -1
1896 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1897 ; CI-NEXT: s_add_u32 s36, s36, s3
1898 ; CI-NEXT: s_addc_u32 s37, s37, 0
1899 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1900 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1901 ; CI-NEXT: s_getpc_b64 s[4:5]
1902 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1903 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1904 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1905 ; CI-NEXT: v_mov_b32_e32 v0, 1.0
1906 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
1907 ; CI-NEXT: v_mov_b32_e32 v2, 4.0
1908 ; CI-NEXT: s_mov_b32 s32, 0
1909 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1912 ; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
1914 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1915 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1916 ; GFX9-NEXT: s_mov_b32 s38, -1
1917 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1918 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1919 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1920 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1921 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1922 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1923 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1924 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1925 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1926 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
1927 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
1928 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
1929 ; GFX9-NEXT: s_mov_b32 s32, 0
1930 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1931 ; GFX9-NEXT: s_endpgm
1933 ; GFX11-LABEL: test_call_external_void_func_v3f32_imm:
1935 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
1936 ; GFX11-NEXT: v_mov_b32_e32 v2, 4.0
1937 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1938 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f32@rel32@lo+4
1939 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f32@rel32@hi+12
1940 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1941 ; GFX11-NEXT: s_mov_b32 s32, 0
1942 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1943 ; GFX11-NEXT: s_endpgm
1945 ; HSA-LABEL: test_call_external_void_func_v3f32_imm:
1947 ; HSA-NEXT: s_add_i32 s6, s6, s9
1948 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1949 ; HSA-NEXT: s_add_u32 s0, s0, s9
1950 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1951 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1952 ; HSA-NEXT: s_getpc_b64 s[8:9]
1953 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f32@rel32@lo+4
1954 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32@rel32@hi+12
1955 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1956 ; HSA-NEXT: v_mov_b32_e32 v0, 1.0
1957 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0
1958 ; HSA-NEXT: v_mov_b32_e32 v2, 4.0
1959 ; HSA-NEXT: s_mov_b32 s32, 0
1960 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1961 ; HSA-NEXT: s_endpgm
1962 call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
; Kernel that calls @external_void_func_v5f32 with the constant vector
; <1.0, 2.0, 4.0, -1.0, 0.5>. The checks expect the five elements in v0
; through v4, each written as an inline floating-point constant. On the
; GFX11 run the moves are expected as two v_dual_mov_b32 pairs followed by
; one plain v_mov_b32; the other runs use five v_mov_b32 instructions.
1966 define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
1967 ; VI-LABEL: test_call_external_void_func_v5f32_imm:
1969 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1970 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1971 ; VI-NEXT: s_mov_b32 s38, -1
1972 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1973 ; VI-NEXT: s_add_u32 s36, s36, s3
1974 ; VI-NEXT: s_addc_u32 s37, s37, 0
1975 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1976 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1977 ; VI-NEXT: s_getpc_b64 s[4:5]
1978 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
1979 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
1980 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1981 ; VI-NEXT: v_mov_b32_e32 v0, 1.0
1982 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
1983 ; VI-NEXT: v_mov_b32_e32 v2, 4.0
1984 ; VI-NEXT: v_mov_b32_e32 v3, -1.0
1985 ; VI-NEXT: v_mov_b32_e32 v4, 0.5
1986 ; VI-NEXT: s_mov_b32 s32, 0
1987 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1990 ; CI-LABEL: test_call_external_void_func_v5f32_imm:
1992 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1993 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1994 ; CI-NEXT: s_mov_b32 s38, -1
1995 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1996 ; CI-NEXT: s_add_u32 s36, s36, s3
1997 ; CI-NEXT: s_addc_u32 s37, s37, 0
1998 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1999 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2000 ; CI-NEXT: s_getpc_b64 s[4:5]
2001 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
2002 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
2003 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2004 ; CI-NEXT: v_mov_b32_e32 v0, 1.0
2005 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
2006 ; CI-NEXT: v_mov_b32_e32 v2, 4.0
2007 ; CI-NEXT: v_mov_b32_e32 v3, -1.0
2008 ; CI-NEXT: v_mov_b32_e32 v4, 0.5
2009 ; CI-NEXT: s_mov_b32 s32, 0
2010 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2013 ; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
2015 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2016 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2017 ; GFX9-NEXT: s_mov_b32 s38, -1
2018 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2019 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2020 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2021 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2022 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2023 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2024 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
2025 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
2026 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2027 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
2028 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2029 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
2030 ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0
2031 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5
2032 ; GFX9-NEXT: s_mov_b32 s32, 0
2033 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2034 ; GFX9-NEXT: s_endpgm
2036 ; GFX11-LABEL: test_call_external_void_func_v5f32_imm:
2038 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2039 ; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0
2040 ; GFX11-NEXT: v_mov_b32_e32 v4, 0.5
2041 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2042 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v5f32@rel32@lo+4
2043 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v5f32@rel32@hi+12
2044 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2045 ; GFX11-NEXT: s_mov_b32 s32, 0
2046 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2047 ; GFX11-NEXT: s_endpgm
2049 ; HSA-LABEL: test_call_external_void_func_v5f32_imm:
2051 ; HSA-NEXT: s_add_i32 s6, s6, s9
2052 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2053 ; HSA-NEXT: s_add_u32 s0, s0, s9
2054 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2055 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2056 ; HSA-NEXT: s_getpc_b64 s[8:9]
2057 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v5f32@rel32@lo+4
2058 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v5f32@rel32@hi+12
2059 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2060 ; HSA-NEXT: v_mov_b32_e32 v0, 1.0
2061 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0
2062 ; HSA-NEXT: v_mov_b32_e32 v2, 4.0
2063 ; HSA-NEXT: v_mov_b32_e32 v3, -1.0
2064 ; HSA-NEXT: v_mov_b32_e32 v4, 0.5
2065 ; HSA-NEXT: s_mov_b32 s32, 0
2066 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2067 ; HSA-NEXT: s_endpgm
2068 call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
; Kernel that calls @external_void_func_f64 with the constant double 4.0.
; The checks expect the 64-bit value split across two VGPRs: v0 = 0 and
; v1 = 0x40100000, i.e. the low and high dwords of 0x4010000000000000,
; the IEEE double encoding of 4.0. The GFX11 run expects both moves fused
; into a single v_dual_mov_b32.
2072 define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
2073 ; VI-LABEL: test_call_external_void_func_f64_imm:
2075 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2076 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2077 ; VI-NEXT: s_mov_b32 s38, -1
2078 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2079 ; VI-NEXT: s_add_u32 s36, s36, s3
2080 ; VI-NEXT: s_addc_u32 s37, s37, 0
2081 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2082 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2083 ; VI-NEXT: s_getpc_b64 s[4:5]
2084 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
2085 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
2086 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2087 ; VI-NEXT: v_mov_b32_e32 v0, 0
2088 ; VI-NEXT: v_mov_b32_e32 v1, 0x40100000
2089 ; VI-NEXT: s_mov_b32 s32, 0
2090 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2093 ; CI-LABEL: test_call_external_void_func_f64_imm:
2095 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2096 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2097 ; CI-NEXT: s_mov_b32 s38, -1
2098 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2099 ; CI-NEXT: s_add_u32 s36, s36, s3
2100 ; CI-NEXT: s_addc_u32 s37, s37, 0
2101 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2102 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2103 ; CI-NEXT: s_getpc_b64 s[4:5]
2104 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
2105 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
2106 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2107 ; CI-NEXT: v_mov_b32_e32 v0, 0
2108 ; CI-NEXT: v_mov_b32_e32 v1, 0x40100000
2109 ; CI-NEXT: s_mov_b32 s32, 0
2110 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2113 ; GFX9-LABEL: test_call_external_void_func_f64_imm:
2115 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2116 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2117 ; GFX9-NEXT: s_mov_b32 s38, -1
2118 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2119 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2120 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2121 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2122 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2123 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2124 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
2125 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
2126 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2127 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2128 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
2129 ; GFX9-NEXT: s_mov_b32 s32, 0
2130 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2131 ; GFX9-NEXT: s_endpgm
2133 ; GFX11-LABEL: test_call_external_void_func_f64_imm:
2135 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000
2136 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2137 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f64@rel32@lo+4
2138 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f64@rel32@hi+12
2139 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2140 ; GFX11-NEXT: s_mov_b32 s32, 0
2141 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2142 ; GFX11-NEXT: s_endpgm
2144 ; HSA-LABEL: test_call_external_void_func_f64_imm:
2146 ; HSA-NEXT: s_add_i32 s6, s6, s9
2147 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2148 ; HSA-NEXT: s_add_u32 s0, s0, s9
2149 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2150 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2151 ; HSA-NEXT: s_getpc_b64 s[8:9]
2152 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f64@rel32@lo+4
2153 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f64@rel32@hi+12
2154 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2155 ; HSA-NEXT: v_mov_b32_e32 v0, 0
2156 ; HSA-NEXT: v_mov_b32_e32 v1, 0x40100000
2157 ; HSA-NEXT: s_mov_b32 s32, 0
2158 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2159 ; HSA-NEXT: s_endpgm
2160 call void @external_void_func_f64(double 4.0)
; Kernel that calls @external_void_func_v2f64 with the constant vector
; <2.0, 4.0>. The checks expect each double split into a low/high VGPR
; pair: v0/v1 for element 0 and v2/v3 for element 1. The high dword of
; double 2.0 (0x40000000) has the same bits as float 2.0, so it is printed
; as the inline constant 2.0 in v1; the high dword of double 4.0 appears
; as the literal 0x40100000 in v3. Both low dwords are 0.
2164 define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
2165 ; VI-LABEL: test_call_external_void_func_v2f64_imm:
2167 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2168 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2169 ; VI-NEXT: s_mov_b32 s38, -1
2170 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2171 ; VI-NEXT: s_add_u32 s36, s36, s3
2172 ; VI-NEXT: s_addc_u32 s37, s37, 0
2173 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2174 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2175 ; VI-NEXT: s_getpc_b64 s[4:5]
2176 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2177 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2178 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2179 ; VI-NEXT: v_mov_b32_e32 v0, 0
2180 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
2181 ; VI-NEXT: v_mov_b32_e32 v2, 0
2182 ; VI-NEXT: v_mov_b32_e32 v3, 0x40100000
2183 ; VI-NEXT: s_mov_b32 s32, 0
2184 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2187 ; CI-LABEL: test_call_external_void_func_v2f64_imm:
2189 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2190 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2191 ; CI-NEXT: s_mov_b32 s38, -1
2192 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2193 ; CI-NEXT: s_add_u32 s36, s36, s3
2194 ; CI-NEXT: s_addc_u32 s37, s37, 0
2195 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2196 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2197 ; CI-NEXT: s_getpc_b64 s[4:5]
2198 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2199 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2200 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2201 ; CI-NEXT: v_mov_b32_e32 v0, 0
2202 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
2203 ; CI-NEXT: v_mov_b32_e32 v2, 0
2204 ; CI-NEXT: v_mov_b32_e32 v3, 0x40100000
2205 ; CI-NEXT: s_mov_b32 s32, 0
2206 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2209 ; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
2211 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2212 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2213 ; GFX9-NEXT: s_mov_b32 s38, -1
2214 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2215 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2216 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2217 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2218 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2219 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2220 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2221 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2222 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2223 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2224 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2225 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
2226 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
2227 ; GFX9-NEXT: s_mov_b32 s32, 0
2228 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2229 ; GFX9-NEXT: s_endpgm
2231 ; GFX11-LABEL: test_call_external_void_func_v2f64_imm:
2233 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2234 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2235 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2236 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f64@rel32@lo+4
2237 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f64@rel32@hi+12
2238 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2239 ; GFX11-NEXT: s_mov_b32 s32, 0
2240 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2241 ; GFX11-NEXT: s_endpgm
2243 ; HSA-LABEL: test_call_external_void_func_v2f64_imm:
2245 ; HSA-NEXT: s_add_i32 s6, s6, s9
2246 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2247 ; HSA-NEXT: s_add_u32 s0, s0, s9
2248 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2249 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2250 ; HSA-NEXT: s_getpc_b64 s[8:9]
2251 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f64@rel32@lo+4
2252 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64@rel32@hi+12
2253 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2254 ; HSA-NEXT: v_mov_b32_e32 v0, 0
2255 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0
2256 ; HSA-NEXT: v_mov_b32_e32 v2, 0
2257 ; HSA-NEXT: v_mov_b32_e32 v3, 0x40100000
2258 ; HSA-NEXT: s_mov_b32 s32, 0
2259 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2260 ; HSA-NEXT: s_endpgm
2261 call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
; Kernel that calls @external_void_func_v3f64 with the constant vector
; <2.0, 4.0, 8.0>. The checks expect six VGPRs, one low/high pair per
; double: v0/v1, v2/v3 and v4/v5. Every low dword is 0; the high dwords
; are printed as 2.0 (bit pattern 0x40000000, the high half of double 2.0),
; 0x40100000 (double 4.0) and 0x40200000 (double 8.0). The GFX11 run
; expects the six moves as three v_dual_mov_b32 instructions.
2265 define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
2266 ; VI-LABEL: test_call_external_void_func_v3f64_imm:
2268 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2269 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2270 ; VI-NEXT: s_mov_b32 s38, -1
2271 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2272 ; VI-NEXT: s_add_u32 s36, s36, s3
2273 ; VI-NEXT: s_addc_u32 s37, s37, 0
2274 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2275 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2276 ; VI-NEXT: s_getpc_b64 s[4:5]
2277 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2278 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2279 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2280 ; VI-NEXT: v_mov_b32_e32 v0, 0
2281 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
2282 ; VI-NEXT: v_mov_b32_e32 v2, 0
2283 ; VI-NEXT: v_mov_b32_e32 v3, 0x40100000
2284 ; VI-NEXT: v_mov_b32_e32 v4, 0
2285 ; VI-NEXT: v_mov_b32_e32 v5, 0x40200000
2286 ; VI-NEXT: s_mov_b32 s32, 0
2287 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2290 ; CI-LABEL: test_call_external_void_func_v3f64_imm:
2292 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2293 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2294 ; CI-NEXT: s_mov_b32 s38, -1
2295 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2296 ; CI-NEXT: s_add_u32 s36, s36, s3
2297 ; CI-NEXT: s_addc_u32 s37, s37, 0
2298 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2299 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2300 ; CI-NEXT: s_getpc_b64 s[4:5]
2301 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2302 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2303 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2304 ; CI-NEXT: v_mov_b32_e32 v0, 0
2305 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
2306 ; CI-NEXT: v_mov_b32_e32 v2, 0
2307 ; CI-NEXT: v_mov_b32_e32 v3, 0x40100000
2308 ; CI-NEXT: v_mov_b32_e32 v4, 0
2309 ; CI-NEXT: v_mov_b32_e32 v5, 0x40200000
2310 ; CI-NEXT: s_mov_b32 s32, 0
2311 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2314 ; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
2316 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2317 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2318 ; GFX9-NEXT: s_mov_b32 s38, -1
2319 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2320 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2321 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2322 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2323 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2324 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2325 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2326 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2327 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2328 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2329 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2330 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
2331 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
2332 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
2333 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000
2334 ; GFX9-NEXT: s_mov_b32 s32, 0
2335 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2336 ; GFX9-NEXT: s_endpgm
2338 ; GFX11-LABEL: test_call_external_void_func_v3f64_imm:
2340 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2341 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2342 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000
2343 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2344 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f64@rel32@lo+4
2345 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f64@rel32@hi+12
2346 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2347 ; GFX11-NEXT: s_mov_b32 s32, 0
2348 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2349 ; GFX11-NEXT: s_endpgm
2351 ; HSA-LABEL: test_call_external_void_func_v3f64_imm:
2353 ; HSA-NEXT: s_add_i32 s6, s6, s9
2354 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2355 ; HSA-NEXT: s_add_u32 s0, s0, s9
2356 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2357 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2358 ; HSA-NEXT: s_getpc_b64 s[8:9]
2359 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f64@rel32@lo+4
2360 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f64@rel32@hi+12
2361 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2362 ; HSA-NEXT: v_mov_b32_e32 v0, 0
2363 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0
2364 ; HSA-NEXT: v_mov_b32_e32 v2, 0
2365 ; HSA-NEXT: v_mov_b32_e32 v3, 0x40100000
2366 ; HSA-NEXT: v_mov_b32_e32 v4, 0
2367 ; HSA-NEXT: v_mov_b32_e32 v5, 0x40200000
2368 ; HSA-NEXT: s_mov_b32 s32, 0
2369 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2370 ; HSA-NEXT: s_endpgm
2371 call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
; Kernel that loads a <2 x i16> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v2i16. In the expected output below, the VI, GFX9,
; GFX11 and HSA runs issue the call with the loaded dword left untouched in v0,
; whereas the CI run first shifts v0 right by 16 into v1 before the call.
; The check lines are autogenerated (see the first line of the file); do not
; hand-edit them.
2375 define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
2376 ; VI-LABEL: test_call_external_void_func_v2i16:
2378 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2379 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2380 ; VI-NEXT: s_mov_b32 s38, -1
2381 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2382 ; VI-NEXT: s_add_u32 s36, s36, s3
2383 ; VI-NEXT: s_mov_b32 s3, 0xf000
2384 ; VI-NEXT: s_mov_b32 s2, -1
2385 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0
2386 ; VI-NEXT: s_addc_u32 s37, s37, 0
2387 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2388 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2389 ; VI-NEXT: s_getpc_b64 s[4:5]
2390 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2391 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2392 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2393 ; VI-NEXT: s_mov_b32 s32, 0
2394 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2397 ; CI-LABEL: test_call_external_void_func_v2i16:
2399 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2400 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2401 ; CI-NEXT: s_mov_b32 s38, -1
2402 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2403 ; CI-NEXT: s_add_u32 s36, s36, s3
2404 ; CI-NEXT: s_mov_b32 s3, 0xf000
2405 ; CI-NEXT: s_mov_b32 s2, -1
2406 ; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0
2407 ; CI-NEXT: s_addc_u32 s37, s37, 0
2408 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2409 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2410 ; CI-NEXT: s_getpc_b64 s[4:5]
2411 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2412 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2413 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2414 ; CI-NEXT: s_mov_b32 s32, 0
2415 ; CI-NEXT: s_waitcnt vmcnt(0)
2416 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2417 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2420 ; GFX9-LABEL: test_call_external_void_func_v2i16:
2422 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2423 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2424 ; GFX9-NEXT: s_mov_b32 s38, -1
2425 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2426 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2427 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
2428 ; GFX9-NEXT: s_mov_b32 s2, -1
2429 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0
2430 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2431 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2432 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2433 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2434 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2435 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2436 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2437 ; GFX9-NEXT: s_mov_b32 s32, 0
2438 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2439 ; GFX9-NEXT: s_endpgm
2441 ; GFX11-LABEL: test_call_external_void_func_v2i16:
2443 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
2444 ; GFX11-NEXT: s_mov_b32 s2, -1
2445 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2446 ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
2447 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2448 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i16@rel32@lo+4
2449 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i16@rel32@hi+12
2450 ; GFX11-NEXT: s_mov_b32 s32, 0
2451 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2452 ; GFX11-NEXT: s_endpgm
2454 ; HSA-LABEL: test_call_external_void_func_v2i16:
2456 ; HSA-NEXT: s_add_i32 s6, s6, s9
2457 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2458 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2459 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
2460 ; HSA-NEXT: s_mov_b32 s6, -1
2461 ; HSA-NEXT: buffer_load_dword v0, off, s[4:7], 0
2462 ; HSA-NEXT: s_add_u32 s0, s0, s9
2463 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2464 ; HSA-NEXT: s_getpc_b64 s[8:9]
2465 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i16@rel32@lo+4
2466 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i16@rel32@hi+12
2467 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2468 ; HSA-NEXT: s_mov_b32 s32, 0
2469 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2470 ; HSA-NEXT: s_endpgm
; IR under test: load the vector, then pass it by value to the external callee.
2471 %val = load <2 x i16>, ptr addrspace(1) undef
2472 call void @external_void_func_v2i16(<2 x i16> %val)
; Kernel that loads a <3 x i16> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v3i16. In the expected output below, the VI, GFX9,
; GFX11 and HSA runs load two dwords into v[0:1] and call with no further
; vector moves. The CI run loads into v[2:3] and then fills v0, v1 and v2
; using v_alignbit_b32 plus two moves before the call.
; The check lines are autogenerated (see the first line of the file); do not
; hand-edit them.
2476 define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
2477 ; VI-LABEL: test_call_external_void_func_v3i16:
2479 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2480 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2481 ; VI-NEXT: s_mov_b32 s38, -1
2482 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2483 ; VI-NEXT: s_add_u32 s36, s36, s3
2484 ; VI-NEXT: s_mov_b32 s3, 0xf000
2485 ; VI-NEXT: s_mov_b32 s2, -1
2486 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2487 ; VI-NEXT: s_addc_u32 s37, s37, 0
2488 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2489 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2490 ; VI-NEXT: s_getpc_b64 s[4:5]
2491 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2492 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2493 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2494 ; VI-NEXT: s_mov_b32 s32, 0
2495 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2498 ; CI-LABEL: test_call_external_void_func_v3i16:
2500 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2501 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2502 ; CI-NEXT: s_mov_b32 s38, -1
2503 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2504 ; CI-NEXT: s_add_u32 s36, s36, s3
2505 ; CI-NEXT: s_mov_b32 s3, 0xf000
2506 ; CI-NEXT: s_mov_b32 s2, -1
2507 ; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0
2508 ; CI-NEXT: s_addc_u32 s37, s37, 0
2509 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2510 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2511 ; CI-NEXT: s_getpc_b64 s[4:5]
2512 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2513 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2514 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2515 ; CI-NEXT: s_mov_b32 s32, 0
2516 ; CI-NEXT: s_waitcnt vmcnt(0)
2517 ; CI-NEXT: v_alignbit_b32 v1, v3, v2, 16
2518 ; CI-NEXT: v_mov_b32_e32 v0, v2
2519 ; CI-NEXT: v_mov_b32_e32 v2, v3
2520 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2523 ; GFX9-LABEL: test_call_external_void_func_v3i16:
2525 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2526 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2527 ; GFX9-NEXT: s_mov_b32 s38, -1
2528 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2529 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2530 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
2531 ; GFX9-NEXT: s_mov_b32 s2, -1
2532 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2533 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2534 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2535 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2536 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2537 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2538 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2539 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2540 ; GFX9-NEXT: s_mov_b32 s32, 0
2541 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2542 ; GFX9-NEXT: s_endpgm
2544 ; GFX11-LABEL: test_call_external_void_func_v3i16:
2546 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
2547 ; GFX11-NEXT: s_mov_b32 s2, -1
2548 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2549 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
2550 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2551 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i16@rel32@lo+4
2552 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i16@rel32@hi+12
2553 ; GFX11-NEXT: s_mov_b32 s32, 0
2554 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2555 ; GFX11-NEXT: s_endpgm
2557 ; HSA-LABEL: test_call_external_void_func_v3i16:
2559 ; HSA-NEXT: s_add_i32 s6, s6, s9
2560 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2561 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2562 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
2563 ; HSA-NEXT: s_mov_b32 s6, -1
2564 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2565 ; HSA-NEXT: s_add_u32 s0, s0, s9
2566 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2567 ; HSA-NEXT: s_getpc_b64 s[8:9]
2568 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i16@rel32@lo+4
2569 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i16@rel32@hi+12
2570 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2571 ; HSA-NEXT: s_mov_b32 s32, 0
2572 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2573 ; HSA-NEXT: s_endpgm
; IR under test: load the vector, then pass it by value to the external callee.
2574 %val = load <3 x i16>, ptr addrspace(1) undef
2575 call void @external_void_func_v3i16(<3 x i16> %val)
; Kernel that loads a <3 x half> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v3f16. In the expected output below, the VI, GFX9,
; GFX11 and HSA runs load two dwords into v[0:1] and call with no further
; vector instructions. The CI run loads into v[1:2] and applies
; v_cvt_f32_f16 three times (with a 16-bit shift for the middle element),
; leaving results in v0, v1 and v2 before the call.
; The check lines are autogenerated (see the first line of the file); do not
; hand-edit them.
2579 define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
2580 ; VI-LABEL: test_call_external_void_func_v3f16:
2582 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2583 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2584 ; VI-NEXT: s_mov_b32 s38, -1
2585 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2586 ; VI-NEXT: s_add_u32 s36, s36, s3
2587 ; VI-NEXT: s_mov_b32 s3, 0xf000
2588 ; VI-NEXT: s_mov_b32 s2, -1
2589 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2590 ; VI-NEXT: s_addc_u32 s37, s37, 0
2591 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2592 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2593 ; VI-NEXT: s_getpc_b64 s[4:5]
2594 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2595 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2596 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2597 ; VI-NEXT: s_mov_b32 s32, 0
2598 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2601 ; CI-LABEL: test_call_external_void_func_v3f16:
2603 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2604 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2605 ; CI-NEXT: s_mov_b32 s38, -1
2606 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2607 ; CI-NEXT: s_add_u32 s36, s36, s3
2608 ; CI-NEXT: s_mov_b32 s3, 0xf000
2609 ; CI-NEXT: s_mov_b32 s2, -1
2610 ; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
2611 ; CI-NEXT: s_addc_u32 s37, s37, 0
2612 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2613 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2614 ; CI-NEXT: s_getpc_b64 s[4:5]
2615 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2616 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2617 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2618 ; CI-NEXT: s_mov_b32 s32, 0
2619 ; CI-NEXT: s_waitcnt vmcnt(0)
2620 ; CI-NEXT: v_cvt_f32_f16_e32 v0, v1
2621 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
2622 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2623 ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2624 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2627 ; GFX9-LABEL: test_call_external_void_func_v3f16:
2629 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2630 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2631 ; GFX9-NEXT: s_mov_b32 s38, -1
2632 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2633 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2634 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
2635 ; GFX9-NEXT: s_mov_b32 s2, -1
2636 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2637 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2638 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2639 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2640 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2641 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2642 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2643 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2644 ; GFX9-NEXT: s_mov_b32 s32, 0
2645 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2646 ; GFX9-NEXT: s_endpgm
2648 ; GFX11-LABEL: test_call_external_void_func_v3f16:
2650 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
2651 ; GFX11-NEXT: s_mov_b32 s2, -1
2652 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2653 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
2654 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2655 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16@rel32@lo+4
2656 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16@rel32@hi+12
2657 ; GFX11-NEXT: s_mov_b32 s32, 0
2658 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2659 ; GFX11-NEXT: s_endpgm
2661 ; HSA-LABEL: test_call_external_void_func_v3f16:
2663 ; HSA-NEXT: s_add_i32 s6, s6, s9
2664 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2665 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2666 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
2667 ; HSA-NEXT: s_mov_b32 s6, -1
2668 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2669 ; HSA-NEXT: s_add_u32 s0, s0, s9
2670 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2671 ; HSA-NEXT: s_getpc_b64 s[8:9]
2672 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f16@rel32@lo+4
2673 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f16@rel32@hi+12
2674 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2675 ; HSA-NEXT: s_mov_b32 s32, 0
2676 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2677 ; HSA-NEXT: s_endpgm
; IR under test: load the vector, then pass it by value to the external callee.
2678 %val = load <3 x half>, ptr addrspace(1) undef
2679 call void @external_void_func_v3f16(<3 x half> %val)
; Kernel that passes the constant <3 x i16> <1, 2, 3> to
; @external_void_func_v3i16 (the same callee as the non-immediate v3i16 test).
; In the expected output below, the VI, GFX9, GFX11 and HSA runs materialize
; the argument as v0 = 0x20001 and v1 = 3 (GFX11 does so with a single
; v_dual_mov_b32). The CI run instead writes 1, 2 and 3 into v0, v1 and v2.
; The check lines are autogenerated (see the first line of the file); do not
; hand-edit them.
2683 define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
2684 ; VI-LABEL: test_call_external_void_func_v3i16_imm:
2686 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2687 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2688 ; VI-NEXT: s_mov_b32 s38, -1
2689 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2690 ; VI-NEXT: s_add_u32 s36, s36, s3
2691 ; VI-NEXT: s_addc_u32 s37, s37, 0
2692 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2693 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2694 ; VI-NEXT: s_getpc_b64 s[4:5]
2695 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2696 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2697 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2698 ; VI-NEXT: v_mov_b32_e32 v0, 0x20001
2699 ; VI-NEXT: v_mov_b32_e32 v1, 3
2700 ; VI-NEXT: s_mov_b32 s32, 0
2701 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2704 ; CI-LABEL: test_call_external_void_func_v3i16_imm:
2706 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2707 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2708 ; CI-NEXT: s_mov_b32 s38, -1
2709 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2710 ; CI-NEXT: s_add_u32 s36, s36, s3
2711 ; CI-NEXT: s_addc_u32 s37, s37, 0
2712 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2713 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2714 ; CI-NEXT: s_getpc_b64 s[4:5]
2715 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2716 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2717 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2718 ; CI-NEXT: v_mov_b32_e32 v0, 1
2719 ; CI-NEXT: v_mov_b32_e32 v1, 2
2720 ; CI-NEXT: v_mov_b32_e32 v2, 3
2721 ; CI-NEXT: s_mov_b32 s32, 0
2722 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2725 ; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
2727 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2728 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2729 ; GFX9-NEXT: s_mov_b32 s38, -1
2730 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2731 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2732 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2733 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2734 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2735 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2736 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2737 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2738 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2739 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
2740 ; GFX9-NEXT: v_mov_b32_e32 v1, 3
2741 ; GFX9-NEXT: s_mov_b32 s32, 0
2742 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2743 ; GFX9-NEXT: s_endpgm
2745 ; GFX11-LABEL: test_call_external_void_func_v3i16_imm:
2747 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3
2748 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2749 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i16@rel32@lo+4
2750 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i16@rel32@hi+12
2751 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2752 ; GFX11-NEXT: s_mov_b32 s32, 0
2753 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2754 ; GFX11-NEXT: s_endpgm
2756 ; HSA-LABEL: test_call_external_void_func_v3i16_imm:
2758 ; HSA-NEXT: s_add_i32 s6, s6, s9
2759 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2760 ; HSA-NEXT: s_add_u32 s0, s0, s9
2761 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2762 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2763 ; HSA-NEXT: s_getpc_b64 s[8:9]
2764 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i16@rel32@lo+4
2765 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i16@rel32@hi+12
2766 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2767 ; HSA-NEXT: v_mov_b32_e32 v0, 0x20001
2768 ; HSA-NEXT: v_mov_b32_e32 v1, 3
2769 ; HSA-NEXT: s_mov_b32 s32, 0
2770 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2771 ; HSA-NEXT: s_endpgm
; IR under test: pass a constant vector by value to the external callee.
2772 call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
; Kernel that passes the constant <3 x half> <1.0, 2.0, 4.0> to
; @external_void_func_v3f16 (the same callee as the non-immediate v3f16 test).
; In the expected output below, the VI, GFX9, GFX11 and HSA runs materialize
; the argument as v0 = 0x40003c00 and v1 = 0x4400. The CI run instead writes
; the inline constants 1.0, 2.0 and 4.0 into v0, v1 and v2.
; The check lines are autogenerated (see the first line of the file); do not
; hand-edit them.
2776 define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
2777 ; VI-LABEL: test_call_external_void_func_v3f16_imm:
2779 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2780 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2781 ; VI-NEXT: s_mov_b32 s38, -1
2782 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2783 ; VI-NEXT: s_add_u32 s36, s36, s3
2784 ; VI-NEXT: s_addc_u32 s37, s37, 0
2785 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2786 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2787 ; VI-NEXT: s_getpc_b64 s[4:5]
2788 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2789 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2790 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2791 ; VI-NEXT: v_mov_b32_e32 v0, 0x40003c00
2792 ; VI-NEXT: v_mov_b32_e32 v1, 0x4400
2793 ; VI-NEXT: s_mov_b32 s32, 0
2794 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2797 ; CI-LABEL: test_call_external_void_func_v3f16_imm:
2799 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2800 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2801 ; CI-NEXT: s_mov_b32 s38, -1
2802 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2803 ; CI-NEXT: s_add_u32 s36, s36, s3
2804 ; CI-NEXT: s_addc_u32 s37, s37, 0
2805 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2806 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2807 ; CI-NEXT: s_getpc_b64 s[4:5]
2808 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2809 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2810 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2811 ; CI-NEXT: v_mov_b32_e32 v0, 1.0
2812 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
2813 ; CI-NEXT: v_mov_b32_e32 v2, 4.0
2814 ; CI-NEXT: s_mov_b32 s32, 0
2815 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2818 ; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
2820 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2821 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2822 ; GFX9-NEXT: s_mov_b32 s38, -1
2823 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2824 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2825 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2826 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2827 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2828 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2829 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2830 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2831 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2832 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00
2833 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400
2834 ; GFX9-NEXT: s_mov_b32 s32, 0
2835 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2836 ; GFX9-NEXT: s_endpgm
2838 ; GFX11-LABEL: test_call_external_void_func_v3f16_imm:
2840 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00
2841 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400
2842 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2843 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16@rel32@lo+4
2844 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16@rel32@hi+12
2845 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2846 ; GFX11-NEXT: s_mov_b32 s32, 0
2847 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2848 ; GFX11-NEXT: s_endpgm
2850 ; HSA-LABEL: test_call_external_void_func_v3f16_imm:
2852 ; HSA-NEXT: s_add_i32 s6, s6, s9
2853 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2854 ; HSA-NEXT: s_add_u32 s0, s0, s9
2855 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2856 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2857 ; HSA-NEXT: s_getpc_b64 s[8:9]
2858 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f16@rel32@lo+4
2859 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f16@rel32@hi+12
2860 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2861 ; HSA-NEXT: v_mov_b32_e32 v0, 0x40003c00
2862 ; HSA-NEXT: v_mov_b32_e32 v1, 0x4400
2863 ; HSA-NEXT: s_mov_b32 s32, 0
2864 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2865 ; HSA-NEXT: s_endpgm
; IR under test: pass a constant vector by value to the external callee.
2866 call void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
; Kernel that loads a <4 x i16> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v4i16. In the expected output below, every run
; loads two dwords into v[0:1]. The VI, GFX9, GFX11 and HSA runs then call
; directly, while the CI run uses two 16-bit right shifts and two moves to
; populate v1, v2 and v3 (v0 is left as loaded) before the call.
; The check lines are autogenerated (see the first line of the file); do not
; hand-edit them.
2870 define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
2871 ; VI-LABEL: test_call_external_void_func_v4i16:
2873 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2874 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2875 ; VI-NEXT: s_mov_b32 s38, -1
2876 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2877 ; VI-NEXT: s_add_u32 s36, s36, s3
2878 ; VI-NEXT: s_mov_b32 s3, 0xf000
2879 ; VI-NEXT: s_mov_b32 s2, -1
2880 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2881 ; VI-NEXT: s_addc_u32 s37, s37, 0
2882 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2883 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2884 ; VI-NEXT: s_getpc_b64 s[4:5]
2885 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2886 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2887 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2888 ; VI-NEXT: s_mov_b32 s32, 0
2889 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2892 ; CI-LABEL: test_call_external_void_func_v4i16:
2894 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2895 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2896 ; CI-NEXT: s_mov_b32 s38, -1
2897 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2898 ; CI-NEXT: s_add_u32 s36, s36, s3
2899 ; CI-NEXT: s_mov_b32 s3, 0xf000
2900 ; CI-NEXT: s_mov_b32 s2, -1
2901 ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2902 ; CI-NEXT: s_addc_u32 s37, s37, 0
2903 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2904 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2905 ; CI-NEXT: s_getpc_b64 s[4:5]
2906 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2907 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2908 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2909 ; CI-NEXT: s_mov_b32 s32, 0
2910 ; CI-NEXT: s_waitcnt vmcnt(0)
2911 ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
2912 ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v1
2913 ; CI-NEXT: v_mov_b32_e32 v2, v1
2914 ; CI-NEXT: v_mov_b32_e32 v1, v4
2915 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2918 ; GFX9-LABEL: test_call_external_void_func_v4i16:
2920 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2921 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2922 ; GFX9-NEXT: s_mov_b32 s38, -1
2923 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2924 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2925 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
2926 ; GFX9-NEXT: s_mov_b32 s2, -1
2927 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2928 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2929 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2930 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2931 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2932 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2933 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2934 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2935 ; GFX9-NEXT: s_mov_b32 s32, 0
2936 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2937 ; GFX9-NEXT: s_endpgm
2939 ; GFX11-LABEL: test_call_external_void_func_v4i16:
2941 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
2942 ; GFX11-NEXT: s_mov_b32 s2, -1
2943 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2944 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
2945 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2946 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i16@rel32@lo+4
2947 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i16@rel32@hi+12
2948 ; GFX11-NEXT: s_mov_b32 s32, 0
2949 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2950 ; GFX11-NEXT: s_endpgm
2952 ; HSA-LABEL: test_call_external_void_func_v4i16:
2954 ; HSA-NEXT: s_add_i32 s6, s6, s9
2955 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2956 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2957 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
2958 ; HSA-NEXT: s_mov_b32 s6, -1
2959 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2960 ; HSA-NEXT: s_add_u32 s0, s0, s9
2961 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2962 ; HSA-NEXT: s_getpc_b64 s[8:9]
2963 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i16@rel32@lo+4
2964 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i16@rel32@hi+12
2965 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2966 ; HSA-NEXT: s_mov_b32 s32, 0
2967 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2968 ; HSA-NEXT: s_endpgm
; IR under test: load the vector, then pass it by value to the external callee.
2969 %val = load <4 x i16>, ptr addrspace(1) undef
2970 call void @external_void_func_v4i16(<4 x i16> %val)
; Kernel that passes the constant <4 x i16> <1, 2, 3, 4> to
; @external_void_func_v4i16 (the same callee as the non-immediate v4i16 test).
; In the expected output below, the VI, GFX9, GFX11 and HSA runs materialize
; the argument as v0 = 0x20001 and v1 = 0x40003. The CI run instead writes
; 1, 2, 3 and 4 into v0 through v3.
; The check lines are autogenerated (see the first line of the file); do not
; hand-edit them.
2974 define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
2975 ; VI-LABEL: test_call_external_void_func_v4i16_imm:
2977 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2978 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2979 ; VI-NEXT: s_mov_b32 s38, -1
2980 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2981 ; VI-NEXT: s_add_u32 s36, s36, s3
2982 ; VI-NEXT: s_addc_u32 s37, s37, 0
2983 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2984 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2985 ; VI-NEXT: s_getpc_b64 s[4:5]
2986 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2987 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2988 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2989 ; VI-NEXT: v_mov_b32_e32 v0, 0x20001
2990 ; VI-NEXT: v_mov_b32_e32 v1, 0x40003
2991 ; VI-NEXT: s_mov_b32 s32, 0
2992 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2995 ; CI-LABEL: test_call_external_void_func_v4i16_imm:
2997 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2998 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2999 ; CI-NEXT: s_mov_b32 s38, -1
3000 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3001 ; CI-NEXT: s_add_u32 s36, s36, s3
3002 ; CI-NEXT: s_addc_u32 s37, s37, 0
3003 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3004 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3005 ; CI-NEXT: s_getpc_b64 s[4:5]
3006 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
3007 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
3008 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3009 ; CI-NEXT: v_mov_b32_e32 v0, 1
3010 ; CI-NEXT: v_mov_b32_e32 v1, 2
3011 ; CI-NEXT: v_mov_b32_e32 v2, 3
3012 ; CI-NEXT: v_mov_b32_e32 v3, 4
3013 ; CI-NEXT: s_mov_b32 s32, 0
3014 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3017 ; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
3019 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3020 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3021 ; GFX9-NEXT: s_mov_b32 s38, -1
3022 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3023 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3024 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3025 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3026 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3027 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3028 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
3029 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
3030 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3031 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
3032 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003
3033 ; GFX9-NEXT: s_mov_b32 s32, 0
3034 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3035 ; GFX9-NEXT: s_endpgm
3037 ; GFX11-LABEL: test_call_external_void_func_v4i16_imm:
3039 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001
3040 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003
3041 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3042 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i16@rel32@lo+4
3043 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i16@rel32@hi+12
3044 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3045 ; GFX11-NEXT: s_mov_b32 s32, 0
3046 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3047 ; GFX11-NEXT: s_endpgm
3049 ; HSA-LABEL: test_call_external_void_func_v4i16_imm:
3051 ; HSA-NEXT: s_add_i32 s6, s6, s9
3052 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3053 ; HSA-NEXT: s_add_u32 s0, s0, s9
3054 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3055 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3056 ; HSA-NEXT: s_getpc_b64 s[8:9]
3057 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i16@rel32@lo+4
3058 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i16@rel32@hi+12
3059 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3060 ; HSA-NEXT: v_mov_b32_e32 v0, 0x20001
3061 ; HSA-NEXT: v_mov_b32_e32 v1, 0x40003
3062 ; HSA-NEXT: s_mov_b32 s32, 0
3063 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3064 ; HSA-NEXT: s_endpgm
; IR under test: pass a constant vector by value to the external callee.
3065 call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
; Kernel that loads a <2 x half> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v2f16. In the expected output below, the VI, GFX9,
; GFX11 and HSA runs issue the call with the loaded dword left untouched in v0.
; The CI run loads into v1 and applies v_cvt_f32_f16 to the low half and to the
; 16-bit-shifted high half, leaving results in v0 and v1 before the call.
; The check lines are autogenerated (see the first line of the file); do not
; hand-edit them.
3069 define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
3070 ; VI-LABEL: test_call_external_void_func_v2f16:
3072 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3073 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3074 ; VI-NEXT: s_mov_b32 s38, -1
3075 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3076 ; VI-NEXT: s_add_u32 s36, s36, s3
3077 ; VI-NEXT: s_mov_b32 s3, 0xf000
3078 ; VI-NEXT: s_mov_b32 s2, -1
3079 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0
3080 ; VI-NEXT: s_addc_u32 s37, s37, 0
3081 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3082 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3083 ; VI-NEXT: s_getpc_b64 s[4:5]
3084 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
3085 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
3086 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3087 ; VI-NEXT: s_mov_b32 s32, 0
3088 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3091 ; CI-LABEL: test_call_external_void_func_v2f16:
3093 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3094 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3095 ; CI-NEXT: s_mov_b32 s38, -1
3096 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3097 ; CI-NEXT: s_add_u32 s36, s36, s3
3098 ; CI-NEXT: s_mov_b32 s3, 0xf000
3099 ; CI-NEXT: s_mov_b32 s2, -1
3100 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0
3101 ; CI-NEXT: s_addc_u32 s37, s37, 0
3102 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3103 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3104 ; CI-NEXT: s_getpc_b64 s[4:5]
3105 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
3106 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
3107 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3108 ; CI-NEXT: s_mov_b32 s32, 0
3109 ; CI-NEXT: s_waitcnt vmcnt(0)
3110 ; CI-NEXT: v_cvt_f32_f16_e32 v0, v1
3111 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3112 ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
3113 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3116 ; GFX9-LABEL: test_call_external_void_func_v2f16:
3118 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3119 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3120 ; GFX9-NEXT: s_mov_b32 s38, -1
3121 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3122 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3123 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
3124 ; GFX9-NEXT: s_mov_b32 s2, -1
3125 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0
3126 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3127 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3128 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3129 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3130 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
3131 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
3132 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3133 ; GFX9-NEXT: s_mov_b32 s32, 0
3134 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3135 ; GFX9-NEXT: s_endpgm
3137 ; GFX11-LABEL: test_call_external_void_func_v2f16:
3139 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
3140 ; GFX11-NEXT: s_mov_b32 s2, -1
3141 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3142 ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
3143 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3144 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f16@rel32@lo+4
3145 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f16@rel32@hi+12
3146 ; GFX11-NEXT: s_mov_b32 s32, 0
3147 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3148 ; GFX11-NEXT: s_endpgm
3150 ; HSA-LABEL: test_call_external_void_func_v2f16:
3152 ; HSA-NEXT: s_add_i32 s6, s6, s9
3153 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3154 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3155 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
3156 ; HSA-NEXT: s_mov_b32 s6, -1
3157 ; HSA-NEXT: buffer_load_dword v0, off, s[4:7], 0
3158 ; HSA-NEXT: s_add_u32 s0, s0, s9
3159 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3160 ; HSA-NEXT: s_getpc_b64 s[8:9]
3161 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f16@rel32@lo+4
3162 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f16@rel32@hi+12
3163 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3164 ; HSA-NEXT: s_mov_b32 s32, 0
3165 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3166 ; HSA-NEXT: s_endpgm
; IR under test: load the vector, then pass it by value to the external callee.
3167 %val = load <2 x half>, ptr addrspace(1) undef
3168 call void @external_void_func_v2f16(<2 x half> %val)
; Kernel that loads a <2 x i32> from an undef addrspace(1) pointer and passes
; it by value to @external_void_func_v2i32. On every checked configuration the
; assertions expect one 64-bit buffer load into v[0:1] to be issued before the
; s_swappc_b64 call, with s32 set to 0 just ahead of the call.
3172 define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
3173 ; VI-LABEL: test_call_external_void_func_v2i32:
3175 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3176 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3177 ; VI-NEXT: s_mov_b32 s38, -1
3178 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3179 ; VI-NEXT: s_add_u32 s36, s36, s3
3180 ; VI-NEXT: s_mov_b32 s3, 0xf000
3181 ; VI-NEXT: s_mov_b32 s2, -1
3182 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
3183 ; VI-NEXT: s_addc_u32 s37, s37, 0
3184 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3185 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3186 ; VI-NEXT: s_getpc_b64 s[4:5]
3187 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3188 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3189 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3190 ; VI-NEXT: s_mov_b32 s32, 0
3191 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3194 ; CI-LABEL: test_call_external_void_func_v2i32:
3196 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3197 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3198 ; CI-NEXT: s_mov_b32 s38, -1
3199 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3200 ; CI-NEXT: s_add_u32 s36, s36, s3
3201 ; CI-NEXT: s_mov_b32 s3, 0xf000
3202 ; CI-NEXT: s_mov_b32 s2, -1
3203 ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
3204 ; CI-NEXT: s_addc_u32 s37, s37, 0
3205 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3206 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3207 ; CI-NEXT: s_getpc_b64 s[4:5]
3208 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3209 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3210 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3211 ; CI-NEXT: s_mov_b32 s32, 0
3212 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3215 ; GFX9-LABEL: test_call_external_void_func_v2i32:
3217 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3218 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3219 ; GFX9-NEXT: s_mov_b32 s38, -1
3220 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3221 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3222 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
3223 ; GFX9-NEXT: s_mov_b32 s2, -1
3224 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
3225 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3226 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3227 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3228 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3229 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3230 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3231 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3232 ; GFX9-NEXT: s_mov_b32 s32, 0
3233 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3234 ; GFX9-NEXT: s_endpgm
3236 ; GFX11-LABEL: test_call_external_void_func_v2i32:
3238 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
3239 ; GFX11-NEXT: s_mov_b32 s2, -1
3240 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3241 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
3242 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3243 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32@rel32@lo+4
3244 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32@rel32@hi+12
3245 ; GFX11-NEXT: s_mov_b32 s32, 0
3246 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3247 ; GFX11-NEXT: s_endpgm
3249 ; HSA-LABEL: test_call_external_void_func_v2i32:
3251 ; HSA-NEXT: s_add_i32 s6, s6, s9
3252 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3253 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3254 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
3255 ; HSA-NEXT: s_mov_b32 s6, -1
3256 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
3257 ; HSA-NEXT: s_add_u32 s0, s0, s9
3258 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3259 ; HSA-NEXT: s_getpc_b64 s[8:9]
3260 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i32@rel32@lo+4
3261 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i32@rel32@hi+12
3262 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3263 ; HSA-NEXT: s_mov_b32 s32, 0
3264 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3265 ; HSA-NEXT: s_endpgm
3266 %val = load <2 x i32>, ptr addrspace(1) undef
3267 call void @external_void_func_v2i32(<2 x i32> %val)
; Kernel that passes the constant vector <i32 1, i32 2> to
; @external_void_func_v2i32. The assertions expect the two elements to be
; materialized as immediates in v0 and v1 before the s_swappc_b64 call; the
; gfx1100 run expects both moves fused into a single v_dual_mov_b32.
3271 define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
3272 ; VI-LABEL: test_call_external_void_func_v2i32_imm:
3274 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3275 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3276 ; VI-NEXT: s_mov_b32 s38, -1
3277 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3278 ; VI-NEXT: s_add_u32 s36, s36, s3
3279 ; VI-NEXT: s_addc_u32 s37, s37, 0
3280 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3281 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3282 ; VI-NEXT: s_getpc_b64 s[4:5]
3283 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3284 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3285 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3286 ; VI-NEXT: v_mov_b32_e32 v0, 1
3287 ; VI-NEXT: v_mov_b32_e32 v1, 2
3288 ; VI-NEXT: s_mov_b32 s32, 0
3289 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3292 ; CI-LABEL: test_call_external_void_func_v2i32_imm:
3294 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3295 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3296 ; CI-NEXT: s_mov_b32 s38, -1
3297 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3298 ; CI-NEXT: s_add_u32 s36, s36, s3
3299 ; CI-NEXT: s_addc_u32 s37, s37, 0
3300 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3301 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3302 ; CI-NEXT: s_getpc_b64 s[4:5]
3303 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3304 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3305 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3306 ; CI-NEXT: v_mov_b32_e32 v0, 1
3307 ; CI-NEXT: v_mov_b32_e32 v1, 2
3308 ; CI-NEXT: s_mov_b32 s32, 0
3309 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3312 ; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
3314 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3315 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3316 ; GFX9-NEXT: s_mov_b32 s38, -1
3317 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3318 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3319 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3320 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3321 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3322 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3323 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3324 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3325 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3326 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
3327 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
3328 ; GFX9-NEXT: s_mov_b32 s32, 0
3329 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3330 ; GFX9-NEXT: s_endpgm
3332 ; GFX11-LABEL: test_call_external_void_func_v2i32_imm:
3334 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
3335 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3336 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32@rel32@lo+4
3337 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32@rel32@hi+12
3338 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3339 ; GFX11-NEXT: s_mov_b32 s32, 0
3340 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3341 ; GFX11-NEXT: s_endpgm
3343 ; HSA-LABEL: test_call_external_void_func_v2i32_imm:
3345 ; HSA-NEXT: s_add_i32 s6, s6, s9
3346 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3347 ; HSA-NEXT: s_add_u32 s0, s0, s9
3348 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3349 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3350 ; HSA-NEXT: s_getpc_b64 s[8:9]
3351 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i32@rel32@lo+4
3352 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i32@rel32@hi+12
3353 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3354 ; HSA-NEXT: v_mov_b32_e32 v0, 1
3355 ; HSA-NEXT: v_mov_b32_e32 v1, 2
3356 ; HSA-NEXT: s_mov_b32 s32, 0
3357 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3358 ; HSA-NEXT: s_endpgm
3359 call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
; Kernel that passes the constant vector <i32 3, i32 4, i32 5> to
; @external_void_func_v3i32. Unlike the neighbouring kernels it declares one
; unnamed i32 parameter that the body never reads. The assertions expect the
; three elements in v0, v1 and v2 before the s_swappc_b64 call.
; NOTE(review): the different scalar registers in the prologue (s5 here versus
; s3 in the no-argument kernels) presumably follow from that extra kernel
; argument - confirm before relying on it.
3363 define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
3364 ; VI-LABEL: test_call_external_void_func_v3i32_imm:
3366 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3367 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3368 ; VI-NEXT: s_mov_b32 s38, -1
3369 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3370 ; VI-NEXT: s_add_u32 s36, s36, s5
3371 ; VI-NEXT: s_addc_u32 s37, s37, 0
3372 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3373 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3374 ; VI-NEXT: s_getpc_b64 s[4:5]
3375 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3376 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3377 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3378 ; VI-NEXT: v_mov_b32_e32 v0, 3
3379 ; VI-NEXT: v_mov_b32_e32 v1, 4
3380 ; VI-NEXT: v_mov_b32_e32 v2, 5
3381 ; VI-NEXT: s_mov_b32 s32, 0
3382 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3385 ; CI-LABEL: test_call_external_void_func_v3i32_imm:
3387 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3388 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3389 ; CI-NEXT: s_mov_b32 s38, -1
3390 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3391 ; CI-NEXT: s_add_u32 s36, s36, s5
3392 ; CI-NEXT: s_addc_u32 s37, s37, 0
3393 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3394 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3395 ; CI-NEXT: s_getpc_b64 s[4:5]
3396 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3397 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3398 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3399 ; CI-NEXT: v_mov_b32_e32 v0, 3
3400 ; CI-NEXT: v_mov_b32_e32 v1, 4
3401 ; CI-NEXT: v_mov_b32_e32 v2, 5
3402 ; CI-NEXT: s_mov_b32 s32, 0
3403 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3406 ; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
3408 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3409 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3410 ; GFX9-NEXT: s_mov_b32 s38, -1
3411 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3412 ; GFX9-NEXT: s_add_u32 s36, s36, s5
3413 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3414 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3415 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3416 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3417 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3418 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3419 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3420 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
3421 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
3422 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
3423 ; GFX9-NEXT: s_mov_b32 s32, 0
3424 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3425 ; GFX9-NEXT: s_endpgm
3427 ; GFX11-LABEL: test_call_external_void_func_v3i32_imm:
3429 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
3430 ; GFX11-NEXT: v_mov_b32_e32 v2, 5
3431 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3432 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i32@rel32@lo+4
3433 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i32@rel32@hi+12
3434 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3435 ; GFX11-NEXT: s_mov_b32 s32, 0
3436 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3437 ; GFX11-NEXT: s_endpgm
3439 ; HSA-LABEL: test_call_external_void_func_v3i32_imm:
3441 ; HSA-NEXT: s_add_i32 s8, s8, s11
3442 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
3443 ; HSA-NEXT: s_add_u32 s0, s0, s11
3444 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3445 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
3446 ; HSA-NEXT: s_getpc_b64 s[8:9]
3447 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i32@rel32@lo+4
3448 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32@rel32@hi+12
3449 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3450 ; HSA-NEXT: v_mov_b32_e32 v0, 3
3451 ; HSA-NEXT: v_mov_b32_e32 v1, 4
3452 ; HSA-NEXT: v_mov_b32_e32 v2, 5
3453 ; HSA-NEXT: s_mov_b32 s32, 0
3454 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3455 ; HSA-NEXT: s_endpgm
3456 call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
; Kernel that passes the constant vector <i32 3, i32 4, i32 5> followed by the
; scalar i32 6 to @external_void_func_v3i32_i32. It declares one unnamed i32
; parameter that the body never reads. The assertions expect the vector in
; v0..v2 and the trailing scalar in the very next register, v3, i.e. no
; register is skipped after the 3-element vector.
3460 define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
3461 ; VI-LABEL: test_call_external_void_func_v3i32_i32:
3463 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3464 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3465 ; VI-NEXT: s_mov_b32 s38, -1
3466 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3467 ; VI-NEXT: s_add_u32 s36, s36, s5
3468 ; VI-NEXT: s_addc_u32 s37, s37, 0
3469 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3470 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3471 ; VI-NEXT: s_getpc_b64 s[4:5]
3472 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3473 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3474 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3475 ; VI-NEXT: v_mov_b32_e32 v0, 3
3476 ; VI-NEXT: v_mov_b32_e32 v1, 4
3477 ; VI-NEXT: v_mov_b32_e32 v2, 5
3478 ; VI-NEXT: v_mov_b32_e32 v3, 6
3479 ; VI-NEXT: s_mov_b32 s32, 0
3480 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3483 ; CI-LABEL: test_call_external_void_func_v3i32_i32:
3485 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3486 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3487 ; CI-NEXT: s_mov_b32 s38, -1
3488 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3489 ; CI-NEXT: s_add_u32 s36, s36, s5
3490 ; CI-NEXT: s_addc_u32 s37, s37, 0
3491 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3492 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3493 ; CI-NEXT: s_getpc_b64 s[4:5]
3494 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3495 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3496 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3497 ; CI-NEXT: v_mov_b32_e32 v0, 3
3498 ; CI-NEXT: v_mov_b32_e32 v1, 4
3499 ; CI-NEXT: v_mov_b32_e32 v2, 5
3500 ; CI-NEXT: v_mov_b32_e32 v3, 6
3501 ; CI-NEXT: s_mov_b32 s32, 0
3502 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3505 ; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
3507 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3508 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3509 ; GFX9-NEXT: s_mov_b32 s38, -1
3510 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3511 ; GFX9-NEXT: s_add_u32 s36, s36, s5
3512 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3513 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3514 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3515 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3516 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3517 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3518 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3519 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
3520 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
3521 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
3522 ; GFX9-NEXT: v_mov_b32_e32 v3, 6
3523 ; GFX9-NEXT: s_mov_b32 s32, 0
3524 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3525 ; GFX9-NEXT: s_endpgm
3527 ; GFX11-LABEL: test_call_external_void_func_v3i32_i32:
3529 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
3530 ; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6
3531 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3532 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i32_i32@rel32@lo+4
3533 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i32_i32@rel32@hi+12
3534 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3535 ; GFX11-NEXT: s_mov_b32 s32, 0
3536 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3537 ; GFX11-NEXT: s_endpgm
3539 ; HSA-LABEL: test_call_external_void_func_v3i32_i32:
3541 ; HSA-NEXT: s_add_i32 s8, s8, s11
3542 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
3543 ; HSA-NEXT: s_add_u32 s0, s0, s11
3544 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3545 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
3546 ; HSA-NEXT: s_getpc_b64 s[8:9]
3547 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_i32@rel32@lo+4
3548 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32@rel32@hi+12
3549 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3550 ; HSA-NEXT: v_mov_b32_e32 v0, 3
3551 ; HSA-NEXT: v_mov_b32_e32 v1, 4
3552 ; HSA-NEXT: v_mov_b32_e32 v2, 5
3553 ; HSA-NEXT: v_mov_b32_e32 v3, 6
3554 ; HSA-NEXT: s_mov_b32 s32, 0
3555 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3556 ; HSA-NEXT: s_endpgm
3557 call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
; Kernel that loads a <4 x i32> from an undef addrspace(1) pointer and passes
; it by value to @external_void_func_v4i32. On every checked configuration the
; assertions expect one 128-bit buffer load into v[0:3] to be issued before
; the s_swappc_b64 call.
3561 define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
3562 ; VI-LABEL: test_call_external_void_func_v4i32:
3564 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3565 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3566 ; VI-NEXT: s_mov_b32 s38, -1
3567 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3568 ; VI-NEXT: s_add_u32 s36, s36, s3
3569 ; VI-NEXT: s_mov_b32 s3, 0xf000
3570 ; VI-NEXT: s_mov_b32 s2, -1
3571 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3572 ; VI-NEXT: s_addc_u32 s37, s37, 0
3573 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3574 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3575 ; VI-NEXT: s_getpc_b64 s[4:5]
3576 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3577 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3578 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3579 ; VI-NEXT: s_mov_b32 s32, 0
3580 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3583 ; CI-LABEL: test_call_external_void_func_v4i32:
3585 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3586 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3587 ; CI-NEXT: s_mov_b32 s38, -1
3588 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3589 ; CI-NEXT: s_add_u32 s36, s36, s3
3590 ; CI-NEXT: s_mov_b32 s3, 0xf000
3591 ; CI-NEXT: s_mov_b32 s2, -1
3592 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3593 ; CI-NEXT: s_addc_u32 s37, s37, 0
3594 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3595 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3596 ; CI-NEXT: s_getpc_b64 s[4:5]
3597 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3598 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3599 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3600 ; CI-NEXT: s_mov_b32 s32, 0
3601 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3604 ; GFX9-LABEL: test_call_external_void_func_v4i32:
3606 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3607 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3608 ; GFX9-NEXT: s_mov_b32 s38, -1
3609 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3610 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3611 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
3612 ; GFX9-NEXT: s_mov_b32 s2, -1
3613 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3614 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3615 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3616 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3617 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3618 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3619 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3620 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3621 ; GFX9-NEXT: s_mov_b32 s32, 0
3622 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3623 ; GFX9-NEXT: s_endpgm
3625 ; GFX11-LABEL: test_call_external_void_func_v4i32:
3627 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
3628 ; GFX11-NEXT: s_mov_b32 s2, -1
3629 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3630 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
3631 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3632 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i32@rel32@lo+4
3633 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i32@rel32@hi+12
3634 ; GFX11-NEXT: s_mov_b32 s32, 0
3635 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3636 ; GFX11-NEXT: s_endpgm
3638 ; HSA-LABEL: test_call_external_void_func_v4i32:
3640 ; HSA-NEXT: s_add_i32 s6, s6, s9
3641 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3642 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3643 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
3644 ; HSA-NEXT: s_mov_b32 s6, -1
3645 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
3646 ; HSA-NEXT: s_add_u32 s0, s0, s9
3647 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3648 ; HSA-NEXT: s_getpc_b64 s[8:9]
3649 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i32@rel32@lo+4
3650 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32@rel32@hi+12
3651 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3652 ; HSA-NEXT: s_mov_b32 s32, 0
3653 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3654 ; HSA-NEXT: s_endpgm
3655 %val = load <4 x i32>, ptr addrspace(1) undef
3656 call void @external_void_func_v4i32(<4 x i32> %val)
; Kernel that passes the constant vector <i32 1, i32 2, i32 3, i32 4> to
; @external_void_func_v4i32. The assertions expect the four elements to be
; materialized as immediates in v0..v3 before the s_swappc_b64 call; the
; gfx1100 run expects them paired into two v_dual_mov_b32 instructions.
3660 define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
3661 ; VI-LABEL: test_call_external_void_func_v4i32_imm:
3663 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3664 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3665 ; VI-NEXT: s_mov_b32 s38, -1
3666 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3667 ; VI-NEXT: s_add_u32 s36, s36, s3
3668 ; VI-NEXT: s_addc_u32 s37, s37, 0
3669 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3670 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3671 ; VI-NEXT: s_getpc_b64 s[4:5]
3672 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3673 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3674 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3675 ; VI-NEXT: v_mov_b32_e32 v0, 1
3676 ; VI-NEXT: v_mov_b32_e32 v1, 2
3677 ; VI-NEXT: v_mov_b32_e32 v2, 3
3678 ; VI-NEXT: v_mov_b32_e32 v3, 4
3679 ; VI-NEXT: s_mov_b32 s32, 0
3680 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3683 ; CI-LABEL: test_call_external_void_func_v4i32_imm:
3685 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3686 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3687 ; CI-NEXT: s_mov_b32 s38, -1
3688 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3689 ; CI-NEXT: s_add_u32 s36, s36, s3
3690 ; CI-NEXT: s_addc_u32 s37, s37, 0
3691 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3692 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3693 ; CI-NEXT: s_getpc_b64 s[4:5]
3694 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3695 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3696 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3697 ; CI-NEXT: v_mov_b32_e32 v0, 1
3698 ; CI-NEXT: v_mov_b32_e32 v1, 2
3699 ; CI-NEXT: v_mov_b32_e32 v2, 3
3700 ; CI-NEXT: v_mov_b32_e32 v3, 4
3701 ; CI-NEXT: s_mov_b32 s32, 0
3702 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3705 ; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
3707 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3708 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3709 ; GFX9-NEXT: s_mov_b32 s38, -1
3710 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3711 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3712 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3713 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3714 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3715 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3716 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3717 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3718 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3719 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
3720 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
3721 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
3722 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
3723 ; GFX9-NEXT: s_mov_b32 s32, 0
3724 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3725 ; GFX9-NEXT: s_endpgm
3727 ; GFX11-LABEL: test_call_external_void_func_v4i32_imm:
3729 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
3730 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
3731 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3732 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i32@rel32@lo+4
3733 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i32@rel32@hi+12
3734 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3735 ; GFX11-NEXT: s_mov_b32 s32, 0
3736 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3737 ; GFX11-NEXT: s_endpgm
3739 ; HSA-LABEL: test_call_external_void_func_v4i32_imm:
3741 ; HSA-NEXT: s_add_i32 s6, s6, s9
3742 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3743 ; HSA-NEXT: s_add_u32 s0, s0, s9
3744 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3745 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3746 ; HSA-NEXT: s_getpc_b64 s[8:9]
3747 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i32@rel32@lo+4
3748 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32@rel32@hi+12
3749 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3750 ; HSA-NEXT: v_mov_b32_e32 v0, 1
3751 ; HSA-NEXT: v_mov_b32_e32 v1, 2
3752 ; HSA-NEXT: v_mov_b32_e32 v2, 3
3753 ; HSA-NEXT: v_mov_b32_e32 v3, 4
3754 ; HSA-NEXT: s_mov_b32 s32, 0
3755 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3756 ; HSA-NEXT: s_endpgm
3757 call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
; Kernel that passes the constant vector <i32 1, i32 2, i32 3, i32 4, i32 5>
; to @external_void_func_v5i32. The assertions expect the five elements to be
; materialized as immediates in v0..v4 before the s_swappc_b64 call; the
; gfx1100 run expects two v_dual_mov_b32 pairs plus one plain v_mov_b32 for
; the odd fifth element.
3761 define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
3762 ; VI-LABEL: test_call_external_void_func_v5i32_imm:
3764 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3765 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3766 ; VI-NEXT: s_mov_b32 s38, -1
3767 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3768 ; VI-NEXT: s_add_u32 s36, s36, s3
3769 ; VI-NEXT: s_addc_u32 s37, s37, 0
3770 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3771 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3772 ; VI-NEXT: s_getpc_b64 s[4:5]
3773 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3774 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3775 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3776 ; VI-NEXT: v_mov_b32_e32 v0, 1
3777 ; VI-NEXT: v_mov_b32_e32 v1, 2
3778 ; VI-NEXT: v_mov_b32_e32 v2, 3
3779 ; VI-NEXT: v_mov_b32_e32 v3, 4
3780 ; VI-NEXT: v_mov_b32_e32 v4, 5
3781 ; VI-NEXT: s_mov_b32 s32, 0
3782 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3785 ; CI-LABEL: test_call_external_void_func_v5i32_imm:
3787 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3788 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3789 ; CI-NEXT: s_mov_b32 s38, -1
3790 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3791 ; CI-NEXT: s_add_u32 s36, s36, s3
3792 ; CI-NEXT: s_addc_u32 s37, s37, 0
3793 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3794 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3795 ; CI-NEXT: s_getpc_b64 s[4:5]
3796 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3797 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3798 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3799 ; CI-NEXT: v_mov_b32_e32 v0, 1
3800 ; CI-NEXT: v_mov_b32_e32 v1, 2
3801 ; CI-NEXT: v_mov_b32_e32 v2, 3
3802 ; CI-NEXT: v_mov_b32_e32 v3, 4
3803 ; CI-NEXT: v_mov_b32_e32 v4, 5
3804 ; CI-NEXT: s_mov_b32 s32, 0
3805 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3808 ; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
3810 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3811 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3812 ; GFX9-NEXT: s_mov_b32 s38, -1
3813 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3814 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3815 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3816 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3817 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3818 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3819 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3820 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3821 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3822 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
3823 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
3824 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
3825 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
3826 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
3827 ; GFX9-NEXT: s_mov_b32 s32, 0
3828 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3829 ; GFX9-NEXT: s_endpgm
3831 ; GFX11-LABEL: test_call_external_void_func_v5i32_imm:
3833 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
3834 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
3835 ; GFX11-NEXT: v_mov_b32_e32 v4, 5
3836 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3837 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v5i32@rel32@lo+4
3838 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v5i32@rel32@hi+12
3839 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3840 ; GFX11-NEXT: s_mov_b32 s32, 0
3841 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3842 ; GFX11-NEXT: s_endpgm
3844 ; HSA-LABEL: test_call_external_void_func_v5i32_imm:
3846 ; HSA-NEXT: s_add_i32 s6, s6, s9
3847 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3848 ; HSA-NEXT: s_add_u32 s0, s0, s9
3849 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3850 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3851 ; HSA-NEXT: s_getpc_b64 s[8:9]
3852 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v5i32@rel32@lo+4
3853 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v5i32@rel32@hi+12
3854 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3855 ; HSA-NEXT: v_mov_b32_e32 v0, 1
3856 ; HSA-NEXT: v_mov_b32_e32 v1, 2
3857 ; HSA-NEXT: v_mov_b32_e32 v2, 3
3858 ; HSA-NEXT: v_mov_b32_e32 v3, 4
3859 ; HSA-NEXT: v_mov_b32_e32 v4, 5
3860 ; HSA-NEXT: s_mov_b32 s32, 0
3861 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3862 ; HSA-NEXT: s_endpgm
3863 call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
; Kernel that first loads an addrspace(1) pointer from an undef addrspace(4)
; pointer, then loads a <8 x i32> through that pointer and passes it by value
; to @external_void_func_v8i32. The assertions expect a scalar 64-bit load of
; the pointer, an s_waitcnt lgkmcnt(0) before it is used, and then two 128-bit
; buffer loads (offsets 0 and 16) filling v[0:3] and v[4:7] ahead of the
; s_swappc_b64 call. The gfx1100 run additionally expects the two buffer loads
; to be grouped under s_clause 0x1.
3867 define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
3868 ; VI-LABEL: test_call_external_void_func_v8i32:
3870 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3871 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3872 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
3873 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3874 ; VI-NEXT: s_mov_b32 s38, -1
3875 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3876 ; VI-NEXT: s_add_u32 s36, s36, s3
3877 ; VI-NEXT: s_mov_b32 s3, 0xf000
3878 ; VI-NEXT: s_mov_b32 s2, -1
3879 ; VI-NEXT: s_waitcnt lgkmcnt(0)
3880 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3881 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
3882 ; VI-NEXT: s_addc_u32 s37, s37, 0
3883 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3884 ; VI-NEXT: s_getpc_b64 s[4:5]
3885 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3886 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3887 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3888 ; VI-NEXT: s_mov_b32 s32, 0
3889 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3892 ; CI-LABEL: test_call_external_void_func_v8i32:
3894 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3895 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3896 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
3897 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3898 ; CI-NEXT: s_mov_b32 s38, -1
3899 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3900 ; CI-NEXT: s_add_u32 s36, s36, s3
3901 ; CI-NEXT: s_mov_b32 s3, 0xf000
3902 ; CI-NEXT: s_mov_b32 s2, -1
3903 ; CI-NEXT: s_waitcnt lgkmcnt(0)
3904 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3905 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
3906 ; CI-NEXT: s_addc_u32 s37, s37, 0
3907 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3908 ; CI-NEXT: s_getpc_b64 s[4:5]
3909 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3910 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3911 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3912 ; CI-NEXT: s_mov_b32 s32, 0
3913 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3916 ; GFX9-LABEL: test_call_external_void_func_v8i32:
3918 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3919 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3920 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
3921 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3922 ; GFX9-NEXT: s_mov_b32 s38, -1
3923 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3924 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3925 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
3926 ; GFX9-NEXT: s_mov_b32 s2, -1
3927 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3928 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3929 ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
3930 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3931 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3932 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3933 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3934 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3935 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3936 ; GFX9-NEXT: s_mov_b32 s32, 0
3937 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3938 ; GFX9-NEXT: s_endpgm
3940 ; GFX11-LABEL: test_call_external_void_func_v8i32:
3942 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
3943 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
3944 ; GFX11-NEXT: s_mov_b32 s6, -1
3945 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3946 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v8i32@rel32@lo+4
3947 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v8i32@rel32@hi+12
3948 ; GFX11-NEXT: s_mov_b32 s32, 0
3949 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3950 ; GFX11-NEXT: s_clause 0x1
3951 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
3952 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
3953 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3954 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3955 ; GFX11-NEXT: s_endpgm
3957 ; HSA-LABEL: test_call_external_void_func_v8i32:
3959 ; HSA-NEXT: s_add_i32 s6, s6, s9
3960 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3961 ; HSA-NEXT: s_add_u32 s0, s0, s9
3962 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
3963 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
3964 ; HSA-NEXT: s_mov_b32 s10, -1
3965 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3966 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
3967 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
3968 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
3969 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3970 ; HSA-NEXT: s_getpc_b64 s[8:9]
3971 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v8i32@rel32@lo+4
3972 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v8i32@rel32@hi+12
3973 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3974 ; HSA-NEXT: s_mov_b32 s32, 0
3975 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3976 ; HSA-NEXT: s_endpgm
3977 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
3978 %val = load <8 x i32>, ptr addrspace(1) %ptr
3979 call void @external_void_func_v8i32(<8 x i32> %val)
; Kernel that calls external_void_func_v8i32 with the constant vector
; <1, 2, 3, 4, 5, 6, 7, 8>. The expected output below materializes the eight
; elements as immediates in v0-v7 (paired into v_dual_mov_b32 in the gfx1100
; checks) and then performs the call with s_swappc_b64.
3983 define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
3984 ; VI-LABEL: test_call_external_void_func_v8i32_imm:
3986 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3987 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3988 ; VI-NEXT: s_mov_b32 s38, -1
3989 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3990 ; VI-NEXT: s_add_u32 s36, s36, s3
3991 ; VI-NEXT: s_addc_u32 s37, s37, 0
3992 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3993 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3994 ; VI-NEXT: s_getpc_b64 s[4:5]
3995 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3996 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3997 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3998 ; VI-NEXT: v_mov_b32_e32 v0, 1
3999 ; VI-NEXT: v_mov_b32_e32 v1, 2
4000 ; VI-NEXT: v_mov_b32_e32 v2, 3
4001 ; VI-NEXT: v_mov_b32_e32 v3, 4
4002 ; VI-NEXT: v_mov_b32_e32 v4, 5
4003 ; VI-NEXT: v_mov_b32_e32 v5, 6
4004 ; VI-NEXT: v_mov_b32_e32 v6, 7
4005 ; VI-NEXT: v_mov_b32_e32 v7, 8
4006 ; VI-NEXT: s_mov_b32 s32, 0
4007 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4010 ; CI-LABEL: test_call_external_void_func_v8i32_imm:
4012 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4013 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4014 ; CI-NEXT: s_mov_b32 s38, -1
4015 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4016 ; CI-NEXT: s_add_u32 s36, s36, s3
4017 ; CI-NEXT: s_addc_u32 s37, s37, 0
4018 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4019 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4020 ; CI-NEXT: s_getpc_b64 s[4:5]
4021 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
4022 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
4023 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4024 ; CI-NEXT: v_mov_b32_e32 v0, 1
4025 ; CI-NEXT: v_mov_b32_e32 v1, 2
4026 ; CI-NEXT: v_mov_b32_e32 v2, 3
4027 ; CI-NEXT: v_mov_b32_e32 v3, 4
4028 ; CI-NEXT: v_mov_b32_e32 v4, 5
4029 ; CI-NEXT: v_mov_b32_e32 v5, 6
4030 ; CI-NEXT: v_mov_b32_e32 v6, 7
4031 ; CI-NEXT: v_mov_b32_e32 v7, 8
4032 ; CI-NEXT: s_mov_b32 s32, 0
4033 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4036 ; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
4038 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4039 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4040 ; GFX9-NEXT: s_mov_b32 s38, -1
4041 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4042 ; GFX9-NEXT: s_add_u32 s36, s36, s3
4043 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4044 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4045 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4046 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4047 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
4048 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
4049 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4050 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
4051 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
4052 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
4053 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
4054 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
4055 ; GFX9-NEXT: v_mov_b32_e32 v5, 6
4056 ; GFX9-NEXT: v_mov_b32_e32 v6, 7
4057 ; GFX9-NEXT: v_mov_b32_e32 v7, 8
4058 ; GFX9-NEXT: s_mov_b32 s32, 0
4059 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4060 ; GFX9-NEXT: s_endpgm
4062 ; GFX11-LABEL: test_call_external_void_func_v8i32_imm:
4064 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
4065 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
4066 ; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6
4067 ; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8
4068 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4069 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v8i32@rel32@lo+4
4070 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v8i32@rel32@hi+12
4071 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4072 ; GFX11-NEXT: s_mov_b32 s32, 0
4073 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4074 ; GFX11-NEXT: s_endpgm
4076 ; HSA-LABEL: test_call_external_void_func_v8i32_imm:
4078 ; HSA-NEXT: s_add_i32 s6, s6, s9
4079 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
4080 ; HSA-NEXT: s_add_u32 s0, s0, s9
4081 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4082 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4083 ; HSA-NEXT: s_getpc_b64 s[8:9]
4084 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v8i32@rel32@lo+4
4085 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v8i32@rel32@hi+12
4086 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4087 ; HSA-NEXT: v_mov_b32_e32 v0, 1
4088 ; HSA-NEXT: v_mov_b32_e32 v1, 2
4089 ; HSA-NEXT: v_mov_b32_e32 v2, 3
4090 ; HSA-NEXT: v_mov_b32_e32 v3, 4
4091 ; HSA-NEXT: v_mov_b32_e32 v4, 5
4092 ; HSA-NEXT: v_mov_b32_e32 v5, 6
4093 ; HSA-NEXT: v_mov_b32_e32 v6, 7
4094 ; HSA-NEXT: v_mov_b32_e32 v7, 8
4095 ; HSA-NEXT: s_mov_b32 s32, 0
4096 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4097 ; HSA-NEXT: s_endpgm
; The argument is a compile-time constant vector, so none of the expected
; sequences above contain a memory load before the call.
4098 call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
; Kernel that loads a <16 x i32> through a ptr addrspace(1) pointer and passes
; it to external_void_func_v16i32. The expected output below fetches the value
; with four 128-bit buffer loads into v[0:15] (offsets 0, 16, 32 and 48) and
; then calls with s_swappc_b64; no store is expected before the call.
4102 define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
4103 ; VI-LABEL: test_call_external_void_func_v16i32:
4105 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4106 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4107 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4108 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4109 ; VI-NEXT: s_mov_b32 s38, -1
4110 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4111 ; VI-NEXT: s_add_u32 s36, s36, s3
4112 ; VI-NEXT: s_mov_b32 s3, 0xf000
4113 ; VI-NEXT: s_mov_b32 s2, -1
4114 ; VI-NEXT: s_waitcnt lgkmcnt(0)
4115 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
4116 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
4117 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
4118 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
4119 ; VI-NEXT: s_addc_u32 s37, s37, 0
4120 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4121 ; VI-NEXT: s_getpc_b64 s[4:5]
4122 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
4123 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
4124 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4125 ; VI-NEXT: s_mov_b32 s32, 0
4126 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4129 ; CI-LABEL: test_call_external_void_func_v16i32:
4131 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4132 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4133 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4134 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4135 ; CI-NEXT: s_mov_b32 s38, -1
4136 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4137 ; CI-NEXT: s_add_u32 s36, s36, s3
4138 ; CI-NEXT: s_mov_b32 s3, 0xf000
4139 ; CI-NEXT: s_mov_b32 s2, -1
4140 ; CI-NEXT: s_waitcnt lgkmcnt(0)
4141 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
4142 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
4143 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
4144 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
4145 ; CI-NEXT: s_addc_u32 s37, s37, 0
4146 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4147 ; CI-NEXT: s_getpc_b64 s[4:5]
4148 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
4149 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
4150 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4151 ; CI-NEXT: s_mov_b32 s32, 0
4152 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4155 ; GFX9-LABEL: test_call_external_void_func_v16i32:
4157 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4158 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4159 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4160 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4161 ; GFX9-NEXT: s_mov_b32 s38, -1
4162 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4163 ; GFX9-NEXT: s_add_u32 s36, s36, s3
4164 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
4165 ; GFX9-NEXT: s_mov_b32 s2, -1
4166 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4167 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
4168 ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
4169 ; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
4170 ; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
4171 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4172 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4173 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4174 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
4175 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
4176 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4177 ; GFX9-NEXT: s_mov_b32 s32, 0
4178 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4179 ; GFX9-NEXT: s_endpgm
4181 ; GFX11-LABEL: test_call_external_void_func_v16i32:
4183 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
4184 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
4185 ; GFX11-NEXT: s_mov_b32 s6, -1
4186 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4187 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v16i32@rel32@lo+4
4188 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v16i32@rel32@hi+12
4189 ; GFX11-NEXT: s_mov_b32 s32, 0
4190 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4191 ; GFX11-NEXT: s_clause 0x3
4192 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
4193 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
4194 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
4195 ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
4196 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4197 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4198 ; GFX11-NEXT: s_endpgm
4200 ; HSA-LABEL: test_call_external_void_func_v16i32:
4202 ; HSA-NEXT: s_add_i32 s6, s6, s9
4203 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
4204 ; HSA-NEXT: s_add_u32 s0, s0, s9
4205 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4206 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
4207 ; HSA-NEXT: s_mov_b32 s10, -1
4208 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4209 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
4210 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4211 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4212 ; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
4213 ; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
4214 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4215 ; HSA-NEXT: s_getpc_b64 s[8:9]
4216 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v16i32@rel32@lo+4
4217 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v16i32@rel32@hi+12
4218 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4219 ; HSA-NEXT: s_mov_b32 s32, 0
4220 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4221 ; HSA-NEXT: s_endpgm
; The source pointer is itself loaded from an undef addrspace(4) address, so
; its value is arbitrary; the vector is then loaded through it and forwarded
; unchanged to the callee.
4222 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
4223 %val = load <16 x i32>, ptr addrspace(1) %ptr
4224 call void @external_void_func_v16i32(<16 x i32> %val)
; Kernel that loads a <32 x i32> through a ptr addrspace(1) pointer and passes
; it to external_void_func_v32i32. The expected output below loads the value
; with eight 128-bit buffer loads into v[0:31]. Unlike the 16-element case, it
; then writes v31 out at s32 (which the checks set to 0) before the
; s_swappc_b64 call: buffer_store_dword, or scratch_store_b32 in the gfx1100
; checks. The load of v[28:31] is issued first and the store is preceded by
; s_waitcnt vmcnt(7).
4228 define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
4229 ; VI-LABEL: test_call_external_void_func_v32i32:
4231 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4232 ; VI-NEXT: s_mov_b32 s7, 0xf000
4233 ; VI-NEXT: s_mov_b32 s6, -1
4234 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4235 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4236 ; VI-NEXT: s_waitcnt lgkmcnt(0)
4237 ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4238 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4239 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4240 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4241 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4242 ; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4243 ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4244 ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4245 ; VI-NEXT: s_mov_b32 s38, -1
4246 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4247 ; VI-NEXT: s_add_u32 s36, s36, s3
4248 ; VI-NEXT: s_addc_u32 s37, s37, 0
4249 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4250 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4251 ; VI-NEXT: s_mov_b32 s32, 0
4252 ; VI-NEXT: s_getpc_b64 s[8:9]
4253 ; VI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
4254 ; VI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
4255 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4256 ; VI-NEXT: s_waitcnt vmcnt(7)
4257 ; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32
4258 ; VI-NEXT: s_swappc_b64 s[30:31], s[8:9]
4261 ; CI-LABEL: test_call_external_void_func_v32i32:
4263 ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4264 ; CI-NEXT: s_mov_b32 s7, 0xf000
4265 ; CI-NEXT: s_mov_b32 s6, -1
4266 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4267 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4268 ; CI-NEXT: s_waitcnt lgkmcnt(0)
4269 ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4270 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4271 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4272 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4273 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4274 ; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4275 ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4276 ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4277 ; CI-NEXT: s_mov_b32 s38, -1
4278 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4279 ; CI-NEXT: s_add_u32 s36, s36, s3
4280 ; CI-NEXT: s_addc_u32 s37, s37, 0
4281 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4282 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4283 ; CI-NEXT: s_mov_b32 s32, 0
4284 ; CI-NEXT: s_getpc_b64 s[8:9]
4285 ; CI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
4286 ; CI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
4287 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4288 ; CI-NEXT: s_waitcnt vmcnt(7)
4289 ; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32
4290 ; CI-NEXT: s_swappc_b64 s[30:31], s[8:9]
4293 ; GFX9-LABEL: test_call_external_void_func_v32i32:
4295 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4296 ; GFX9-NEXT: s_mov_b32 s7, 0xf000
4297 ; GFX9-NEXT: s_mov_b32 s6, -1
4298 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4299 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4300 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4301 ; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4302 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4303 ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4304 ; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4305 ; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4306 ; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4307 ; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4308 ; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4309 ; GFX9-NEXT: s_mov_b32 s38, -1
4310 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4311 ; GFX9-NEXT: s_add_u32 s36, s36, s3
4312 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4313 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4314 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4315 ; GFX9-NEXT: s_mov_b32 s32, 0
4316 ; GFX9-NEXT: s_getpc_b64 s[8:9]
4317 ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
4318 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
4319 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4320 ; GFX9-NEXT: s_waitcnt vmcnt(7)
4321 ; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32
4322 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
4323 ; GFX9-NEXT: s_endpgm
4325 ; GFX11-LABEL: test_call_external_void_func_v32i32:
4327 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
4328 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
4329 ; GFX11-NEXT: s_mov_b32 s6, -1
4330 ; GFX11-NEXT: s_mov_b32 s32, 0
4331 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4332 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v32i32@rel32@lo+4
4333 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v32i32@rel32@hi+12
4334 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4335 ; GFX11-NEXT: s_clause 0x7
4336 ; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112
4337 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
4338 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
4339 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
4340 ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
4341 ; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64
4342 ; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80
4343 ; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96
4344 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4345 ; GFX11-NEXT: s_waitcnt vmcnt(7)
4346 ; GFX11-NEXT: scratch_store_b32 off, v31, s32
4347 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4348 ; GFX11-NEXT: s_endpgm
4350 ; HSA-LABEL: test_call_external_void_func_v32i32:
4352 ; HSA-NEXT: s_add_i32 s6, s6, s9
4353 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
4354 ; HSA-NEXT: s_add_u32 s0, s0, s9
4355 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4356 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
4357 ; HSA-NEXT: s_mov_b32 s10, -1
4358 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4359 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
4360 ; HSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112
4361 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4362 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4363 ; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
4364 ; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
4365 ; HSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64
4366 ; HSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80
4367 ; HSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96
4368 ; HSA-NEXT: s_mov_b32 s32, 0
4369 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4370 ; HSA-NEXT: s_getpc_b64 s[12:13]
4371 ; HSA-NEXT: s_add_u32 s12, s12, external_void_func_v32i32@rel32@lo+4
4372 ; HSA-NEXT: s_addc_u32 s13, s13, external_void_func_v32i32@rel32@hi+12
4373 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4374 ; HSA-NEXT: s_waitcnt vmcnt(7)
4375 ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32
4376 ; HSA-NEXT: s_swappc_b64 s[30:31], s[12:13]
4377 ; HSA-NEXT: s_endpgm
; The source pointer is itself loaded from an undef addrspace(4) address, so
; its value is arbitrary; the vector is then loaded through it and forwarded
; unchanged to the callee.
4378 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
4379 %val = load <32 x i32>, ptr addrspace(1) %ptr
4380 call void @external_void_func_v32i32(<32 x i32> %val)
; Kernel that passes a <32 x i32> followed by an i32 to
; external_void_func_v32i32_i32. The kernel's own unnamed i32 parameter is not
; referenced by the body. The expected output below loads the vector into
; v[0:31] and the scalar into v32, then, before the s_swappc_b64 call, writes
; v31 at s32 and v32 at s32 with offset 4 (the gfx1100 checks compute that
; address with s_add_i32 s4, s32, 4 and use scratch_store_b32).
4384 define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
4385 ; VI-LABEL: test_call_external_void_func_v32i32_i32:
4387 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4388 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4389 ; VI-NEXT: s_mov_b32 s38, -1
4390 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4391 ; VI-NEXT: s_add_u32 s36, s36, s5
4392 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4393 ; VI-NEXT: s_mov_b32 s7, 0xf000
4394 ; VI-NEXT: s_mov_b32 s6, -1
4395 ; VI-NEXT: s_addc_u32 s37, s37, 0
4396 ; VI-NEXT: s_waitcnt lgkmcnt(0)
4397 ; VI-NEXT: buffer_load_dword v32, off, s[4:7], 0
4398 ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4399 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4400 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4401 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4402 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4403 ; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4404 ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4405 ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4406 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4407 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4408 ; VI-NEXT: s_mov_b32 s32, 0
4409 ; VI-NEXT: s_getpc_b64 s[4:5]
4410 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4411 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4412 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4413 ; VI-NEXT: s_waitcnt vmcnt(8)
4414 ; VI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
4415 ; VI-NEXT: s_waitcnt vmcnt(8)
4416 ; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32
4417 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4420 ; CI-LABEL: test_call_external_void_func_v32i32_i32:
4422 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4423 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4424 ; CI-NEXT: s_mov_b32 s38, -1
4425 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4426 ; CI-NEXT: s_add_u32 s36, s36, s5
4427 ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4428 ; CI-NEXT: s_mov_b32 s7, 0xf000
4429 ; CI-NEXT: s_mov_b32 s6, -1
4430 ; CI-NEXT: s_addc_u32 s37, s37, 0
4431 ; CI-NEXT: s_waitcnt lgkmcnt(0)
4432 ; CI-NEXT: buffer_load_dword v32, off, s[4:7], 0
4433 ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4434 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4435 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4436 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4437 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4438 ; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4439 ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4440 ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4441 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4442 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4443 ; CI-NEXT: s_mov_b32 s32, 0
4444 ; CI-NEXT: s_getpc_b64 s[4:5]
4445 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4446 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4447 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4448 ; CI-NEXT: s_waitcnt vmcnt(8)
4449 ; CI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
4450 ; CI-NEXT: s_waitcnt vmcnt(8)
4451 ; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32
4452 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4455 ; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
4457 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4458 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4459 ; GFX9-NEXT: s_mov_b32 s38, -1
4460 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4461 ; GFX9-NEXT: s_add_u32 s36, s36, s5
4462 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4463 ; GFX9-NEXT: s_mov_b32 s7, 0xf000
4464 ; GFX9-NEXT: s_mov_b32 s6, -1
4465 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4466 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4467 ; GFX9-NEXT: buffer_load_dword v32, off, s[4:7], 0
4468 ; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4469 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4470 ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4471 ; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4472 ; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4473 ; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4474 ; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4475 ; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4476 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4477 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4478 ; GFX9-NEXT: s_mov_b32 s32, 0
4479 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4480 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4481 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4482 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4483 ; GFX9-NEXT: s_waitcnt vmcnt(8)
4484 ; GFX9-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
4485 ; GFX9-NEXT: s_waitcnt vmcnt(8)
4486 ; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32
4487 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4488 ; GFX9-NEXT: s_endpgm
4490 ; GFX11-LABEL: test_call_external_void_func_v32i32_i32:
4492 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
4493 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
4494 ; GFX11-NEXT: s_mov_b32 s6, -1
4495 ; GFX11-NEXT: s_mov_b32 s32, 0
4496 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4497 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v32i32_i32@rel32@lo+4
4498 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v32i32_i32@rel32@hi+12
4499 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4500 ; GFX11-NEXT: s_clause 0x8
4501 ; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112
4502 ; GFX11-NEXT: buffer_load_b32 v32, off, s[4:7], 0
4503 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
4504 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
4505 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
4506 ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
4507 ; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64
4508 ; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80
4509 ; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96
4510 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4511 ; GFX11-NEXT: s_add_i32 s4, s32, 4
4512 ; GFX11-NEXT: s_waitcnt vmcnt(8)
4513 ; GFX11-NEXT: scratch_store_b32 off, v31, s32
4514 ; GFX11-NEXT: s_waitcnt vmcnt(7)
4515 ; GFX11-NEXT: scratch_store_b32 off, v32, s4
4516 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4517 ; GFX11-NEXT: s_endpgm
4519 ; HSA-LABEL: test_call_external_void_func_v32i32_i32:
4521 ; HSA-NEXT: s_add_i32 s8, s8, s11
4522 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
4523 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
4524 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4525 ; HSA-NEXT: s_add_u32 s0, s0, s11
4526 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
4527 ; HSA-NEXT: s_mov_b32 s10, -1
4528 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
4529 ; HSA-NEXT: buffer_load_dword v32, off, s[8:11], 0
4530 ; HSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112
4531 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4532 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4533 ; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
4534 ; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
4535 ; HSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64
4536 ; HSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80
4537 ; HSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96
4538 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4539 ; HSA-NEXT: s_mov_b32 s32, 0
4540 ; HSA-NEXT: s_getpc_b64 s[8:9]
4541 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v32i32_i32@rel32@lo+4
4542 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32_i32@rel32@hi+12
4543 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4544 ; HSA-NEXT: s_waitcnt vmcnt(8)
4545 ; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
4546 ; HSA-NEXT: s_waitcnt vmcnt(8)
4547 ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32
4548 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4549 ; HSA-NEXT: s_endpgm
; The vector comes from a pointer that is itself loaded from an undef
; addrspace(4) address, and the trailing i32 is loaded directly from an undef
; addrspace(1) address, so both source addresses are arbitrary.
4550 %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
4551 %val0 = load <32 x i32>, ptr addrspace(1) %ptr0
4552 %val1 = load i32, ptr addrspace(1) undef
4553 call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
; Kernel that calls external_i32_func_i32 with the constant 42 and writes the
; returned i32 to %out with a volatile store. The expected output below places
; 42 in v0 before s_swappc_b64 and stores v0 through s[36:39] immediately
; after the call; all four dwords of s[36:39] are set up before the call.
4557 define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
4558 ; VI-LABEL: test_call_external_i32_func_i32_imm:
4560 ; VI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
4561 ; VI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
4562 ; VI-NEXT: s_mov_b32 s42, -1
4563 ; VI-NEXT: s_mov_b32 s43, 0xe80000
4564 ; VI-NEXT: s_add_u32 s40, s40, s5
4565 ; VI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
4566 ; VI-NEXT: s_addc_u32 s41, s41, 0
4567 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4568 ; VI-NEXT: s_mov_b64 s[0:1], s[40:41]
4569 ; VI-NEXT: s_getpc_b64 s[4:5]
4570 ; VI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4571 ; VI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4572 ; VI-NEXT: s_mov_b64 s[2:3], s[42:43]
4573 ; VI-NEXT: v_mov_b32_e32 v0, 42
4574 ; VI-NEXT: s_mov_b32 s32, 0
4575 ; VI-NEXT: s_mov_b32 s39, 0xf000
4576 ; VI-NEXT: s_mov_b32 s38, -1
4577 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4578 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0
4579 ; VI-NEXT: s_waitcnt vmcnt(0)
4582 ; CI-LABEL: test_call_external_i32_func_i32_imm:
4584 ; CI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
4585 ; CI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
4586 ; CI-NEXT: s_mov_b32 s42, -1
4587 ; CI-NEXT: s_mov_b32 s43, 0xe8f000
4588 ; CI-NEXT: s_add_u32 s40, s40, s5
4589 ; CI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x9
4590 ; CI-NEXT: s_addc_u32 s41, s41, 0
4591 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4592 ; CI-NEXT: s_mov_b64 s[0:1], s[40:41]
4593 ; CI-NEXT: s_getpc_b64 s[4:5]
4594 ; CI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4595 ; CI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4596 ; CI-NEXT: s_mov_b64 s[2:3], s[42:43]
4597 ; CI-NEXT: v_mov_b32_e32 v0, 42
4598 ; CI-NEXT: s_mov_b32 s32, 0
4599 ; CI-NEXT: s_mov_b32 s39, 0xf000
4600 ; CI-NEXT: s_mov_b32 s38, -1
4601 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4602 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0
4603 ; CI-NEXT: s_waitcnt vmcnt(0)
4606 ; GFX9-LABEL: test_call_external_i32_func_i32_imm:
4608 ; GFX9-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
4609 ; GFX9-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
4610 ; GFX9-NEXT: s_mov_b32 s42, -1
4611 ; GFX9-NEXT: s_mov_b32 s43, 0xe00000
4612 ; GFX9-NEXT: s_add_u32 s40, s40, s5
4613 ; GFX9-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
4614 ; GFX9-NEXT: s_addc_u32 s41, s41, 0
4615 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4616 ; GFX9-NEXT: s_mov_b64 s[0:1], s[40:41]
4617 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4618 ; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4619 ; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4620 ; GFX9-NEXT: s_mov_b64 s[2:3], s[42:43]
4621 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
4622 ; GFX9-NEXT: s_mov_b32 s32, 0
4623 ; GFX9-NEXT: s_mov_b32 s39, 0xf000
4624 ; GFX9-NEXT: s_mov_b32 s38, -1
4625 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4626 ; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0
4627 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4628 ; GFX9-NEXT: s_endpgm
4630 ; GFX11-LABEL: test_call_external_i32_func_i32_imm:
4632 ; GFX11-NEXT: s_load_b64 s[36:37], s[2:3], 0x24
4633 ; GFX11-NEXT: v_mov_b32_e32 v0, 42
4634 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4635 ; GFX11-NEXT: s_add_u32 s2, s2, external_i32_func_i32@rel32@lo+4
4636 ; GFX11-NEXT: s_addc_u32 s3, s3, external_i32_func_i32@rel32@hi+12
4637 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4638 ; GFX11-NEXT: s_mov_b32 s32, 0
4639 ; GFX11-NEXT: s_mov_b32 s39, 0x31016000
4640 ; GFX11-NEXT: s_mov_b32 s38, -1
4641 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4642 ; GFX11-NEXT: buffer_store_b32 v0, off, s[36:39], 0 dlc
4643 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4644 ; GFX11-NEXT: s_endpgm
4646 ; HSA-LABEL: test_call_external_i32_func_i32_imm:
4648 ; HSA-NEXT: s_add_i32 s8, s8, s11
4649 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
4650 ; HSA-NEXT: s_load_dwordx2 s[36:37], s[6:7], 0x0
4651 ; HSA-NEXT: s_add_u32 s0, s0, s11
4652 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4653 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
4654 ; HSA-NEXT: s_getpc_b64 s[8:9]
4655 ; HSA-NEXT: s_add_u32 s8, s8, external_i32_func_i32@rel32@lo+4
4656 ; HSA-NEXT: s_addc_u32 s9, s9, external_i32_func_i32@rel32@hi+12
4657 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4658 ; HSA-NEXT: v_mov_b32_e32 v0, 42
4659 ; HSA-NEXT: s_mov_b32 s32, 0
4660 ; HSA-NEXT: s_mov_b32 s39, 0x1100f000
4661 ; HSA-NEXT: s_mov_b32 s38, -1
4662 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4663 ; HSA-NEXT: buffer_store_dword v0, off, s[36:39], 0
4664 ; HSA-NEXT: s_waitcnt vmcnt(0)
4665 ; HSA-NEXT: s_endpgm
; The store is volatile; each expected sequence above follows it with an
; explicit wait (s_waitcnt vmcnt(0), or s_waitcnt_vscnt in the gfx1100 checks).
4666 %val = call i32 @external_i32_func_i32(i32 42)
4667 store volatile i32 %val, ptr addrspace(1) %out
; Kernel that passes a { i8, i32 } aggregate by value to
; @external_void_func_struct_i8_i32.
;
; The aggregate is loaded from a global (addrspace(1)) pointer, which is itself
; loaded from an undef addrspace(4) address.
;
; What the autogenerated checks pin down for every RUN configuration:
;   * the two struct fields are fetched with separate loads, a byte load at
;     offset 0 into v0 and a dword load at offset 4 into v1;
;   * s32 is set to 0 before the call;
;   * the callee address is formed with s_getpc_b64 plus the rel32 lo/hi
;     relocations and the call is made with s_swappc_b64.
;
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4671 define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
4672 ; VI-LABEL: test_call_external_void_func_struct_i8_i32:
4674 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4675 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4676 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4677 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4678 ; VI-NEXT: s_mov_b32 s38, -1
4679 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4680 ; VI-NEXT: s_add_u32 s36, s36, s3
4681 ; VI-NEXT: s_mov_b32 s3, 0xf000
4682 ; VI-NEXT: s_mov_b32 s2, -1
4683 ; VI-NEXT: s_waitcnt lgkmcnt(0)
4684 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
4685 ; VI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
4686 ; VI-NEXT: s_addc_u32 s37, s37, 0
4687 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4688 ; VI-NEXT: s_getpc_b64 s[4:5]
4689 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4690 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4691 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4692 ; VI-NEXT: s_mov_b32 s32, 0
4693 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4696 ; CI-LABEL: test_call_external_void_func_struct_i8_i32:
4698 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4699 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4700 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4701 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4702 ; CI-NEXT: s_mov_b32 s38, -1
4703 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4704 ; CI-NEXT: s_add_u32 s36, s36, s3
4705 ; CI-NEXT: s_mov_b32 s3, 0xf000
4706 ; CI-NEXT: s_mov_b32 s2, -1
4707 ; CI-NEXT: s_waitcnt lgkmcnt(0)
4708 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
4709 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
4710 ; CI-NEXT: s_addc_u32 s37, s37, 0
4711 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4712 ; CI-NEXT: s_getpc_b64 s[4:5]
4713 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4714 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4715 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4716 ; CI-NEXT: s_mov_b32 s32, 0
4717 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4720 ; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
4722 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4723 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4724 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4725 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4726 ; GFX9-NEXT: s_mov_b32 s38, -1
4727 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4728 ; GFX9-NEXT: s_add_u32 s36, s36, s3
4729 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
4730 ; GFX9-NEXT: s_mov_b32 s2, -1
4731 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4732 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
4733 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
4734 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4735 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4736 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4737 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4738 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4739 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4740 ; GFX9-NEXT: s_mov_b32 s32, 0
4741 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4742 ; GFX9-NEXT: s_endpgm
4744 ; GFX11-LABEL: test_call_external_void_func_struct_i8_i32:
4746 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
4747 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
4748 ; GFX11-NEXT: s_mov_b32 s6, -1
4749 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4750 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_struct_i8_i32@rel32@lo+4
4751 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_struct_i8_i32@rel32@hi+12
4752 ; GFX11-NEXT: s_mov_b32 s32, 0
4753 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4754 ; GFX11-NEXT: s_clause 0x1
4755 ; GFX11-NEXT: buffer_load_u8 v0, off, s[4:7], 0
4756 ; GFX11-NEXT: buffer_load_b32 v1, off, s[4:7], 0 offset:4
4757 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4758 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4759 ; GFX11-NEXT: s_endpgm
4761 ; HSA-LABEL: test_call_external_void_func_struct_i8_i32:
4763 ; HSA-NEXT: s_add_i32 s6, s6, s9
4764 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
4765 ; HSA-NEXT: s_add_u32 s0, s0, s9
4766 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4767 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
4768 ; HSA-NEXT: s_mov_b32 s10, -1
4769 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4770 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
4771 ; HSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4772 ; HSA-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:4
4773 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4774 ; HSA-NEXT: s_getpc_b64 s[8:9]
4775 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_struct_i8_i32@rel32@lo+4
4776 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_struct_i8_i32@rel32@hi+12
4777 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4778 ; HSA-NEXT: s_mov_b32 s32, 0
4779 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4780 ; HSA-NEXT: s_endpgm
; IR under test: fetch the source pointer from an undef address, load the whole
; aggregate through it, and hand the aggregate to the callee by value.
4781 %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
4782 %val = load { i8, i32 }, ptr addrspace(1) %ptr0
4783 call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
; Kernel that passes a { i8, i32 } to
; @external_void_func_byval_struct_i8_i32 through a byval pointer.
;
; The struct lives in an addrspace(5) alloca (align 8); its fields are set to
; 3 and 8 before the call.
;
; What the autogenerated checks pin down:
;   * the constants 3 and 8 are stored to offsets 0 and 4 of the local object;
;   * the object is then read back and copied to the location addressed
;     through s32, as two dword load/store pairs on the buffer-based targets
;     and as one 64-bit scratch load/store in the GFX11 checks;
;   * s32 is set to 0x400 on the buffer-based targets and to 16 in the GFX11
;     checks before the call.
;
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4787 define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
4788 ; VI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4790 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4791 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4792 ; VI-NEXT: s_mov_b32 s38, -1
4793 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4794 ; VI-NEXT: s_add_u32 s36, s36, s3
4795 ; VI-NEXT: s_addc_u32 s37, s37, 0
4796 ; VI-NEXT: v_mov_b32_e32 v0, 3
4797 ; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0
4798 ; VI-NEXT: v_mov_b32_e32 v0, 8
4799 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
4800 ; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
4801 ; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0
4802 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4803 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4804 ; VI-NEXT: s_movk_i32 s32, 0x400
4805 ; VI-NEXT: s_getpc_b64 s[4:5]
4806 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4807 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4808 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4809 ; VI-NEXT: s_waitcnt vmcnt(1)
4810 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
4811 ; VI-NEXT: s_waitcnt vmcnt(1)
4812 ; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32
4813 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4816 ; CI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4818 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4819 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4820 ; CI-NEXT: s_mov_b32 s38, -1
4821 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4822 ; CI-NEXT: s_add_u32 s36, s36, s3
4823 ; CI-NEXT: s_addc_u32 s37, s37, 0
4824 ; CI-NEXT: v_mov_b32_e32 v0, 3
4825 ; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0
4826 ; CI-NEXT: v_mov_b32_e32 v0, 8
4827 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
4828 ; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
4829 ; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0
4830 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4831 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4832 ; CI-NEXT: s_movk_i32 s32, 0x400
4833 ; CI-NEXT: s_getpc_b64 s[4:5]
4834 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4835 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4836 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4837 ; CI-NEXT: s_waitcnt vmcnt(1)
4838 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
4839 ; CI-NEXT: s_waitcnt vmcnt(1)
4840 ; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32
4841 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4844 ; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4846 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4847 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4848 ; GFX9-NEXT: s_mov_b32 s38, -1
4849 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4850 ; GFX9-NEXT: s_add_u32 s36, s36, s3
4851 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4852 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
4853 ; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0
4854 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
4855 ; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
4856 ; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
4857 ; GFX9-NEXT: s_nop 0
4858 ; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0
4859 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4860 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4861 ; GFX9-NEXT: s_movk_i32 s32, 0x400
4862 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4863 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4864 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4865 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4866 ; GFX9-NEXT: s_waitcnt vmcnt(1)
4867 ; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
4868 ; GFX9-NEXT: s_waitcnt vmcnt(1)
4869 ; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32
4870 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4871 ; GFX9-NEXT: s_endpgm
4873 ; GFX11-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4875 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
4876 ; GFX11-NEXT: s_mov_b32 s32, 16
4877 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4878 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_byval_struct_i8_i32@rel32@lo+4
4879 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_byval_struct_i8_i32@rel32@hi+12
4880 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4881 ; GFX11-NEXT: s_clause 0x1
4882 ; GFX11-NEXT: scratch_store_b8 off, v0, off
4883 ; GFX11-NEXT: scratch_store_b32 off, v1, off offset:4
4884 ; GFX11-NEXT: scratch_load_b64 v[0:1], off, off
4885 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4886 ; GFX11-NEXT: scratch_store_b64 off, v[0:1], s32
4887 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4888 ; GFX11-NEXT: s_endpgm
4890 ; HSA-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4892 ; HSA-NEXT: s_add_i32 s6, s6, s9
4893 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
4894 ; HSA-NEXT: s_add_u32 s0, s0, s9
4895 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4896 ; HSA-NEXT: v_mov_b32_e32 v0, 3
4897 ; HSA-NEXT: buffer_store_byte v0, off, s[0:3], 0
4898 ; HSA-NEXT: v_mov_b32_e32 v0, 8
4899 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
4900 ; HSA-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
4901 ; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0
4902 ; HSA-NEXT: s_movk_i32 s32, 0x400
4903 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4904 ; HSA-NEXT: s_getpc_b64 s[8:9]
4905 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_byval_struct_i8_i32@rel32@lo+4
4906 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_byval_struct_i8_i32@rel32@hi+12
4907 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4908 ; HSA-NEXT: s_waitcnt vmcnt(1)
4909 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
4910 ; HSA-NEXT: s_waitcnt vmcnt(1)
4911 ; HSA-NEXT: buffer_store_dword v1, off, s[0:3], s32
4912 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4913 ; HSA-NEXT: s_endpgm
; IR under test: build the struct {3, 8} in a local alloca field by field, then
; pass the alloca to the callee as a byval({ i8, i32 }) pointer.
4914 %val = alloca { i8, i32 }, align 8, addrspace(5)
4915 %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0
4916 %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1
4917 store i8 3, ptr addrspace(5) %gep0
4918 store i32 8, ptr addrspace(5) %gep1
4919 call void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val)
; Kernel that calls
; @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 with two
; addrspace(5) allocas: %out.val as the first pointer argument and %in.val
; (initialised to {3, 8}) as a byval({ i8, i32 }) second argument. After the
; call, both fields of %out.val are reloaded and written with volatile stores
; to an undef addrspace(1) pointer.
;
; The kernel takes one unnamed i32 argument that the body does not reference.
;
; What the autogenerated checks pin down:
;   * 3 and 8 are stored to offsets 0 and 4 of the local frame, then read back
;     and copied to the location addressed through s32 (0x800 on the
;     buffer-based targets, 32 in the GFX11 checks);
;   * v0 is set to 8 immediately before s_swappc_b64; the post-call loads read
;     offsets 8 and 12, so this is presumably the address of %out.val;
;   * after the call a byte load (offset 8) and a dword load (offset 12) feed
;     one byte store and one dword store.
;
; NOTE(review): the callee name mentions sret, but the call below passes
; %out.val without an sret attribute. The declaration is not visible from this
; part of the file; confirm that the call-site attributes match it.
;
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4923 define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
4924 ; VI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
4926 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4927 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4928 ; VI-NEXT: s_mov_b32 s38, -1
4929 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4930 ; VI-NEXT: s_add_u32 s36, s36, s5
4931 ; VI-NEXT: s_addc_u32 s37, s37, 0
4932 ; VI-NEXT: v_mov_b32_e32 v0, 3
4933 ; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0
4934 ; VI-NEXT: v_mov_b32_e32 v0, 8
4935 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
4936 ; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
4937 ; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0
4938 ; VI-NEXT: s_movk_i32 s32, 0x800
4939 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4940 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4941 ; VI-NEXT: s_getpc_b64 s[4:5]
4942 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
4943 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
4944 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4945 ; VI-NEXT: s_waitcnt vmcnt(1)
4946 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
4947 ; VI-NEXT: s_waitcnt vmcnt(1)
4948 ; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32
4949 ; VI-NEXT: v_mov_b32_e32 v0, 8
4950 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4951 ; VI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
4952 ; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
4953 ; VI-NEXT: s_mov_b32 s3, 0xf000
4954 ; VI-NEXT: s_mov_b32 s2, -1
4955 ; VI-NEXT: s_waitcnt vmcnt(1)
4956 ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
4957 ; VI-NEXT: s_waitcnt vmcnt(0)
4958 ; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0
4959 ; VI-NEXT: s_waitcnt vmcnt(0)
4962 ; CI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
4964 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4965 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4966 ; CI-NEXT: s_mov_b32 s38, -1
4967 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4968 ; CI-NEXT: s_add_u32 s36, s36, s5
4969 ; CI-NEXT: s_addc_u32 s37, s37, 0
4970 ; CI-NEXT: v_mov_b32_e32 v0, 3
4971 ; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0
4972 ; CI-NEXT: v_mov_b32_e32 v0, 8
4973 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
4974 ; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
4975 ; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0
4976 ; CI-NEXT: s_movk_i32 s32, 0x800
4977 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4978 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4979 ; CI-NEXT: s_getpc_b64 s[4:5]
4980 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
4981 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
4982 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4983 ; CI-NEXT: s_waitcnt vmcnt(1)
4984 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
4985 ; CI-NEXT: s_waitcnt vmcnt(1)
4986 ; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32
4987 ; CI-NEXT: v_mov_b32_e32 v0, 8
4988 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4989 ; CI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
4990 ; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
4991 ; CI-NEXT: s_mov_b32 s3, 0xf000
4992 ; CI-NEXT: s_mov_b32 s2, -1
4993 ; CI-NEXT: s_waitcnt vmcnt(1)
4994 ; CI-NEXT: buffer_store_byte v0, off, s[0:3], 0
4995 ; CI-NEXT: s_waitcnt vmcnt(0)
4996 ; CI-NEXT: buffer_store_dword v1, off, s[0:3], 0
4997 ; CI-NEXT: s_waitcnt vmcnt(0)
5000 ; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5002 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5003 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5004 ; GFX9-NEXT: s_mov_b32 s38, -1
5005 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
5006 ; GFX9-NEXT: s_add_u32 s36, s36, s5
5007 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
5008 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
5009 ; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0
5010 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
5011 ; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
5012 ; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
5013 ; GFX9-NEXT: s_nop 0
5014 ; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0
5015 ; GFX9-NEXT: s_movk_i32 s32, 0x800
5016 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
5017 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
5018 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5019 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5020 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5021 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
5022 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5023 ; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
5024 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5025 ; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32
5026 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
5027 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
5028 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
5029 ; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
5030 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
5031 ; GFX9-NEXT: s_mov_b32 s2, -1
5032 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5033 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
5034 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5035 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], 0
5036 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5037 ; GFX9-NEXT: s_endpgm
5039 ; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5041 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
5042 ; GFX11-NEXT: s_mov_b32 s32, 32
5043 ; GFX11-NEXT: s_getpc_b64 s[2:3]
5044 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5045 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5046 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
5047 ; GFX11-NEXT: s_clause 0x1
5048 ; GFX11-NEXT: scratch_store_b8 off, v0, off
5049 ; GFX11-NEXT: scratch_store_b32 off, v1, off offset:4
5050 ; GFX11-NEXT: scratch_load_b64 v[0:1], off, off
5051 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5052 ; GFX11-NEXT: scratch_store_b64 off, v[0:1], s32
5053 ; GFX11-NEXT: v_mov_b32_e32 v0, 8
5054 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
5055 ; GFX11-NEXT: s_clause 0x1
5056 ; GFX11-NEXT: scratch_load_u8 v0, off, off offset:8
5057 ; GFX11-NEXT: scratch_load_b32 v1, off, off offset:12
5058 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
5059 ; GFX11-NEXT: s_mov_b32 s2, -1
5060 ; GFX11-NEXT: s_waitcnt vmcnt(1)
5061 ; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc
5062 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
5063 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5064 ; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0 dlc
5065 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
5066 ; GFX11-NEXT: s_nop 0
5067 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5068 ; GFX11-NEXT: s_endpgm
5070 ; HSA-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5072 ; HSA-NEXT: s_add_i32 s8, s8, s11
5073 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
5074 ; HSA-NEXT: s_add_u32 s0, s0, s11
5075 ; HSA-NEXT: s_addc_u32 s1, s1, 0
5076 ; HSA-NEXT: v_mov_b32_e32 v0, 3
5077 ; HSA-NEXT: buffer_store_byte v0, off, s[0:3], 0
5078 ; HSA-NEXT: v_mov_b32_e32 v0, 8
5079 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
5080 ; HSA-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
5081 ; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0
5082 ; HSA-NEXT: s_movk_i32 s32, 0x800
5083 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
5084 ; HSA-NEXT: s_getpc_b64 s[8:9]
5085 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5086 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5087 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
5088 ; HSA-NEXT: s_waitcnt vmcnt(1)
5089 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
5090 ; HSA-NEXT: s_waitcnt vmcnt(1)
5091 ; HSA-NEXT: buffer_store_dword v1, off, s[0:3], s32
5092 ; HSA-NEXT: v_mov_b32_e32 v0, 8
5093 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
5094 ; HSA-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:8
5095 ; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:12
5096 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
5097 ; HSA-NEXT: s_mov_b32 s6, -1
5098 ; HSA-NEXT: s_waitcnt vmcnt(1)
5099 ; HSA-NEXT: buffer_store_byte v0, off, s[4:7], 0
5100 ; HSA-NEXT: s_waitcnt vmcnt(0)
5101 ; HSA-NEXT: buffer_store_dword v1, off, s[4:7], 0
5102 ; HSA-NEXT: s_waitcnt vmcnt(0)
5103 ; HSA-NEXT: s_endpgm
; IR under test. %in.val is the byval input, filled with {3, 8}; %out.val is
; the buffer handed to the callee as its first argument.
5104 %in.val = alloca { i8, i32 }, align 8, addrspace(5)
5105 %out.val = alloca { i8, i32 }, align 8, addrspace(5)
5106 %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
5107 %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
5108 store i8 3, ptr addrspace(5) %in.gep0
5109 store i32 8, ptr addrspace(5) %in.gep1
5110 call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) byval({ i8, i32 }) %in.val)
; Read both fields of %out.val back after the call.
5111 %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
5112 %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
5113 %out.val0 = load i8, ptr addrspace(5) %out.gep0
5114 %out.val1 = load i32, ptr addrspace(5) %out.gep1
; Volatile stores consume the loaded values; the destination address is undef.
5116 store volatile i8 %out.val0, ptr addrspace(1) undef
5117 store volatile i32 %out.val1, ptr addrspace(1) undef
; Kernel that passes a <16 x i8> vector by value to
; @external_void_func_v16i8.
;
; The vector is loaded from a global (addrspace(1)) pointer, which is itself
; loaded from an undef addrspace(4) address.
;
; What the autogenerated checks pin down for every RUN configuration:
;   * the vector is fetched with a single 128-bit load into v[0:3];
;   * each loaded dword is split with right shifts by 8, 16 and 24 plus
;     register moves, so that v0..v15 end up holding one element each
;     (v16..v18 are temporaries for the shifted copies of the first dword,
;     later moved into v1..v3);
;   * s32 is set to 0 before the call, which is made with s_swappc_b64.
;
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5121 define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
5122 ; VI-LABEL: test_call_external_void_func_v16i8:
5124 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5125 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
5126 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
5127 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5128 ; VI-NEXT: s_mov_b32 s38, -1
5129 ; VI-NEXT: s_mov_b32 s39, 0xe80000
5130 ; VI-NEXT: s_add_u32 s36, s36, s3
5131 ; VI-NEXT: s_mov_b32 s3, 0xf000
5132 ; VI-NEXT: s_mov_b32 s2, -1
5133 ; VI-NEXT: s_waitcnt lgkmcnt(0)
5134 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
5135 ; VI-NEXT: s_addc_u32 s37, s37, 0
5136 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
5137 ; VI-NEXT: s_getpc_b64 s[4:5]
5138 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
5139 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
5140 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
5141 ; VI-NEXT: s_mov_b32 s32, 0
5142 ; VI-NEXT: s_waitcnt vmcnt(0)
5143 ; VI-NEXT: v_lshrrev_b32_e32 v16, 8, v0
5144 ; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v0
5145 ; VI-NEXT: v_lshrrev_b32_e32 v18, 24, v0
5146 ; VI-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5147 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5148 ; VI-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5149 ; VI-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5150 ; VI-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5151 ; VI-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5152 ; VI-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5153 ; VI-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5154 ; VI-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5155 ; VI-NEXT: v_mov_b32_e32 v4, v1
5156 ; VI-NEXT: v_mov_b32_e32 v8, v2
5157 ; VI-NEXT: v_mov_b32_e32 v12, v3
5158 ; VI-NEXT: v_mov_b32_e32 v1, v16
5159 ; VI-NEXT: v_mov_b32_e32 v2, v17
5160 ; VI-NEXT: v_mov_b32_e32 v3, v18
5161 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5164 ; CI-LABEL: test_call_external_void_func_v16i8:
5166 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5167 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
5168 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
5169 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5170 ; CI-NEXT: s_mov_b32 s38, -1
5171 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
5172 ; CI-NEXT: s_add_u32 s36, s36, s3
5173 ; CI-NEXT: s_mov_b32 s3, 0xf000
5174 ; CI-NEXT: s_mov_b32 s2, -1
5175 ; CI-NEXT: s_waitcnt lgkmcnt(0)
5176 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
5177 ; CI-NEXT: s_addc_u32 s37, s37, 0
5178 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
5179 ; CI-NEXT: s_getpc_b64 s[4:5]
5180 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
5181 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
5182 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
5183 ; CI-NEXT: s_mov_b32 s32, 0
5184 ; CI-NEXT: s_waitcnt vmcnt(0)
5185 ; CI-NEXT: v_lshrrev_b32_e32 v16, 8, v0
5186 ; CI-NEXT: v_lshrrev_b32_e32 v17, 16, v0
5187 ; CI-NEXT: v_lshrrev_b32_e32 v18, 24, v0
5188 ; CI-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5189 ; CI-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5190 ; CI-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5191 ; CI-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5192 ; CI-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5193 ; CI-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5194 ; CI-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5195 ; CI-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5196 ; CI-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5197 ; CI-NEXT: v_mov_b32_e32 v4, v1
5198 ; CI-NEXT: v_mov_b32_e32 v8, v2
5199 ; CI-NEXT: v_mov_b32_e32 v12, v3
5200 ; CI-NEXT: v_mov_b32_e32 v1, v16
5201 ; CI-NEXT: v_mov_b32_e32 v2, v17
5202 ; CI-NEXT: v_mov_b32_e32 v3, v18
5203 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5206 ; GFX9-LABEL: test_call_external_void_func_v16i8:
5208 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5209 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
5210 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
5211 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5212 ; GFX9-NEXT: s_mov_b32 s38, -1
5213 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
5214 ; GFX9-NEXT: s_add_u32 s36, s36, s3
5215 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
5216 ; GFX9-NEXT: s_mov_b32 s2, -1
5217 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5218 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
5219 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
5220 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
5221 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5222 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
5223 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
5224 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
5225 ; GFX9-NEXT: s_mov_b32 s32, 0
5226 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5227 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0
5228 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0
5229 ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 24, v0
5230 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5231 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5232 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5233 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5234 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5235 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5236 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5237 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5238 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5239 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
5240 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
5241 ; GFX9-NEXT: v_mov_b32_e32 v12, v3
5242 ; GFX9-NEXT: v_mov_b32_e32 v1, v16
5243 ; GFX9-NEXT: v_mov_b32_e32 v2, v17
5244 ; GFX9-NEXT: v_mov_b32_e32 v3, v18
5245 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
5246 ; GFX9-NEXT: s_endpgm
5248 ; GFX11-LABEL: test_call_external_void_func_v16i8:
5250 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
5251 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
5252 ; GFX11-NEXT: s_mov_b32 s6, -1
5253 ; GFX11-NEXT: s_getpc_b64 s[2:3]
5254 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v16i8@rel32@lo+4
5255 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v16i8@rel32@hi+12
5256 ; GFX11-NEXT: s_mov_b32 s32, 0
5257 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5258 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
5259 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
5260 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5261 ; GFX11-NEXT: v_lshrrev_b32_e32 v16, 8, v0
5262 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0
5263 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 24, v0
5264 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5265 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5266 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5267 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5268 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5269 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5270 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5271 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5272 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5273 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v16
5274 ; GFX11-NEXT: v_mov_b32_e32 v8, v2
5275 ; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18
5276 ; GFX11-NEXT: v_mov_b32_e32 v2, v17
5277 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
5278 ; GFX11-NEXT: s_endpgm
5280 ; HSA-LABEL: test_call_external_void_func_v16i8:
5282 ; HSA-NEXT: s_add_i32 s6, s6, s9
5283 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
5284 ; HSA-NEXT: s_add_u32 s0, s0, s9
5285 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
5286 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
5287 ; HSA-NEXT: s_mov_b32 s10, -1
5288 ; HSA-NEXT: s_addc_u32 s1, s1, 0
5289 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
5290 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
5291 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
5292 ; HSA-NEXT: s_getpc_b64 s[8:9]
5293 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v16i8@rel32@lo+4
5294 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v16i8@rel32@hi+12
5295 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
5296 ; HSA-NEXT: s_mov_b32 s32, 0
5297 ; HSA-NEXT: s_waitcnt vmcnt(0)
5298 ; HSA-NEXT: v_lshrrev_b32_e32 v16, 8, v0
5299 ; HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0
5300 ; HSA-NEXT: v_lshrrev_b32_e32 v18, 24, v0
5301 ; HSA-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5302 ; HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5303 ; HSA-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5304 ; HSA-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5305 ; HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5306 ; HSA-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5307 ; HSA-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5308 ; HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5309 ; HSA-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5310 ; HSA-NEXT: v_mov_b32_e32 v4, v1
5311 ; HSA-NEXT: v_mov_b32_e32 v8, v2
5312 ; HSA-NEXT: v_mov_b32_e32 v12, v3
5313 ; HSA-NEXT: v_mov_b32_e32 v1, v16
5314 ; HSA-NEXT: v_mov_b32_e32 v2, v17
5315 ; HSA-NEXT: v_mov_b32_e32 v3, v18
5316 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
5317 ; HSA-NEXT: s_endpgm
; IR under test: fetch the source pointer from an undef address, load the
; sixteen-byte vector through it, and pass the vector to the callee by value.
5318 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
5319 %val = load <16 x i8>, ptr addrspace(1) %ptr
5320 call void @external_void_func_v16i8(<16 x i8> %val)
5324 define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
5325 ; VI-LABEL: stack_passed_arg_alignment_v32i32_f64:
5326 ; VI: ; %bb.0: ; %entry
5327 ; VI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
5328 ; VI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
5329 ; VI-NEXT: s_mov_b32 s54, -1
5330 ; VI-NEXT: s_mov_b32 s55, 0xe80000
5331 ; VI-NEXT: s_add_u32 s52, s52, s5
5332 ; VI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
5333 ; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4
5334 ; VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
5335 ; VI-NEXT: s_mov_b32 s32, 0
5336 ; VI-NEXT: s_addc_u32 s53, s53, 0
5337 ; VI-NEXT: s_waitcnt lgkmcnt(0)
5338 ; VI-NEXT: v_mov_b32_e32 v0, s23
5339 ; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32
5340 ; VI-NEXT: v_mov_b32_e32 v0, s4
5341 ; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
5342 ; VI-NEXT: v_mov_b32_e32 v0, s5
5343 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
5344 ; VI-NEXT: s_mov_b64 s[0:1], s[52:53]
5345 ; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
5346 ; VI-NEXT: s_getpc_b64 s[4:5]
5347 ; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5348 ; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5349 ; VI-NEXT: s_mov_b64 s[2:3], s[54:55]
5350 ; VI-NEXT: v_mov_b32_e32 v0, s36
5351 ; VI-NEXT: v_mov_b32_e32 v1, s37
5352 ; VI-NEXT: v_mov_b32_e32 v2, s38
5353 ; VI-NEXT: v_mov_b32_e32 v3, s39
5354 ; VI-NEXT: v_mov_b32_e32 v4, s40
5355 ; VI-NEXT: v_mov_b32_e32 v5, s41
5356 ; VI-NEXT: v_mov_b32_e32 v6, s42
5357 ; VI-NEXT: v_mov_b32_e32 v7, s43
5358 ; VI-NEXT: v_mov_b32_e32 v8, s44
5359 ; VI-NEXT: v_mov_b32_e32 v9, s45
5360 ; VI-NEXT: v_mov_b32_e32 v10, s46
5361 ; VI-NEXT: v_mov_b32_e32 v11, s47
5362 ; VI-NEXT: v_mov_b32_e32 v12, s48
5363 ; VI-NEXT: v_mov_b32_e32 v13, s49
5364 ; VI-NEXT: v_mov_b32_e32 v14, s50
5365 ; VI-NEXT: v_mov_b32_e32 v15, s51
5366 ; VI-NEXT: v_mov_b32_e32 v16, s8
5367 ; VI-NEXT: v_mov_b32_e32 v17, s9
5368 ; VI-NEXT: v_mov_b32_e32 v18, s10
5369 ; VI-NEXT: v_mov_b32_e32 v19, s11
5370 ; VI-NEXT: v_mov_b32_e32 v20, s12
5371 ; VI-NEXT: v_mov_b32_e32 v21, s13
5372 ; VI-NEXT: v_mov_b32_e32 v22, s14
5373 ; VI-NEXT: v_mov_b32_e32 v23, s15
5374 ; VI-NEXT: v_mov_b32_e32 v24, s16
5375 ; VI-NEXT: v_mov_b32_e32 v25, s17
5376 ; VI-NEXT: v_mov_b32_e32 v26, s18
5377 ; VI-NEXT: v_mov_b32_e32 v27, s19
5378 ; VI-NEXT: v_mov_b32_e32 v28, s20
5379 ; VI-NEXT: v_mov_b32_e32 v29, s21
5380 ; VI-NEXT: v_mov_b32_e32 v30, s22
5381 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5384 ; CI-LABEL: stack_passed_arg_alignment_v32i32_f64:
5385 ; CI: ; %bb.0: ; %entry
5386 ; CI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
5387 ; CI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
5388 ; CI-NEXT: s_mov_b32 s54, -1
5389 ; CI-NEXT: s_mov_b32 s55, 0xe8f000
5390 ; CI-NEXT: s_add_u32 s52, s52, s5
5391 ; CI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x19
5392 ; CI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x29
5393 ; CI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x9
5394 ; CI-NEXT: s_mov_b32 s32, 0
5395 ; CI-NEXT: s_addc_u32 s53, s53, 0
5396 ; CI-NEXT: s_waitcnt lgkmcnt(0)
5397 ; CI-NEXT: v_mov_b32_e32 v0, s23
5398 ; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32
5399 ; CI-NEXT: v_mov_b32_e32 v0, s4
5400 ; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
5401 ; CI-NEXT: v_mov_b32_e32 v0, s5
5402 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
5403 ; CI-NEXT: s_mov_b64 s[0:1], s[52:53]
5404 ; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
5405 ; CI-NEXT: s_getpc_b64 s[4:5]
5406 ; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5407 ; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5408 ; CI-NEXT: s_mov_b64 s[2:3], s[54:55]
5409 ; CI-NEXT: v_mov_b32_e32 v0, s36
5410 ; CI-NEXT: v_mov_b32_e32 v1, s37
5411 ; CI-NEXT: v_mov_b32_e32 v2, s38
5412 ; CI-NEXT: v_mov_b32_e32 v3, s39
5413 ; CI-NEXT: v_mov_b32_e32 v4, s40
5414 ; CI-NEXT: v_mov_b32_e32 v5, s41
5415 ; CI-NEXT: v_mov_b32_e32 v6, s42
5416 ; CI-NEXT: v_mov_b32_e32 v7, s43
5417 ; CI-NEXT: v_mov_b32_e32 v8, s44
5418 ; CI-NEXT: v_mov_b32_e32 v9, s45
5419 ; CI-NEXT: v_mov_b32_e32 v10, s46
5420 ; CI-NEXT: v_mov_b32_e32 v11, s47
5421 ; CI-NEXT: v_mov_b32_e32 v12, s48
5422 ; CI-NEXT: v_mov_b32_e32 v13, s49
5423 ; CI-NEXT: v_mov_b32_e32 v14, s50
5424 ; CI-NEXT: v_mov_b32_e32 v15, s51
5425 ; CI-NEXT: v_mov_b32_e32 v16, s8
5426 ; CI-NEXT: v_mov_b32_e32 v17, s9
5427 ; CI-NEXT: v_mov_b32_e32 v18, s10
5428 ; CI-NEXT: v_mov_b32_e32 v19, s11
5429 ; CI-NEXT: v_mov_b32_e32 v20, s12
5430 ; CI-NEXT: v_mov_b32_e32 v21, s13
5431 ; CI-NEXT: v_mov_b32_e32 v22, s14
5432 ; CI-NEXT: v_mov_b32_e32 v23, s15
5433 ; CI-NEXT: v_mov_b32_e32 v24, s16
5434 ; CI-NEXT: v_mov_b32_e32 v25, s17
5435 ; CI-NEXT: v_mov_b32_e32 v26, s18
5436 ; CI-NEXT: v_mov_b32_e32 v27, s19
5437 ; CI-NEXT: v_mov_b32_e32 v28, s20
5438 ; CI-NEXT: v_mov_b32_e32 v29, s21
5439 ; CI-NEXT: v_mov_b32_e32 v30, s22
5440 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5443 ; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
5444 ; GFX9: ; %bb.0: ; %entry
5445 ; GFX9-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
5446 ; GFX9-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
5447 ; GFX9-NEXT: s_mov_b32 s54, -1
5448 ; GFX9-NEXT: s_mov_b32 s55, 0xe00000
5449 ; GFX9-NEXT: s_add_u32 s52, s52, s5
5450 ; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
5451 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4
5452 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
5453 ; GFX9-NEXT: s_mov_b32 s32, 0
5454 ; GFX9-NEXT: s_addc_u32 s53, s53, 0
5455 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5456 ; GFX9-NEXT: v_mov_b32_e32 v0, s23
5457 ; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32
5458 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
5459 ; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
5460 ; GFX9-NEXT: v_mov_b32_e32 v0, s5
5461 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
5462 ; GFX9-NEXT: s_mov_b64 s[0:1], s[52:53]
5463 ; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
5464 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5465 ; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5466 ; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5467 ; GFX9-NEXT: s_mov_b64 s[2:3], s[54:55]
5468 ; GFX9-NEXT: v_mov_b32_e32 v0, s36
5469 ; GFX9-NEXT: v_mov_b32_e32 v1, s37
5470 ; GFX9-NEXT: v_mov_b32_e32 v2, s38
5471 ; GFX9-NEXT: v_mov_b32_e32 v3, s39
5472 ; GFX9-NEXT: v_mov_b32_e32 v4, s40
5473 ; GFX9-NEXT: v_mov_b32_e32 v5, s41
5474 ; GFX9-NEXT: v_mov_b32_e32 v6, s42
5475 ; GFX9-NEXT: v_mov_b32_e32 v7, s43
5476 ; GFX9-NEXT: v_mov_b32_e32 v8, s44
5477 ; GFX9-NEXT: v_mov_b32_e32 v9, s45
5478 ; GFX9-NEXT: v_mov_b32_e32 v10, s46
5479 ; GFX9-NEXT: v_mov_b32_e32 v11, s47
5480 ; GFX9-NEXT: v_mov_b32_e32 v12, s48
5481 ; GFX9-NEXT: v_mov_b32_e32 v13, s49
5482 ; GFX9-NEXT: v_mov_b32_e32 v14, s50
5483 ; GFX9-NEXT: v_mov_b32_e32 v15, s51
5484 ; GFX9-NEXT: v_mov_b32_e32 v16, s8
5485 ; GFX9-NEXT: v_mov_b32_e32 v17, s9
5486 ; GFX9-NEXT: v_mov_b32_e32 v18, s10
5487 ; GFX9-NEXT: v_mov_b32_e32 v19, s11
5488 ; GFX9-NEXT: v_mov_b32_e32 v20, s12
5489 ; GFX9-NEXT: v_mov_b32_e32 v21, s13
5490 ; GFX9-NEXT: v_mov_b32_e32 v22, s14
5491 ; GFX9-NEXT: v_mov_b32_e32 v23, s15
5492 ; GFX9-NEXT: v_mov_b32_e32 v24, s16
5493 ; GFX9-NEXT: v_mov_b32_e32 v25, s17
5494 ; GFX9-NEXT: v_mov_b32_e32 v26, s18
5495 ; GFX9-NEXT: v_mov_b32_e32 v27, s19
5496 ; GFX9-NEXT: v_mov_b32_e32 v28, s20
5497 ; GFX9-NEXT: v_mov_b32_e32 v29, s21
5498 ; GFX9-NEXT: v_mov_b32_e32 v30, s22
5499 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
5500 ; GFX9-NEXT: s_endpgm
5502 ; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64:
5503 ; GFX11: ; %bb.0: ; %entry
5504 ; GFX11-NEXT: s_clause 0x2
5505 ; GFX11-NEXT: s_load_b64 s[20:21], s[2:3], 0xa4
5506 ; GFX11-NEXT: s_load_b512 s[4:19], s[2:3], 0x64
5507 ; GFX11-NEXT: s_load_b512 s[36:51], s[2:3], 0x24
5508 ; GFX11-NEXT: s_mov_b32 s32, 0
5509 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5510 ; GFX11-NEXT: s_add_i32 s22, s32, 8
5511 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5512 ; GFX11-NEXT: v_dual_mov_b32 v0, s21 :: v_dual_mov_b32 v1, s20
5513 ; GFX11-NEXT: v_mov_b32_e32 v2, s19
5514 ; GFX11-NEXT: s_add_i32 s19, s32, 4
5515 ; GFX11-NEXT: v_dual_mov_b32 v4, s40 :: v_dual_mov_b32 v7, s43
5516 ; GFX11-NEXT: scratch_store_b32 off, v0, s22
5517 ; GFX11-NEXT: scratch_store_b32 off, v1, s19
5518 ; GFX11-NEXT: scratch_store_b32 off, v2, s32
5519 ; GFX11-NEXT: v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v3, s39
5520 ; GFX11-NEXT: v_dual_mov_b32 v1, s37 :: v_dual_mov_b32 v2, s38
5521 ; GFX11-NEXT: v_dual_mov_b32 v5, s41 :: v_dual_mov_b32 v6, s42
5522 ; GFX11-NEXT: v_dual_mov_b32 v9, s45 :: v_dual_mov_b32 v8, s44
5523 ; GFX11-NEXT: v_dual_mov_b32 v11, s47 :: v_dual_mov_b32 v10, s46
5524 ; GFX11-NEXT: v_dual_mov_b32 v13, s49 :: v_dual_mov_b32 v12, s48
5525 ; GFX11-NEXT: v_dual_mov_b32 v15, s51 :: v_dual_mov_b32 v14, s50
5526 ; GFX11-NEXT: v_dual_mov_b32 v17, s5 :: v_dual_mov_b32 v16, s4
5527 ; GFX11-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s6
5528 ; GFX11-NEXT: v_dual_mov_b32 v21, s9 :: v_dual_mov_b32 v20, s8
5529 ; GFX11-NEXT: v_dual_mov_b32 v23, s11 :: v_dual_mov_b32 v22, s10
5530 ; GFX11-NEXT: v_dual_mov_b32 v25, s13 :: v_dual_mov_b32 v24, s12
5531 ; GFX11-NEXT: v_dual_mov_b32 v27, s15 :: v_dual_mov_b32 v26, s14
5532 ; GFX11-NEXT: v_dual_mov_b32 v29, s17 :: v_dual_mov_b32 v28, s16
5533 ; GFX11-NEXT: v_mov_b32_e32 v30, s18
5534 ; GFX11-NEXT: s_getpc_b64 s[2:3]
5535 ; GFX11-NEXT: s_add_u32 s2, s2, stack_passed_f64_arg@rel32@lo+4
5536 ; GFX11-NEXT: s_addc_u32 s3, s3, stack_passed_f64_arg@rel32@hi+12
5537 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
5538 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
5539 ; GFX11-NEXT: s_endpgm
5541 ; HSA-LABEL: stack_passed_arg_alignment_v32i32_f64:
5542 ; HSA: ; %bb.0: ; %entry
5543 ; HSA-NEXT: s_add_i32 s8, s8, s11
5544 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
5545 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
5546 ; HSA-NEXT: s_add_u32 s0, s0, s11
5547 ; HSA-NEXT: s_load_dwordx16 s[8:23], s[6:7], 0x40
5548 ; HSA-NEXT: s_load_dwordx2 s[24:25], s[6:7], 0x80
5549 ; HSA-NEXT: s_load_dwordx16 s[36:51], s[6:7], 0x0
5550 ; HSA-NEXT: s_mov_b32 s32, 0
5551 ; HSA-NEXT: s_addc_u32 s1, s1, 0
5552 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
5553 ; HSA-NEXT: v_mov_b32_e32 v0, s23
5554 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32
5555 ; HSA-NEXT: v_mov_b32_e32 v0, s24
5556 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
5557 ; HSA-NEXT: v_mov_b32_e32 v0, s25
5558 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
5559 ; HSA-NEXT: s_getpc_b64 s[24:25]
5560 ; HSA-NEXT: s_add_u32 s24, s24, stack_passed_f64_arg@rel32@lo+4
5561 ; HSA-NEXT: s_addc_u32 s25, s25, stack_passed_f64_arg@rel32@hi+12
5562 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
5563 ; HSA-NEXT: v_mov_b32_e32 v0, s36
5564 ; HSA-NEXT: v_mov_b32_e32 v1, s37
5565 ; HSA-NEXT: v_mov_b32_e32 v2, s38
5566 ; HSA-NEXT: v_mov_b32_e32 v3, s39
5567 ; HSA-NEXT: v_mov_b32_e32 v4, s40
5568 ; HSA-NEXT: v_mov_b32_e32 v5, s41
5569 ; HSA-NEXT: v_mov_b32_e32 v6, s42
5570 ; HSA-NEXT: v_mov_b32_e32 v7, s43
5571 ; HSA-NEXT: v_mov_b32_e32 v8, s44
5572 ; HSA-NEXT: v_mov_b32_e32 v9, s45
5573 ; HSA-NEXT: v_mov_b32_e32 v10, s46
5574 ; HSA-NEXT: v_mov_b32_e32 v11, s47
5575 ; HSA-NEXT: v_mov_b32_e32 v12, s48
5576 ; HSA-NEXT: v_mov_b32_e32 v13, s49
5577 ; HSA-NEXT: v_mov_b32_e32 v14, s50
5578 ; HSA-NEXT: v_mov_b32_e32 v15, s51
5579 ; HSA-NEXT: v_mov_b32_e32 v16, s8
5580 ; HSA-NEXT: v_mov_b32_e32 v17, s9
5581 ; HSA-NEXT: v_mov_b32_e32 v18, s10
5582 ; HSA-NEXT: v_mov_b32_e32 v19, s11
5583 ; HSA-NEXT: v_mov_b32_e32 v20, s12
5584 ; HSA-NEXT: v_mov_b32_e32 v21, s13
5585 ; HSA-NEXT: v_mov_b32_e32 v22, s14
5586 ; HSA-NEXT: v_mov_b32_e32 v23, s15
5587 ; HSA-NEXT: v_mov_b32_e32 v24, s16
5588 ; HSA-NEXT: v_mov_b32_e32 v25, s17
5589 ; HSA-NEXT: v_mov_b32_e32 v26, s18
5590 ; HSA-NEXT: v_mov_b32_e32 v27, s19
5591 ; HSA-NEXT: v_mov_b32_e32 v28, s20
5592 ; HSA-NEXT: v_mov_b32_e32 v29, s21
5593 ; HSA-NEXT: v_mov_b32_e32 v30, s22
5594 ; HSA-NEXT: s_swappc_b64 s[30:31], s[24:25]
5595 ; HSA-NEXT: s_endpgm
5597 call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
; tail_call_byval_align16 allocates a double (align 8, addrspace(5)) and
; tail-calls @byval_align16_f64_arg with %val plus that alloca passed as
; byval(double) align 16; %tmp is not referenced by the visible body.
; Every run configuration below expects the function to end in a jump through
; s_setpc_b64 (no s_swappc_b64). Before the jump the expected code copies the
; dwords at s32 offset:24 and offset:28 down to offset:16 and offset:20 and
; rewrites the dword at s32 in place; gfx1100 does the same with
; scratch_load/scratch_store, using one 64-bit access for the copied pair.
; NOTE(review): the assertions are autogenerated -- regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5601 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
5602 ; VI-LABEL: tail_call_byval_align16:
5603 ; VI: ; %bb.0: ; %entry
5604 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5605 ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
5606 ; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32
5607 ; VI-NEXT: s_getpc_b64 s[4:5]
5608 ; VI-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5609 ; VI-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5610 ; VI-NEXT: s_waitcnt vmcnt(1)
5611 ; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
5612 ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
5613 ; VI-NEXT: s_waitcnt vmcnt(2)
5614 ; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32
5615 ; VI-NEXT: s_waitcnt vmcnt(1)
5616 ; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
5617 ; VI-NEXT: s_setpc_b64 s[4:5]
5619 ; CI-LABEL: tail_call_byval_align16:
5620 ; CI: ; %bb.0: ; %entry
5621 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5622 ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
5623 ; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32
5624 ; CI-NEXT: s_getpc_b64 s[4:5]
5625 ; CI-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5626 ; CI-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5627 ; CI-NEXT: s_waitcnt vmcnt(1)
5628 ; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
5629 ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
5630 ; CI-NEXT: s_waitcnt vmcnt(2)
5631 ; CI-NEXT: buffer_store_dword v32, off, s[0:3], s32
5632 ; CI-NEXT: s_waitcnt vmcnt(1)
5633 ; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
5634 ; CI-NEXT: s_setpc_b64 s[4:5]
5636 ; GFX9-LABEL: tail_call_byval_align16:
5637 ; GFX9: ; %bb.0: ; %entry
5638 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5639 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
5640 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32
5641 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5642 ; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5643 ; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5644 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5645 ; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
5646 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
5647 ; GFX9-NEXT: s_waitcnt vmcnt(2)
5648 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
5649 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5650 ; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
5651 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5653 ; GFX11-LABEL: tail_call_byval_align16:
5654 ; GFX11: ; %bb.0: ; %entry
5655 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5656 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
5657 ; GFX11-NEXT: s_getpc_b64 s[0:1]
5658 ; GFX11-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4
5659 ; GFX11-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12
5660 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5661 ; GFX11-NEXT: scratch_store_b32 off, v31, s32
5662 ; GFX11-NEXT: scratch_load_b64 v[31:32], off, s32 offset:24
5663 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5664 ; GFX11-NEXT: scratch_store_b64 off, v[31:32], s32 offset:16
5665 ; GFX11-NEXT: s_setpc_b64 s[0:1]
5667 ; HSA-LABEL: tail_call_byval_align16:
5668 ; HSA: ; %bb.0: ; %entry
5669 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5670 ; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
5671 ; HSA-NEXT: buffer_load_dword v32, off, s[0:3], s32
5672 ; HSA-NEXT: s_getpc_b64 s[4:5]
5673 ; HSA-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5674 ; HSA-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5675 ; HSA-NEXT: s_waitcnt vmcnt(1)
5676 ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
5677 ; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
5678 ; HSA-NEXT: s_waitcnt vmcnt(2)
5679 ; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32
5680 ; HSA-NEXT: s_waitcnt vmcnt(1)
5681 ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
5682 ; HSA-NEXT: s_setpc_b64 s[4:5]
5684 %alloca = alloca double, align 8, addrspace(5)
5685 tail call void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca)
; tail_call_stack_passed_arg_alignment_v32i32_f64 forwards both of its
; arguments unchanged in a tail call to @stack_passed_f64_arg.
; For every run configuration the expected code loads the three dwords at
; s32, s32 offset:4 and s32 offset:8, stores them back to the same offsets,
; and then jumps with s_setpc_b64 rather than calling with s_swappc_b64.
; gfx1100 covers the same range with one 32-bit and one 64-bit scratch access.
; NOTE(review): presumably those three dwords are the stack-passed last
; element of %val followed by the two halves of %tmp -- confirm against the
; AMDGPU calling convention before relying on this description.
5689 define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
5690 ; VI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5691 ; VI: ; %bb.0: ; %entry
5692 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5693 ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
5694 ; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
5695 ; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
5696 ; VI-NEXT: s_getpc_b64 s[4:5]
5697 ; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5698 ; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5699 ; VI-NEXT: s_waitcnt vmcnt(2)
5700 ; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32
5701 ; VI-NEXT: s_waitcnt vmcnt(2)
5702 ; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
5703 ; VI-NEXT: s_waitcnt vmcnt(2)
5704 ; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8
5705 ; VI-NEXT: s_setpc_b64 s[4:5]
5707 ; CI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5708 ; CI: ; %bb.0: ; %entry
5709 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5710 ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
5711 ; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
5712 ; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
5713 ; CI-NEXT: s_getpc_b64 s[4:5]
5714 ; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5715 ; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5716 ; CI-NEXT: s_waitcnt vmcnt(2)
5717 ; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32
5718 ; CI-NEXT: s_waitcnt vmcnt(2)
5719 ; CI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
5720 ; CI-NEXT: s_waitcnt vmcnt(2)
5721 ; CI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8
5722 ; CI-NEXT: s_setpc_b64 s[4:5]
5724 ; GFX9-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5725 ; GFX9: ; %bb.0: ; %entry
5726 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5727 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
5728 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
5729 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
5730 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5731 ; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5732 ; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5733 ; GFX9-NEXT: s_waitcnt vmcnt(2)
5734 ; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32
5735 ; GFX9-NEXT: s_waitcnt vmcnt(2)
5736 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
5737 ; GFX9-NEXT: s_waitcnt vmcnt(2)
5738 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8
5739 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5741 ; GFX11-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5742 ; GFX11: ; %bb.0: ; %entry
5743 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5744 ; GFX11-NEXT: s_clause 0x1
5745 ; GFX11-NEXT: scratch_load_b32 v33, off, s32
5746 ; GFX11-NEXT: scratch_load_b64 v[31:32], off, s32 offset:4
5747 ; GFX11-NEXT: s_getpc_b64 s[0:1]
5748 ; GFX11-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4
5749 ; GFX11-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12
5750 ; GFX11-NEXT: s_waitcnt vmcnt(1)
5751 ; GFX11-NEXT: scratch_store_b32 off, v33, s32
5752 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5753 ; GFX11-NEXT: scratch_store_b64 off, v[31:32], s32 offset:4
5754 ; GFX11-NEXT: s_setpc_b64 s[0:1]
5756 ; HSA-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5757 ; HSA: ; %bb.0: ; %entry
5758 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5759 ; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32
5760 ; HSA-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
5761 ; HSA-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
5762 ; HSA-NEXT: s_getpc_b64 s[4:5]
5763 ; HSA-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5764 ; HSA-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5765 ; HSA-NEXT: s_waitcnt vmcnt(2)
5766 ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32
5767 ; HSA-NEXT: s_waitcnt vmcnt(2)
5768 ; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
5769 ; HSA-NEXT: s_waitcnt vmcnt(2)
5770 ; HSA-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8
5771 ; HSA-NEXT: s_setpc_b64 s[4:5]
5773 tail call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
; stack_12xv3i32 calls @external_void_func_12xv3i32 with twelve constant
; <3 x i32> arguments, i.e. 36 dwords in total.
; The expected code places the first 31 dwords (0,0,0 ... 9,9,9, then 10) in
; v0 through v30 and stores the remaining five (11 through 15) to the stack
; relative to s32 at byte offsets 0, 4, 8, 12 and 16 before the s_swappc_b64
; call; gfx1100 emits those stores as one 128-bit store plus one 32-bit store.
; v40 is spilled on entry and reloaded before returning; in between it holds
; s30, s31 and the incoming s33 via v_writelane_b32 / v_readlane_b32.
; NOTE(review): the assertions are autogenerated -- regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5777 define void @stack_12xv3i32() #0 {
5778 ; VI-LABEL: stack_12xv3i32:
5779 ; VI: ; %bb.0: ; %entry
5780 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5781 ; VI-NEXT: s_mov_b32 s4, s33
5782 ; VI-NEXT: s_mov_b32 s33, s32
5783 ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
5784 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5785 ; VI-NEXT: s_mov_b64 exec, s[8:9]
5786 ; VI-NEXT: s_addk_i32 s32, 0x400
5787 ; VI-NEXT: v_mov_b32_e32 v0, 11
5788 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
5789 ; VI-NEXT: v_mov_b32_e32 v0, 12
5790 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
5791 ; VI-NEXT: v_mov_b32_e32 v0, 13
5792 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
5793 ; VI-NEXT: v_mov_b32_e32 v0, 14
5794 ; VI-NEXT: v_writelane_b32 v40, s4, 2
5795 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
5796 ; VI-NEXT: v_mov_b32_e32 v0, 15
5797 ; VI-NEXT: v_writelane_b32 v40, s30, 0
5798 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
5799 ; VI-NEXT: s_getpc_b64 s[4:5]
5800 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
5801 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
5802 ; VI-NEXT: v_mov_b32_e32 v0, 0
5803 ; VI-NEXT: v_mov_b32_e32 v1, 0
5804 ; VI-NEXT: v_mov_b32_e32 v2, 0
5805 ; VI-NEXT: v_mov_b32_e32 v3, 1
5806 ; VI-NEXT: v_mov_b32_e32 v4, 1
5807 ; VI-NEXT: v_mov_b32_e32 v5, 1
5808 ; VI-NEXT: v_mov_b32_e32 v6, 2
5809 ; VI-NEXT: v_mov_b32_e32 v7, 2
5810 ; VI-NEXT: v_mov_b32_e32 v8, 2
5811 ; VI-NEXT: v_mov_b32_e32 v9, 3
5812 ; VI-NEXT: v_mov_b32_e32 v10, 3
5813 ; VI-NEXT: v_mov_b32_e32 v11, 3
5814 ; VI-NEXT: v_mov_b32_e32 v12, 4
5815 ; VI-NEXT: v_mov_b32_e32 v13, 4
5816 ; VI-NEXT: v_mov_b32_e32 v14, 4
5817 ; VI-NEXT: v_mov_b32_e32 v15, 5
5818 ; VI-NEXT: v_mov_b32_e32 v16, 5
5819 ; VI-NEXT: v_mov_b32_e32 v17, 5
5820 ; VI-NEXT: v_mov_b32_e32 v18, 6
5821 ; VI-NEXT: v_mov_b32_e32 v19, 6
5822 ; VI-NEXT: v_mov_b32_e32 v20, 6
5823 ; VI-NEXT: v_mov_b32_e32 v21, 7
5824 ; VI-NEXT: v_mov_b32_e32 v22, 7
5825 ; VI-NEXT: v_mov_b32_e32 v23, 7
5826 ; VI-NEXT: v_mov_b32_e32 v24, 8
5827 ; VI-NEXT: v_mov_b32_e32 v25, 8
5828 ; VI-NEXT: v_mov_b32_e32 v26, 8
5829 ; VI-NEXT: v_mov_b32_e32 v27, 9
5830 ; VI-NEXT: v_mov_b32_e32 v28, 9
5831 ; VI-NEXT: v_mov_b32_e32 v29, 9
5832 ; VI-NEXT: v_mov_b32_e32 v30, 10
5833 ; VI-NEXT: v_writelane_b32 v40, s31, 1
5834 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5835 ; VI-NEXT: v_readlane_b32 s31, v40, 1
5836 ; VI-NEXT: v_readlane_b32 s30, v40, 0
5837 ; VI-NEXT: s_mov_b32 s32, s33
5838 ; VI-NEXT: v_readlane_b32 s4, v40, 2
5839 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1
5840 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5841 ; VI-NEXT: s_mov_b64 exec, s[6:7]
5842 ; VI-NEXT: s_mov_b32 s33, s4
5843 ; VI-NEXT: s_waitcnt vmcnt(0)
5844 ; VI-NEXT: s_setpc_b64 s[30:31]
5846 ; CI-LABEL: stack_12xv3i32:
5847 ; CI: ; %bb.0: ; %entry
5848 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5849 ; CI-NEXT: s_mov_b32 s4, s33
5850 ; CI-NEXT: s_mov_b32 s33, s32
5851 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1
5852 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5853 ; CI-NEXT: s_mov_b64 exec, s[8:9]
5854 ; CI-NEXT: s_addk_i32 s32, 0x400
5855 ; CI-NEXT: v_mov_b32_e32 v0, 11
5856 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32
5857 ; CI-NEXT: v_mov_b32_e32 v0, 12
5858 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
5859 ; CI-NEXT: v_mov_b32_e32 v0, 13
5860 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
5861 ; CI-NEXT: v_mov_b32_e32 v0, 14
5862 ; CI-NEXT: v_writelane_b32 v40, s4, 2
5863 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
5864 ; CI-NEXT: v_mov_b32_e32 v0, 15
5865 ; CI-NEXT: v_writelane_b32 v40, s30, 0
5866 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
5867 ; CI-NEXT: s_getpc_b64 s[4:5]
5868 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
5869 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
5870 ; CI-NEXT: v_mov_b32_e32 v0, 0
5871 ; CI-NEXT: v_mov_b32_e32 v1, 0
5872 ; CI-NEXT: v_mov_b32_e32 v2, 0
5873 ; CI-NEXT: v_mov_b32_e32 v3, 1
5874 ; CI-NEXT: v_mov_b32_e32 v4, 1
5875 ; CI-NEXT: v_mov_b32_e32 v5, 1
5876 ; CI-NEXT: v_mov_b32_e32 v6, 2
5877 ; CI-NEXT: v_mov_b32_e32 v7, 2
5878 ; CI-NEXT: v_mov_b32_e32 v8, 2
5879 ; CI-NEXT: v_mov_b32_e32 v9, 3
5880 ; CI-NEXT: v_mov_b32_e32 v10, 3
5881 ; CI-NEXT: v_mov_b32_e32 v11, 3
5882 ; CI-NEXT: v_mov_b32_e32 v12, 4
5883 ; CI-NEXT: v_mov_b32_e32 v13, 4
5884 ; CI-NEXT: v_mov_b32_e32 v14, 4
5885 ; CI-NEXT: v_mov_b32_e32 v15, 5
5886 ; CI-NEXT: v_mov_b32_e32 v16, 5
5887 ; CI-NEXT: v_mov_b32_e32 v17, 5
5888 ; CI-NEXT: v_mov_b32_e32 v18, 6
5889 ; CI-NEXT: v_mov_b32_e32 v19, 6
5890 ; CI-NEXT: v_mov_b32_e32 v20, 6
5891 ; CI-NEXT: v_mov_b32_e32 v21, 7
5892 ; CI-NEXT: v_mov_b32_e32 v22, 7
5893 ; CI-NEXT: v_mov_b32_e32 v23, 7
5894 ; CI-NEXT: v_mov_b32_e32 v24, 8
5895 ; CI-NEXT: v_mov_b32_e32 v25, 8
5896 ; CI-NEXT: v_mov_b32_e32 v26, 8
5897 ; CI-NEXT: v_mov_b32_e32 v27, 9
5898 ; CI-NEXT: v_mov_b32_e32 v28, 9
5899 ; CI-NEXT: v_mov_b32_e32 v29, 9
5900 ; CI-NEXT: v_mov_b32_e32 v30, 10
5901 ; CI-NEXT: v_writelane_b32 v40, s31, 1
5902 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5903 ; CI-NEXT: v_readlane_b32 s31, v40, 1
5904 ; CI-NEXT: v_readlane_b32 s30, v40, 0
5905 ; CI-NEXT: s_mov_b32 s32, s33
5906 ; CI-NEXT: v_readlane_b32 s4, v40, 2
5907 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1
5908 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5909 ; CI-NEXT: s_mov_b64 exec, s[6:7]
5910 ; CI-NEXT: s_mov_b32 s33, s4
5911 ; CI-NEXT: s_waitcnt vmcnt(0)
5912 ; CI-NEXT: s_setpc_b64 s[30:31]
5914 ; GFX9-LABEL: stack_12xv3i32:
5915 ; GFX9: ; %bb.0: ; %entry
5916 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5917 ; GFX9-NEXT: s_mov_b32 s4, s33
5918 ; GFX9-NEXT: s_mov_b32 s33, s32
5919 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
5920 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5921 ; GFX9-NEXT: s_mov_b64 exec, s[8:9]
5922 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5923 ; GFX9-NEXT: v_mov_b32_e32 v0, 11
5924 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
5925 ; GFX9-NEXT: v_mov_b32_e32 v0, 12
5926 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
5927 ; GFX9-NEXT: v_mov_b32_e32 v0, 13
5928 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
5929 ; GFX9-NEXT: v_mov_b32_e32 v0, 14
5930 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2
5931 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
5932 ; GFX9-NEXT: v_mov_b32_e32 v0, 15
5933 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5934 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
5935 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5936 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
5937 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
5938 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
5939 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
5940 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
5941 ; GFX9-NEXT: v_mov_b32_e32 v3, 1
5942 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
5943 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
5944 ; GFX9-NEXT: v_mov_b32_e32 v6, 2
5945 ; GFX9-NEXT: v_mov_b32_e32 v7, 2
5946 ; GFX9-NEXT: v_mov_b32_e32 v8, 2
5947 ; GFX9-NEXT: v_mov_b32_e32 v9, 3
5948 ; GFX9-NEXT: v_mov_b32_e32 v10, 3
5949 ; GFX9-NEXT: v_mov_b32_e32 v11, 3
5950 ; GFX9-NEXT: v_mov_b32_e32 v12, 4
5951 ; GFX9-NEXT: v_mov_b32_e32 v13, 4
5952 ; GFX9-NEXT: v_mov_b32_e32 v14, 4
5953 ; GFX9-NEXT: v_mov_b32_e32 v15, 5
5954 ; GFX9-NEXT: v_mov_b32_e32 v16, 5
5955 ; GFX9-NEXT: v_mov_b32_e32 v17, 5
5956 ; GFX9-NEXT: v_mov_b32_e32 v18, 6
5957 ; GFX9-NEXT: v_mov_b32_e32 v19, 6
5958 ; GFX9-NEXT: v_mov_b32_e32 v20, 6
5959 ; GFX9-NEXT: v_mov_b32_e32 v21, 7
5960 ; GFX9-NEXT: v_mov_b32_e32 v22, 7
5961 ; GFX9-NEXT: v_mov_b32_e32 v23, 7
5962 ; GFX9-NEXT: v_mov_b32_e32 v24, 8
5963 ; GFX9-NEXT: v_mov_b32_e32 v25, 8
5964 ; GFX9-NEXT: v_mov_b32_e32 v26, 8
5965 ; GFX9-NEXT: v_mov_b32_e32 v27, 9
5966 ; GFX9-NEXT: v_mov_b32_e32 v28, 9
5967 ; GFX9-NEXT: v_mov_b32_e32 v29, 9
5968 ; GFX9-NEXT: v_mov_b32_e32 v30, 10
5969 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5970 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
5971 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
5972 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
5973 ; GFX9-NEXT: s_mov_b32 s32, s33
5974 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
5975 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5976 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5977 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5978 ; GFX9-NEXT: s_mov_b32 s33, s4
5979 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5980 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5982 ; GFX11-LABEL: stack_12xv3i32:
5983 ; GFX11: ; %bb.0: ; %entry
5984 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5985 ; GFX11-NEXT: s_mov_b32 s0, s33
5986 ; GFX11-NEXT: s_mov_b32 s33, s32
5987 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5988 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
5989 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5990 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
5991 ; GFX11-NEXT: v_dual_mov_b32 v0, 11 :: v_dual_mov_b32 v1, 12
5992 ; GFX11-NEXT: v_dual_mov_b32 v2, 13 :: v_dual_mov_b32 v3, 14
5993 ; GFX11-NEXT: v_mov_b32_e32 v4, 15
5994 ; GFX11-NEXT: s_add_i32 s32, s32, 16
5995 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5996 ; GFX11-NEXT: s_add_i32 s0, s32, 16
5997 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
5998 ; GFX11-NEXT: scratch_store_b32 off, v4, s0
5999 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0
6000 ; GFX11-NEXT: v_dual_mov_b32 v3, 1 :: v_dual_mov_b32 v2, 0
6001 ; GFX11-NEXT: v_dual_mov_b32 v5, 1 :: v_dual_mov_b32 v4, 1
6002 ; GFX11-NEXT: v_dual_mov_b32 v7, 2 :: v_dual_mov_b32 v6, 2
6003 ; GFX11-NEXT: v_dual_mov_b32 v9, 3 :: v_dual_mov_b32 v8, 2
6004 ; GFX11-NEXT: v_dual_mov_b32 v11, 3 :: v_dual_mov_b32 v10, 3
6005 ; GFX11-NEXT: v_dual_mov_b32 v13, 4 :: v_dual_mov_b32 v12, 4
6006 ; GFX11-NEXT: v_dual_mov_b32 v15, 5 :: v_dual_mov_b32 v14, 4
6007 ; GFX11-NEXT: v_dual_mov_b32 v17, 5 :: v_dual_mov_b32 v16, 5
6008 ; GFX11-NEXT: v_dual_mov_b32 v19, 6 :: v_dual_mov_b32 v18, 6
6009 ; GFX11-NEXT: v_dual_mov_b32 v21, 7 :: v_dual_mov_b32 v20, 6
6010 ; GFX11-NEXT: v_dual_mov_b32 v23, 7 :: v_dual_mov_b32 v22, 7
6011 ; GFX11-NEXT: v_dual_mov_b32 v25, 8 :: v_dual_mov_b32 v24, 8
6012 ; GFX11-NEXT: v_dual_mov_b32 v27, 9 :: v_dual_mov_b32 v26, 8
6013 ; GFX11-NEXT: v_dual_mov_b32 v29, 9 :: v_dual_mov_b32 v28, 9
6014 ; GFX11-NEXT: v_mov_b32_e32 v30, 10
6015 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6016 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4
6017 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12
6018 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6019 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6020 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6021 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6022 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6023 ; GFX11-NEXT: s_mov_b32 s32, s33
6024 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6025 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6026 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6027 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6028 ; GFX11-NEXT: s_mov_b32 s33, s0
6029 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6030 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6032 ; HSA-LABEL: stack_12xv3i32:
6033 ; HSA: ; %bb.0: ; %entry
6034 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6035 ; HSA-NEXT: s_mov_b32 s4, s33
6036 ; HSA-NEXT: s_mov_b32 s33, s32
6037 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1
6038 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6039 ; HSA-NEXT: s_mov_b64 exec, s[8:9]
6040 ; HSA-NEXT: s_addk_i32 s32, 0x400
6041 ; HSA-NEXT: v_mov_b32_e32 v0, 11
6042 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32
6043 ; HSA-NEXT: v_mov_b32_e32 v0, 12
6044 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6045 ; HSA-NEXT: v_mov_b32_e32 v0, 13
6046 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6047 ; HSA-NEXT: v_mov_b32_e32 v0, 14
6048 ; HSA-NEXT: v_writelane_b32 v40, s4, 2
6049 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6050 ; HSA-NEXT: v_mov_b32_e32 v0, 15
6051 ; HSA-NEXT: v_writelane_b32 v40, s30, 0
6052 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6053 ; HSA-NEXT: s_getpc_b64 s[4:5]
6054 ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
6055 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
6056 ; HSA-NEXT: v_mov_b32_e32 v0, 0
6057 ; HSA-NEXT: v_mov_b32_e32 v1, 0
6058 ; HSA-NEXT: v_mov_b32_e32 v2, 0
6059 ; HSA-NEXT: v_mov_b32_e32 v3, 1
6060 ; HSA-NEXT: v_mov_b32_e32 v4, 1
6061 ; HSA-NEXT: v_mov_b32_e32 v5, 1
6062 ; HSA-NEXT: v_mov_b32_e32 v6, 2
6063 ; HSA-NEXT: v_mov_b32_e32 v7, 2
6064 ; HSA-NEXT: v_mov_b32_e32 v8, 2
6065 ; HSA-NEXT: v_mov_b32_e32 v9, 3
6066 ; HSA-NEXT: v_mov_b32_e32 v10, 3
6067 ; HSA-NEXT: v_mov_b32_e32 v11, 3
6068 ; HSA-NEXT: v_mov_b32_e32 v12, 4
6069 ; HSA-NEXT: v_mov_b32_e32 v13, 4
6070 ; HSA-NEXT: v_mov_b32_e32 v14, 4
6071 ; HSA-NEXT: v_mov_b32_e32 v15, 5
6072 ; HSA-NEXT: v_mov_b32_e32 v16, 5
6073 ; HSA-NEXT: v_mov_b32_e32 v17, 5
6074 ; HSA-NEXT: v_mov_b32_e32 v18, 6
6075 ; HSA-NEXT: v_mov_b32_e32 v19, 6
6076 ; HSA-NEXT: v_mov_b32_e32 v20, 6
6077 ; HSA-NEXT: v_mov_b32_e32 v21, 7
6078 ; HSA-NEXT: v_mov_b32_e32 v22, 7
6079 ; HSA-NEXT: v_mov_b32_e32 v23, 7
6080 ; HSA-NEXT: v_mov_b32_e32 v24, 8
6081 ; HSA-NEXT: v_mov_b32_e32 v25, 8
6082 ; HSA-NEXT: v_mov_b32_e32 v26, 8
6083 ; HSA-NEXT: v_mov_b32_e32 v27, 9
6084 ; HSA-NEXT: v_mov_b32_e32 v28, 9
6085 ; HSA-NEXT: v_mov_b32_e32 v29, 9
6086 ; HSA-NEXT: v_mov_b32_e32 v30, 10
6087 ; HSA-NEXT: v_writelane_b32 v40, s31, 1
6088 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
6089 ; HSA-NEXT: v_readlane_b32 s31, v40, 1
6090 ; HSA-NEXT: v_readlane_b32 s30, v40, 0
6091 ; HSA-NEXT: s_mov_b32 s32, s33
6092 ; HSA-NEXT: v_readlane_b32 s4, v40, 2
6093 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
6094 ; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6095 ; HSA-NEXT: s_mov_b64 exec, s[6:7]
6096 ; HSA-NEXT: s_mov_b32 s33, s4
6097 ; HSA-NEXT: s_waitcnt vmcnt(0)
6098 ; HSA-NEXT: s_setpc_b64 s[30:31]
6100 call void @external_void_func_12xv3i32(
6101 <3 x i32><i32 0, i32 0, i32 0>,
6102 <3 x i32><i32 1, i32 1, i32 1>,
6103 <3 x i32><i32 2, i32 2, i32 2>,
6104 <3 x i32><i32 3, i32 3, i32 3>,
6105 <3 x i32><i32 4, i32 4, i32 4>,
6106 <3 x i32><i32 5, i32 5, i32 5>,
6107 <3 x i32><i32 6, i32 6, i32 6>,
6108 <3 x i32><i32 7, i32 7, i32 7>,
6109 <3 x i32><i32 8, i32 8, i32 8>,
6110 <3 x i32><i32 9, i32 9, i32 9>,
6111 <3 x i32><i32 10, i32 11, i32 12>,
6112 <3 x i32><i32 13, i32 14, i32 15>)
; stack_12xv3f32 passes twelve constant <3 x float> arguments (36 scalars in
; total) to @external_void_func_12xv3f32. In the expected output below, the
; first 31 scalars (0.0 through 10.0) are materialized in v0-v30, and the
; remaining five (11.0 through 15.0, i.e. 0x41300000 through 0x41700000) are
; written to memory at s32 offsets 0 through 16 before the call.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
6116 define void @stack_12xv3f32() #0 {
6117 ; VI-LABEL: stack_12xv3f32:
6118 ; VI: ; %bb.0: ; %entry
6119 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6120 ; VI-NEXT: s_mov_b32 s4, s33
6121 ; VI-NEXT: s_mov_b32 s33, s32
6122 ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
6123 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6124 ; VI-NEXT: s_mov_b64 exec, s[8:9]
6125 ; VI-NEXT: s_addk_i32 s32, 0x400
6126 ; VI-NEXT: v_mov_b32_e32 v0, 0x41300000
6127 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6128 ; VI-NEXT: v_mov_b32_e32 v0, 0x41400000
6129 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6130 ; VI-NEXT: v_mov_b32_e32 v0, 0x41500000
6131 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6132 ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000
6133 ; VI-NEXT: v_writelane_b32 v40, s4, 2
6134 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6135 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000
6136 ; VI-NEXT: v_writelane_b32 v40, s30, 0
6137 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6138 ; VI-NEXT: s_getpc_b64 s[4:5]
6139 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6140 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6141 ; VI-NEXT: v_mov_b32_e32 v0, 0
6142 ; VI-NEXT: v_mov_b32_e32 v1, 0
6143 ; VI-NEXT: v_mov_b32_e32 v2, 0
6144 ; VI-NEXT: v_mov_b32_e32 v3, 1.0
6145 ; VI-NEXT: v_mov_b32_e32 v4, 1.0
6146 ; VI-NEXT: v_mov_b32_e32 v5, 1.0
6147 ; VI-NEXT: v_mov_b32_e32 v6, 2.0
6148 ; VI-NEXT: v_mov_b32_e32 v7, 2.0
6149 ; VI-NEXT: v_mov_b32_e32 v8, 2.0
6150 ; VI-NEXT: v_mov_b32_e32 v9, 0x40400000
6151 ; VI-NEXT: v_mov_b32_e32 v10, 0x40400000
6152 ; VI-NEXT: v_mov_b32_e32 v11, 0x40400000
6153 ; VI-NEXT: v_mov_b32_e32 v12, 4.0
6154 ; VI-NEXT: v_mov_b32_e32 v13, 4.0
6155 ; VI-NEXT: v_mov_b32_e32 v14, 4.0
6156 ; VI-NEXT: v_mov_b32_e32 v15, 0x40a00000
6157 ; VI-NEXT: v_mov_b32_e32 v16, 0x40a00000
6158 ; VI-NEXT: v_mov_b32_e32 v17, 0x40a00000
6159 ; VI-NEXT: v_mov_b32_e32 v18, 0x40c00000
6160 ; VI-NEXT: v_mov_b32_e32 v19, 0x40c00000
6161 ; VI-NEXT: v_mov_b32_e32 v20, 0x40c00000
6162 ; VI-NEXT: v_mov_b32_e32 v21, 0x40e00000
6163 ; VI-NEXT: v_mov_b32_e32 v22, 0x40e00000
6164 ; VI-NEXT: v_mov_b32_e32 v23, 0x40e00000
6165 ; VI-NEXT: v_mov_b32_e32 v24, 0x41000000
6166 ; VI-NEXT: v_mov_b32_e32 v25, 0x41000000
6167 ; VI-NEXT: v_mov_b32_e32 v26, 0x41000000
6168 ; VI-NEXT: v_mov_b32_e32 v27, 0x41100000
6169 ; VI-NEXT: v_mov_b32_e32 v28, 0x41100000
6170 ; VI-NEXT: v_mov_b32_e32 v29, 0x41100000
6171 ; VI-NEXT: v_mov_b32_e32 v30, 0x41200000
6172 ; VI-NEXT: v_writelane_b32 v40, s31, 1
6173 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6174 ; VI-NEXT: v_readlane_b32 s31, v40, 1
6175 ; VI-NEXT: v_readlane_b32 s30, v40, 0
6176 ; VI-NEXT: s_mov_b32 s32, s33
6177 ; VI-NEXT: v_readlane_b32 s4, v40, 2
6178 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1
6179 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6180 ; VI-NEXT: s_mov_b64 exec, s[6:7]
6181 ; VI-NEXT: s_mov_b32 s33, s4
6182 ; VI-NEXT: s_waitcnt vmcnt(0)
6183 ; VI-NEXT: s_setpc_b64 s[30:31]
6185 ; CI-LABEL: stack_12xv3f32:
6186 ; CI: ; %bb.0: ; %entry
6187 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6188 ; CI-NEXT: s_mov_b32 s4, s33
6189 ; CI-NEXT: s_mov_b32 s33, s32
6190 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1
6191 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6192 ; CI-NEXT: s_mov_b64 exec, s[8:9]
6193 ; CI-NEXT: s_addk_i32 s32, 0x400
6194 ; CI-NEXT: v_mov_b32_e32 v0, 0x41300000
6195 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6196 ; CI-NEXT: v_mov_b32_e32 v0, 0x41400000
6197 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6198 ; CI-NEXT: v_mov_b32_e32 v0, 0x41500000
6199 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6200 ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000
6201 ; CI-NEXT: v_writelane_b32 v40, s4, 2
6202 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6203 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000
6204 ; CI-NEXT: v_writelane_b32 v40, s30, 0
6205 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6206 ; CI-NEXT: s_getpc_b64 s[4:5]
6207 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6208 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6209 ; CI-NEXT: v_mov_b32_e32 v0, 0
6210 ; CI-NEXT: v_mov_b32_e32 v1, 0
6211 ; CI-NEXT: v_mov_b32_e32 v2, 0
6212 ; CI-NEXT: v_mov_b32_e32 v3, 1.0
6213 ; CI-NEXT: v_mov_b32_e32 v4, 1.0
6214 ; CI-NEXT: v_mov_b32_e32 v5, 1.0
6215 ; CI-NEXT: v_mov_b32_e32 v6, 2.0
6216 ; CI-NEXT: v_mov_b32_e32 v7, 2.0
6217 ; CI-NEXT: v_mov_b32_e32 v8, 2.0
6218 ; CI-NEXT: v_mov_b32_e32 v9, 0x40400000
6219 ; CI-NEXT: v_mov_b32_e32 v10, 0x40400000
6220 ; CI-NEXT: v_mov_b32_e32 v11, 0x40400000
6221 ; CI-NEXT: v_mov_b32_e32 v12, 4.0
6222 ; CI-NEXT: v_mov_b32_e32 v13, 4.0
6223 ; CI-NEXT: v_mov_b32_e32 v14, 4.0
6224 ; CI-NEXT: v_mov_b32_e32 v15, 0x40a00000
6225 ; CI-NEXT: v_mov_b32_e32 v16, 0x40a00000
6226 ; CI-NEXT: v_mov_b32_e32 v17, 0x40a00000
6227 ; CI-NEXT: v_mov_b32_e32 v18, 0x40c00000
6228 ; CI-NEXT: v_mov_b32_e32 v19, 0x40c00000
6229 ; CI-NEXT: v_mov_b32_e32 v20, 0x40c00000
6230 ; CI-NEXT: v_mov_b32_e32 v21, 0x40e00000
6231 ; CI-NEXT: v_mov_b32_e32 v22, 0x40e00000
6232 ; CI-NEXT: v_mov_b32_e32 v23, 0x40e00000
6233 ; CI-NEXT: v_mov_b32_e32 v24, 0x41000000
6234 ; CI-NEXT: v_mov_b32_e32 v25, 0x41000000
6235 ; CI-NEXT: v_mov_b32_e32 v26, 0x41000000
6236 ; CI-NEXT: v_mov_b32_e32 v27, 0x41100000
6237 ; CI-NEXT: v_mov_b32_e32 v28, 0x41100000
6238 ; CI-NEXT: v_mov_b32_e32 v29, 0x41100000
6239 ; CI-NEXT: v_mov_b32_e32 v30, 0x41200000
6240 ; CI-NEXT: v_writelane_b32 v40, s31, 1
6241 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6242 ; CI-NEXT: v_readlane_b32 s31, v40, 1
6243 ; CI-NEXT: v_readlane_b32 s30, v40, 0
6244 ; CI-NEXT: s_mov_b32 s32, s33
6245 ; CI-NEXT: v_readlane_b32 s4, v40, 2
6246 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1
6247 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6248 ; CI-NEXT: s_mov_b64 exec, s[6:7]
6249 ; CI-NEXT: s_mov_b32 s33, s4
6250 ; CI-NEXT: s_waitcnt vmcnt(0)
6251 ; CI-NEXT: s_setpc_b64 s[30:31]
6253 ; GFX9-LABEL: stack_12xv3f32:
6254 ; GFX9: ; %bb.0: ; %entry
6255 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6256 ; GFX9-NEXT: s_mov_b32 s4, s33
6257 ; GFX9-NEXT: s_mov_b32 s33, s32
6258 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
6259 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6260 ; GFX9-NEXT: s_mov_b64 exec, s[8:9]
6261 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6262 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000
6263 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
6264 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000
6265 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6266 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000
6267 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6268 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000
6269 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2
6270 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6271 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000
6272 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6273 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6274 ; GFX9-NEXT: s_getpc_b64 s[4:5]
6275 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6276 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6277 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
6278 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
6279 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
6280 ; GFX9-NEXT: v_mov_b32_e32 v3, 1.0
6281 ; GFX9-NEXT: v_mov_b32_e32 v4, 1.0
6282 ; GFX9-NEXT: v_mov_b32_e32 v5, 1.0
6283 ; GFX9-NEXT: v_mov_b32_e32 v6, 2.0
6284 ; GFX9-NEXT: v_mov_b32_e32 v7, 2.0
6285 ; GFX9-NEXT: v_mov_b32_e32 v8, 2.0
6286 ; GFX9-NEXT: v_mov_b32_e32 v9, 0x40400000
6287 ; GFX9-NEXT: v_mov_b32_e32 v10, 0x40400000
6288 ; GFX9-NEXT: v_mov_b32_e32 v11, 0x40400000
6289 ; GFX9-NEXT: v_mov_b32_e32 v12, 4.0
6290 ; GFX9-NEXT: v_mov_b32_e32 v13, 4.0
6291 ; GFX9-NEXT: v_mov_b32_e32 v14, 4.0
6292 ; GFX9-NEXT: v_mov_b32_e32 v15, 0x40a00000
6293 ; GFX9-NEXT: v_mov_b32_e32 v16, 0x40a00000
6294 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x40a00000
6295 ; GFX9-NEXT: v_mov_b32_e32 v18, 0x40c00000
6296 ; GFX9-NEXT: v_mov_b32_e32 v19, 0x40c00000
6297 ; GFX9-NEXT: v_mov_b32_e32 v20, 0x40c00000
6298 ; GFX9-NEXT: v_mov_b32_e32 v21, 0x40e00000
6299 ; GFX9-NEXT: v_mov_b32_e32 v22, 0x40e00000
6300 ; GFX9-NEXT: v_mov_b32_e32 v23, 0x40e00000
6301 ; GFX9-NEXT: v_mov_b32_e32 v24, 0x41000000
6302 ; GFX9-NEXT: v_mov_b32_e32 v25, 0x41000000
6303 ; GFX9-NEXT: v_mov_b32_e32 v26, 0x41000000
6304 ; GFX9-NEXT: v_mov_b32_e32 v27, 0x41100000
6305 ; GFX9-NEXT: v_mov_b32_e32 v28, 0x41100000
6306 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x41100000
6307 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x41200000
6308 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6309 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
6310 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6311 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6312 ; GFX9-NEXT: s_mov_b32 s32, s33
6313 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
6314 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6315 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6316 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6317 ; GFX9-NEXT: s_mov_b32 s33, s4
6318 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6319 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6321 ; GFX11-LABEL: stack_12xv3f32:
6322 ; GFX11: ; %bb.0: ; %entry
6323 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6324 ; GFX11-NEXT: s_mov_b32 s0, s33
6325 ; GFX11-NEXT: s_mov_b32 s33, s32
6326 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6327 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6328 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6329 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6330 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x41300000
6331 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41400000
6332 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41500000
6333 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41600000
6334 ; GFX11-NEXT: v_dual_mov_b32 v4, 0x41700000 :: v_dual_mov_b32 v5, 1.0
6335 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6336 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6337 ; GFX11-NEXT: s_add_i32 s0, s32, 16
6338 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
6339 ; GFX11-NEXT: scratch_store_b32 off, v4, s0
6340 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
6341 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1.0
6342 ; GFX11-NEXT: v_dual_mov_b32 v4, 1.0 :: v_dual_mov_b32 v7, 2.0
6343 ; GFX11-NEXT: v_dual_mov_b32 v6, 2.0 :: v_dual_mov_b32 v9, 0x40400000
6344 ; GFX11-NEXT: v_dual_mov_b32 v8, 2.0 :: v_dual_mov_b32 v11, 0x40400000
6345 ; GFX11-NEXT: v_dual_mov_b32 v10, 0x40400000 :: v_dual_mov_b32 v13, 4.0
6346 ; GFX11-NEXT: v_dual_mov_b32 v12, 4.0 :: v_dual_mov_b32 v15, 0x40a00000
6347 ; GFX11-NEXT: v_dual_mov_b32 v14, 4.0 :: v_dual_mov_b32 v17, 0x40a00000
6348 ; GFX11-NEXT: v_mov_b32_e32 v16, 0x40a00000
6349 ; GFX11-NEXT: v_dual_mov_b32 v18, 0x40c00000 :: v_dual_mov_b32 v19, 0x40c00000
6350 ; GFX11-NEXT: v_mov_b32_e32 v20, 0x40c00000
6351 ; GFX11-NEXT: v_dual_mov_b32 v21, 0x40e00000 :: v_dual_mov_b32 v22, 0x40e00000
6352 ; GFX11-NEXT: v_mov_b32_e32 v23, 0x40e00000
6353 ; GFX11-NEXT: v_dual_mov_b32 v24, 0x41000000 :: v_dual_mov_b32 v25, 0x41000000
6354 ; GFX11-NEXT: v_mov_b32_e32 v26, 0x41000000
6355 ; GFX11-NEXT: v_dual_mov_b32 v27, 0x41100000 :: v_dual_mov_b32 v28, 0x41100000
6356 ; GFX11-NEXT: v_mov_b32_e32 v29, 0x41100000
6357 ; GFX11-NEXT: v_mov_b32_e32 v30, 0x41200000
6358 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6359 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3f32@rel32@lo+4
6360 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3f32@rel32@hi+12
6361 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6362 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6363 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6364 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6365 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6366 ; GFX11-NEXT: s_mov_b32 s32, s33
6367 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6368 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6369 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6370 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6371 ; GFX11-NEXT: s_mov_b32 s33, s0
6372 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6373 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6375 ; HSA-LABEL: stack_12xv3f32:
6376 ; HSA: ; %bb.0: ; %entry
6377 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6378 ; HSA-NEXT: s_mov_b32 s4, s33
6379 ; HSA-NEXT: s_mov_b32 s33, s32
6380 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1
6381 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6382 ; HSA-NEXT: s_mov_b64 exec, s[8:9]
6383 ; HSA-NEXT: s_addk_i32 s32, 0x400
6384 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000
6385 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32
6386 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41400000
6387 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6388 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000
6389 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6390 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000
6391 ; HSA-NEXT: v_writelane_b32 v40, s4, 2
6392 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6393 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000
6394 ; HSA-NEXT: v_writelane_b32 v40, s30, 0
6395 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6396 ; HSA-NEXT: s_getpc_b64 s[4:5]
6397 ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6398 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6399 ; HSA-NEXT: v_mov_b32_e32 v0, 0
6400 ; HSA-NEXT: v_mov_b32_e32 v1, 0
6401 ; HSA-NEXT: v_mov_b32_e32 v2, 0
6402 ; HSA-NEXT: v_mov_b32_e32 v3, 1.0
6403 ; HSA-NEXT: v_mov_b32_e32 v4, 1.0
6404 ; HSA-NEXT: v_mov_b32_e32 v5, 1.0
6405 ; HSA-NEXT: v_mov_b32_e32 v6, 2.0
6406 ; HSA-NEXT: v_mov_b32_e32 v7, 2.0
6407 ; HSA-NEXT: v_mov_b32_e32 v8, 2.0
6408 ; HSA-NEXT: v_mov_b32_e32 v9, 0x40400000
6409 ; HSA-NEXT: v_mov_b32_e32 v10, 0x40400000
6410 ; HSA-NEXT: v_mov_b32_e32 v11, 0x40400000
6411 ; HSA-NEXT: v_mov_b32_e32 v12, 4.0
6412 ; HSA-NEXT: v_mov_b32_e32 v13, 4.0
6413 ; HSA-NEXT: v_mov_b32_e32 v14, 4.0
6414 ; HSA-NEXT: v_mov_b32_e32 v15, 0x40a00000
6415 ; HSA-NEXT: v_mov_b32_e32 v16, 0x40a00000
6416 ; HSA-NEXT: v_mov_b32_e32 v17, 0x40a00000
6417 ; HSA-NEXT: v_mov_b32_e32 v18, 0x40c00000
6418 ; HSA-NEXT: v_mov_b32_e32 v19, 0x40c00000
6419 ; HSA-NEXT: v_mov_b32_e32 v20, 0x40c00000
6420 ; HSA-NEXT: v_mov_b32_e32 v21, 0x40e00000
6421 ; HSA-NEXT: v_mov_b32_e32 v22, 0x40e00000
6422 ; HSA-NEXT: v_mov_b32_e32 v23, 0x40e00000
6423 ; HSA-NEXT: v_mov_b32_e32 v24, 0x41000000
6424 ; HSA-NEXT: v_mov_b32_e32 v25, 0x41000000
6425 ; HSA-NEXT: v_mov_b32_e32 v26, 0x41000000
6426 ; HSA-NEXT: v_mov_b32_e32 v27, 0x41100000
6427 ; HSA-NEXT: v_mov_b32_e32 v28, 0x41100000
6428 ; HSA-NEXT: v_mov_b32_e32 v29, 0x41100000
6429 ; HSA-NEXT: v_mov_b32_e32 v30, 0x41200000
6430 ; HSA-NEXT: v_writelane_b32 v40, s31, 1
6431 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
6432 ; HSA-NEXT: v_readlane_b32 s31, v40, 1
6433 ; HSA-NEXT: v_readlane_b32 s30, v40, 0
6434 ; HSA-NEXT: s_mov_b32 s32, s33
6435 ; HSA-NEXT: v_readlane_b32 s4, v40, 2
6436 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
6437 ; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6438 ; HSA-NEXT: s_mov_b64 exec, s[6:7]
6439 ; HSA-NEXT: s_mov_b32 s33, s4
6440 ; HSA-NEXT: s_waitcnt vmcnt(0)
6441 ; HSA-NEXT: s_setpc_b64 s[30:31]
6443 call void @external_void_func_12xv3f32(
6444 <3 x float><float 0.0, float 0.0, float 0.0>,
6445 <3 x float><float 1.0, float 1.0, float 1.0>,
6446 <3 x float><float 2.0, float 2.0, float 2.0>,
6447 <3 x float><float 3.0, float 3.0, float 3.0>,
6448 <3 x float><float 4.0, float 4.0, float 4.0>,
6449 <3 x float><float 5.0, float 5.0, float 5.0>,
6450 <3 x float><float 6.0, float 6.0, float 6.0>,
6451 <3 x float><float 7.0, float 7.0, float 7.0>,
6452 <3 x float><float 8.0, float 8.0, float 8.0>,
6453 <3 x float><float 9.0, float 9.0, float 9.0>,
6454 <3 x float><float 10.0, float 11.0, float 12.0>,
6455 <3 x float><float 13.0, float 14.0, float 15.0>)
; stack_8xv5i32 passes eight constant <5 x i32> arguments (40 scalars in
; total) to @external_void_func_8xv5i32. In the expected output below, the
; first 31 scalars (five each of 0 through 5, then 6) are materialized in
; v0-v30, and the remaining nine (7 through 15) are written to memory at s32
; offsets 0 through 32 before the call.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
6459 define void @stack_8xv5i32() #0 {
6460 ; VI-LABEL: stack_8xv5i32:
6461 ; VI: ; %bb.0: ; %entry
6462 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6463 ; VI-NEXT: s_mov_b32 s4, s33
6464 ; VI-NEXT: s_mov_b32 s33, s32
6465 ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
6466 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6467 ; VI-NEXT: s_mov_b64 exec, s[8:9]
6468 ; VI-NEXT: s_addk_i32 s32, 0x400
6469 ; VI-NEXT: v_mov_b32_e32 v0, 7
6470 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6471 ; VI-NEXT: v_mov_b32_e32 v0, 8
6472 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6473 ; VI-NEXT: v_mov_b32_e32 v0, 9
6474 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6475 ; VI-NEXT: v_mov_b32_e32 v0, 10
6476 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6477 ; VI-NEXT: v_mov_b32_e32 v0, 11
6478 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6479 ; VI-NEXT: v_mov_b32_e32 v0, 12
6480 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6481 ; VI-NEXT: v_mov_b32_e32 v0, 13
6482 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6483 ; VI-NEXT: v_mov_b32_e32 v0, 14
6484 ; VI-NEXT: v_writelane_b32 v40, s4, 2
6485 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6486 ; VI-NEXT: v_mov_b32_e32 v0, 15
6487 ; VI-NEXT: v_writelane_b32 v40, s30, 0
6488 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6489 ; VI-NEXT: s_getpc_b64 s[4:5]
6490 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6491 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6492 ; VI-NEXT: v_mov_b32_e32 v0, 0
6493 ; VI-NEXT: v_mov_b32_e32 v1, 0
6494 ; VI-NEXT: v_mov_b32_e32 v2, 0
6495 ; VI-NEXT: v_mov_b32_e32 v3, 0
6496 ; VI-NEXT: v_mov_b32_e32 v4, 0
6497 ; VI-NEXT: v_mov_b32_e32 v5, 1
6498 ; VI-NEXT: v_mov_b32_e32 v6, 1
6499 ; VI-NEXT: v_mov_b32_e32 v7, 1
6500 ; VI-NEXT: v_mov_b32_e32 v8, 1
6501 ; VI-NEXT: v_mov_b32_e32 v9, 1
6502 ; VI-NEXT: v_mov_b32_e32 v10, 2
6503 ; VI-NEXT: v_mov_b32_e32 v11, 2
6504 ; VI-NEXT: v_mov_b32_e32 v12, 2
6505 ; VI-NEXT: v_mov_b32_e32 v13, 2
6506 ; VI-NEXT: v_mov_b32_e32 v14, 2
6507 ; VI-NEXT: v_mov_b32_e32 v15, 3
6508 ; VI-NEXT: v_mov_b32_e32 v16, 3
6509 ; VI-NEXT: v_mov_b32_e32 v17, 3
6510 ; VI-NEXT: v_mov_b32_e32 v18, 3
6511 ; VI-NEXT: v_mov_b32_e32 v19, 3
6512 ; VI-NEXT: v_mov_b32_e32 v20, 4
6513 ; VI-NEXT: v_mov_b32_e32 v21, 4
6514 ; VI-NEXT: v_mov_b32_e32 v22, 4
6515 ; VI-NEXT: v_mov_b32_e32 v23, 4
6516 ; VI-NEXT: v_mov_b32_e32 v24, 4
6517 ; VI-NEXT: v_mov_b32_e32 v25, 5
6518 ; VI-NEXT: v_mov_b32_e32 v26, 5
6519 ; VI-NEXT: v_mov_b32_e32 v27, 5
6520 ; VI-NEXT: v_mov_b32_e32 v28, 5
6521 ; VI-NEXT: v_mov_b32_e32 v29, 5
6522 ; VI-NEXT: v_mov_b32_e32 v30, 6
6523 ; VI-NEXT: v_writelane_b32 v40, s31, 1
6524 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6525 ; VI-NEXT: v_readlane_b32 s31, v40, 1
6526 ; VI-NEXT: v_readlane_b32 s30, v40, 0
6527 ; VI-NEXT: s_mov_b32 s32, s33
6528 ; VI-NEXT: v_readlane_b32 s4, v40, 2
6529 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1
6530 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6531 ; VI-NEXT: s_mov_b64 exec, s[6:7]
6532 ; VI-NEXT: s_mov_b32 s33, s4
6533 ; VI-NEXT: s_waitcnt vmcnt(0)
6534 ; VI-NEXT: s_setpc_b64 s[30:31]
6536 ; CI-LABEL: stack_8xv5i32:
6537 ; CI: ; %bb.0: ; %entry
6538 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6539 ; CI-NEXT: s_mov_b32 s4, s33
6540 ; CI-NEXT: s_mov_b32 s33, s32
6541 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1
6542 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6543 ; CI-NEXT: s_mov_b64 exec, s[8:9]
6544 ; CI-NEXT: s_addk_i32 s32, 0x400
6545 ; CI-NEXT: v_mov_b32_e32 v0, 7
6546 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6547 ; CI-NEXT: v_mov_b32_e32 v0, 8
6548 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6549 ; CI-NEXT: v_mov_b32_e32 v0, 9
6550 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6551 ; CI-NEXT: v_mov_b32_e32 v0, 10
6552 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6553 ; CI-NEXT: v_mov_b32_e32 v0, 11
6554 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6555 ; CI-NEXT: v_mov_b32_e32 v0, 12
6556 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6557 ; CI-NEXT: v_mov_b32_e32 v0, 13
6558 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6559 ; CI-NEXT: v_mov_b32_e32 v0, 14
6560 ; CI-NEXT: v_writelane_b32 v40, s4, 2
6561 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6562 ; CI-NEXT: v_mov_b32_e32 v0, 15
6563 ; CI-NEXT: v_writelane_b32 v40, s30, 0
6564 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6565 ; CI-NEXT: s_getpc_b64 s[4:5]
6566 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6567 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6568 ; CI-NEXT: v_mov_b32_e32 v0, 0
6569 ; CI-NEXT: v_mov_b32_e32 v1, 0
6570 ; CI-NEXT: v_mov_b32_e32 v2, 0
6571 ; CI-NEXT: v_mov_b32_e32 v3, 0
6572 ; CI-NEXT: v_mov_b32_e32 v4, 0
6573 ; CI-NEXT: v_mov_b32_e32 v5, 1
6574 ; CI-NEXT: v_mov_b32_e32 v6, 1
6575 ; CI-NEXT: v_mov_b32_e32 v7, 1
6576 ; CI-NEXT: v_mov_b32_e32 v8, 1
6577 ; CI-NEXT: v_mov_b32_e32 v9, 1
6578 ; CI-NEXT: v_mov_b32_e32 v10, 2
6579 ; CI-NEXT: v_mov_b32_e32 v11, 2
6580 ; CI-NEXT: v_mov_b32_e32 v12, 2
6581 ; CI-NEXT: v_mov_b32_e32 v13, 2
6582 ; CI-NEXT: v_mov_b32_e32 v14, 2
6583 ; CI-NEXT: v_mov_b32_e32 v15, 3
6584 ; CI-NEXT: v_mov_b32_e32 v16, 3
6585 ; CI-NEXT: v_mov_b32_e32 v17, 3
6586 ; CI-NEXT: v_mov_b32_e32 v18, 3
6587 ; CI-NEXT: v_mov_b32_e32 v19, 3
6588 ; CI-NEXT: v_mov_b32_e32 v20, 4
6589 ; CI-NEXT: v_mov_b32_e32 v21, 4
6590 ; CI-NEXT: v_mov_b32_e32 v22, 4
6591 ; CI-NEXT: v_mov_b32_e32 v23, 4
6592 ; CI-NEXT: v_mov_b32_e32 v24, 4
6593 ; CI-NEXT: v_mov_b32_e32 v25, 5
6594 ; CI-NEXT: v_mov_b32_e32 v26, 5
6595 ; CI-NEXT: v_mov_b32_e32 v27, 5
6596 ; CI-NEXT: v_mov_b32_e32 v28, 5
6597 ; CI-NEXT: v_mov_b32_e32 v29, 5
6598 ; CI-NEXT: v_mov_b32_e32 v30, 6
6599 ; CI-NEXT: v_writelane_b32 v40, s31, 1
6600 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6601 ; CI-NEXT: v_readlane_b32 s31, v40, 1
6602 ; CI-NEXT: v_readlane_b32 s30, v40, 0
6603 ; CI-NEXT: s_mov_b32 s32, s33
6604 ; CI-NEXT: v_readlane_b32 s4, v40, 2
6605 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1
6606 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6607 ; CI-NEXT: s_mov_b64 exec, s[6:7]
6608 ; CI-NEXT: s_mov_b32 s33, s4
6609 ; CI-NEXT: s_waitcnt vmcnt(0)
6610 ; CI-NEXT: s_setpc_b64 s[30:31]
6612 ; GFX9-LABEL: stack_8xv5i32:
6613 ; GFX9: ; %bb.0: ; %entry
6614 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6615 ; GFX9-NEXT: s_mov_b32 s4, s33
6616 ; GFX9-NEXT: s_mov_b32 s33, s32
6617 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
6618 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6619 ; GFX9-NEXT: s_mov_b64 exec, s[8:9]
6620 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6621 ; GFX9-NEXT: v_mov_b32_e32 v0, 7
6622 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
6623 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
6624 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6625 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
6626 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6627 ; GFX9-NEXT: v_mov_b32_e32 v0, 10
6628 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6629 ; GFX9-NEXT: v_mov_b32_e32 v0, 11
6630 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6631 ; GFX9-NEXT: v_mov_b32_e32 v0, 12
6632 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6633 ; GFX9-NEXT: v_mov_b32_e32 v0, 13
6634 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6635 ; GFX9-NEXT: v_mov_b32_e32 v0, 14
6636 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2
6637 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6638 ; GFX9-NEXT: v_mov_b32_e32 v0, 15
6639 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6640 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6641 ; GFX9-NEXT: s_getpc_b64 s[4:5]
6642 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6643 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6644 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
6645 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
6646 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
6647 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
6648 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
6649 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
6650 ; GFX9-NEXT: v_mov_b32_e32 v6, 1
6651 ; GFX9-NEXT: v_mov_b32_e32 v7, 1
6652 ; GFX9-NEXT: v_mov_b32_e32 v8, 1
6653 ; GFX9-NEXT: v_mov_b32_e32 v9, 1
6654 ; GFX9-NEXT: v_mov_b32_e32 v10, 2
6655 ; GFX9-NEXT: v_mov_b32_e32 v11, 2
6656 ; GFX9-NEXT: v_mov_b32_e32 v12, 2
6657 ; GFX9-NEXT: v_mov_b32_e32 v13, 2
6658 ; GFX9-NEXT: v_mov_b32_e32 v14, 2
6659 ; GFX9-NEXT: v_mov_b32_e32 v15, 3
6660 ; GFX9-NEXT: v_mov_b32_e32 v16, 3
6661 ; GFX9-NEXT: v_mov_b32_e32 v17, 3
6662 ; GFX9-NEXT: v_mov_b32_e32 v18, 3
6663 ; GFX9-NEXT: v_mov_b32_e32 v19, 3
6664 ; GFX9-NEXT: v_mov_b32_e32 v20, 4
6665 ; GFX9-NEXT: v_mov_b32_e32 v21, 4
6666 ; GFX9-NEXT: v_mov_b32_e32 v22, 4
6667 ; GFX9-NEXT: v_mov_b32_e32 v23, 4
6668 ; GFX9-NEXT: v_mov_b32_e32 v24, 4
6669 ; GFX9-NEXT: v_mov_b32_e32 v25, 5
6670 ; GFX9-NEXT: v_mov_b32_e32 v26, 5
6671 ; GFX9-NEXT: v_mov_b32_e32 v27, 5
6672 ; GFX9-NEXT: v_mov_b32_e32 v28, 5
6673 ; GFX9-NEXT: v_mov_b32_e32 v29, 5
6674 ; GFX9-NEXT: v_mov_b32_e32 v30, 6
6675 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6676 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
6677 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6678 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6679 ; GFX9-NEXT: s_mov_b32 s32, s33
6680 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
6681 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6682 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6683 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6684 ; GFX9-NEXT: s_mov_b32 s33, s4
6685 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6686 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6688 ; GFX11-LABEL: stack_8xv5i32:
6689 ; GFX11: ; %bb.0: ; %entry
6690 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6691 ; GFX11-NEXT: s_mov_b32 s0, s33
6692 ; GFX11-NEXT: s_mov_b32 s33, s32
6693 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6694 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6695 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6696 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6697 ; GFX11-NEXT: v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, 8
6698 ; GFX11-NEXT: v_dual_mov_b32 v2, 9 :: v_dual_mov_b32 v3, 10
6699 ; GFX11-NEXT: v_dual_mov_b32 v8, 15 :: v_dual_mov_b32 v5, 12
6700 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6701 ; GFX11-NEXT: v_dual_mov_b32 v4, 11 :: v_dual_mov_b32 v7, 14
6702 ; GFX11-NEXT: v_mov_b32_e32 v6, 13
6703 ; GFX11-NEXT: s_add_i32 s0, s32, 32
6704 ; GFX11-NEXT: s_add_i32 s1, s32, 16
6705 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6706 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
6707 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
6708 ; GFX11-NEXT: scratch_store_b32 off, v8, s0
6709 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s1
6710 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, 0
6711 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 1
6712 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v7, 1
6713 ; GFX11-NEXT: v_dual_mov_b32 v6, 1 :: v_dual_mov_b32 v9, 1
6714 ; GFX11-NEXT: v_dual_mov_b32 v8, 1 :: v_dual_mov_b32 v11, 2
6715 ; GFX11-NEXT: v_dual_mov_b32 v10, 2 :: v_dual_mov_b32 v13, 2
6716 ; GFX11-NEXT: v_dual_mov_b32 v12, 2 :: v_dual_mov_b32 v15, 3
6717 ; GFX11-NEXT: v_dual_mov_b32 v14, 2 :: v_dual_mov_b32 v17, 3
6718 ; GFX11-NEXT: v_dual_mov_b32 v16, 3 :: v_dual_mov_b32 v19, 3
6719 ; GFX11-NEXT: v_dual_mov_b32 v18, 3 :: v_dual_mov_b32 v21, 4
6720 ; GFX11-NEXT: v_dual_mov_b32 v20, 4 :: v_dual_mov_b32 v23, 4
6721 ; GFX11-NEXT: v_dual_mov_b32 v22, 4 :: v_dual_mov_b32 v25, 5
6722 ; GFX11-NEXT: v_dual_mov_b32 v24, 4 :: v_dual_mov_b32 v27, 5
6723 ; GFX11-NEXT: v_dual_mov_b32 v26, 5 :: v_dual_mov_b32 v29, 5
6724 ; GFX11-NEXT: v_mov_b32_e32 v28, 5
6725 ; GFX11-NEXT: v_mov_b32_e32 v30, 6
6726 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6727 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4
6728 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12
6729 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6730 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6731 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6732 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6733 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6734 ; GFX11-NEXT: s_mov_b32 s32, s33
6735 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6736 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6737 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6738 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6739 ; GFX11-NEXT: s_mov_b32 s33, s0
6740 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6741 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6743 ; HSA-LABEL: stack_8xv5i32:
6744 ; HSA: ; %bb.0: ; %entry
6745 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6746 ; HSA-NEXT: s_mov_b32 s4, s33
6747 ; HSA-NEXT: s_mov_b32 s33, s32
6748 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1
6749 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6750 ; HSA-NEXT: s_mov_b64 exec, s[8:9]
6751 ; HSA-NEXT: s_addk_i32 s32, 0x400
6752 ; HSA-NEXT: v_mov_b32_e32 v0, 7
6753 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32
6754 ; HSA-NEXT: v_mov_b32_e32 v0, 8
6755 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6756 ; HSA-NEXT: v_mov_b32_e32 v0, 9
6757 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6758 ; HSA-NEXT: v_mov_b32_e32 v0, 10
6759 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6760 ; HSA-NEXT: v_mov_b32_e32 v0, 11
6761 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6762 ; HSA-NEXT: v_mov_b32_e32 v0, 12
6763 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6764 ; HSA-NEXT: v_mov_b32_e32 v0, 13
6765 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6766 ; HSA-NEXT: v_mov_b32_e32 v0, 14
6767 ; HSA-NEXT: v_writelane_b32 v40, s4, 2
6768 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6769 ; HSA-NEXT: v_mov_b32_e32 v0, 15
6770 ; HSA-NEXT: v_writelane_b32 v40, s30, 0
6771 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6772 ; HSA-NEXT: s_getpc_b64 s[4:5]
6773 ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6774 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6775 ; HSA-NEXT: v_mov_b32_e32 v0, 0
6776 ; HSA-NEXT: v_mov_b32_e32 v1, 0
6777 ; HSA-NEXT: v_mov_b32_e32 v2, 0
6778 ; HSA-NEXT: v_mov_b32_e32 v3, 0
6779 ; HSA-NEXT: v_mov_b32_e32 v4, 0
6780 ; HSA-NEXT: v_mov_b32_e32 v5, 1
6781 ; HSA-NEXT: v_mov_b32_e32 v6, 1
6782 ; HSA-NEXT: v_mov_b32_e32 v7, 1
6783 ; HSA-NEXT: v_mov_b32_e32 v8, 1
6784 ; HSA-NEXT: v_mov_b32_e32 v9, 1
6785 ; HSA-NEXT: v_mov_b32_e32 v10, 2
6786 ; HSA-NEXT: v_mov_b32_e32 v11, 2
6787 ; HSA-NEXT: v_mov_b32_e32 v12, 2
6788 ; HSA-NEXT: v_mov_b32_e32 v13, 2
6789 ; HSA-NEXT: v_mov_b32_e32 v14, 2
6790 ; HSA-NEXT: v_mov_b32_e32 v15, 3
6791 ; HSA-NEXT: v_mov_b32_e32 v16, 3
6792 ; HSA-NEXT: v_mov_b32_e32 v17, 3
6793 ; HSA-NEXT: v_mov_b32_e32 v18, 3
6794 ; HSA-NEXT: v_mov_b32_e32 v19, 3
6795 ; HSA-NEXT: v_mov_b32_e32 v20, 4
6796 ; HSA-NEXT: v_mov_b32_e32 v21, 4
6797 ; HSA-NEXT: v_mov_b32_e32 v22, 4
6798 ; HSA-NEXT: v_mov_b32_e32 v23, 4
6799 ; HSA-NEXT: v_mov_b32_e32 v24, 4
6800 ; HSA-NEXT: v_mov_b32_e32 v25, 5
6801 ; HSA-NEXT: v_mov_b32_e32 v26, 5
6802 ; HSA-NEXT: v_mov_b32_e32 v27, 5
6803 ; HSA-NEXT: v_mov_b32_e32 v28, 5
6804 ; HSA-NEXT: v_mov_b32_e32 v29, 5
6805 ; HSA-NEXT: v_mov_b32_e32 v30, 6
6806 ; HSA-NEXT: v_writelane_b32 v40, s31, 1
6807 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
6808 ; HSA-NEXT: v_readlane_b32 s31, v40, 1
6809 ; HSA-NEXT: v_readlane_b32 s30, v40, 0
6810 ; HSA-NEXT: s_mov_b32 s32, s33
6811 ; HSA-NEXT: v_readlane_b32 s4, v40, 2
6812 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
6813 ; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6814 ; HSA-NEXT: s_mov_b64 exec, s[6:7]
6815 ; HSA-NEXT: s_mov_b32 s33, s4
6816 ; HSA-NEXT: s_waitcnt vmcnt(0)
6817 ; HSA-NEXT: s_setpc_b64 s[30:31]
6819 call void @external_void_func_8xv5i32(
6820 <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
6821 <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
6822 <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
6823 <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
6824 <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
6825 <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
6826 <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
6827 <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
; stack_8xv5f32 passes eight constant <5 x float> vectors (40 floats in
; total) to external_void_func_8xv5f32. Per the expected output below, the
; first 31 elements are materialized in v0-v30, and the remaining nine
; (7.0 through 15.0, i.e. 0x40e00000 through 0x41700000) are written to
; memory at byte offsets 0 through 32 from s32 before the call.
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
6831 define void @stack_8xv5f32() #0 {
6832 ; VI-LABEL: stack_8xv5f32:
6833 ; VI: ; %bb.0: ; %entry
6834 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6835 ; VI-NEXT: s_mov_b32 s4, s33
6836 ; VI-NEXT: s_mov_b32 s33, s32
6837 ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
6838 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6839 ; VI-NEXT: s_mov_b64 exec, s[8:9]
6840 ; VI-NEXT: s_addk_i32 s32, 0x400
6841 ; VI-NEXT: v_mov_b32_e32 v0, 0x40e00000
6842 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6843 ; VI-NEXT: v_mov_b32_e32 v0, 0x41000000
6844 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6845 ; VI-NEXT: v_mov_b32_e32 v0, 0x41100000
6846 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6847 ; VI-NEXT: v_mov_b32_e32 v0, 0x41200000
6848 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6849 ; VI-NEXT: v_mov_b32_e32 v0, 0x41300000
6850 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6851 ; VI-NEXT: v_mov_b32_e32 v0, 0x41400000
6852 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6853 ; VI-NEXT: v_mov_b32_e32 v0, 0x41500000
6854 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6855 ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000
6856 ; VI-NEXT: v_writelane_b32 v40, s4, 2
6857 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6858 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000
6859 ; VI-NEXT: v_writelane_b32 v40, s30, 0
6860 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6861 ; VI-NEXT: s_getpc_b64 s[4:5]
6862 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
6863 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
6864 ; VI-NEXT: v_mov_b32_e32 v0, 0
6865 ; VI-NEXT: v_mov_b32_e32 v1, 0
6866 ; VI-NEXT: v_mov_b32_e32 v2, 0
6867 ; VI-NEXT: v_mov_b32_e32 v3, 0
6868 ; VI-NEXT: v_mov_b32_e32 v4, 0
6869 ; VI-NEXT: v_mov_b32_e32 v5, 1.0
6870 ; VI-NEXT: v_mov_b32_e32 v6, 1.0
6871 ; VI-NEXT: v_mov_b32_e32 v7, 1.0
6872 ; VI-NEXT: v_mov_b32_e32 v8, 1.0
6873 ; VI-NEXT: v_mov_b32_e32 v9, 1.0
6874 ; VI-NEXT: v_mov_b32_e32 v10, 2.0
6875 ; VI-NEXT: v_mov_b32_e32 v11, 2.0
6876 ; VI-NEXT: v_mov_b32_e32 v12, 2.0
6877 ; VI-NEXT: v_mov_b32_e32 v13, 2.0
6878 ; VI-NEXT: v_mov_b32_e32 v14, 2.0
6879 ; VI-NEXT: v_mov_b32_e32 v15, 0x40400000
6880 ; VI-NEXT: v_mov_b32_e32 v16, 0x40400000
6881 ; VI-NEXT: v_mov_b32_e32 v17, 0x40400000
6882 ; VI-NEXT: v_mov_b32_e32 v18, 0x40400000
6883 ; VI-NEXT: v_mov_b32_e32 v19, 0x40400000
6884 ; VI-NEXT: v_mov_b32_e32 v20, 4.0
6885 ; VI-NEXT: v_mov_b32_e32 v21, 4.0
6886 ; VI-NEXT: v_mov_b32_e32 v22, 4.0
6887 ; VI-NEXT: v_mov_b32_e32 v23, 4.0
6888 ; VI-NEXT: v_mov_b32_e32 v24, 4.0
6889 ; VI-NEXT: v_mov_b32_e32 v25, 0x40a00000
6890 ; VI-NEXT: v_mov_b32_e32 v26, 0x40a00000
6891 ; VI-NEXT: v_mov_b32_e32 v27, 0x40a00000
6892 ; VI-NEXT: v_mov_b32_e32 v28, 0x40a00000
6893 ; VI-NEXT: v_mov_b32_e32 v29, 0x40a00000
6894 ; VI-NEXT: v_mov_b32_e32 v30, 0x40c00000
6895 ; VI-NEXT: v_writelane_b32 v40, s31, 1
6896 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6897 ; VI-NEXT: v_readlane_b32 s31, v40, 1
6898 ; VI-NEXT: v_readlane_b32 s30, v40, 0
6899 ; VI-NEXT: s_mov_b32 s32, s33
6900 ; VI-NEXT: v_readlane_b32 s4, v40, 2
6901 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1
6902 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6903 ; VI-NEXT: s_mov_b64 exec, s[6:7]
6904 ; VI-NEXT: s_mov_b32 s33, s4
6905 ; VI-NEXT: s_waitcnt vmcnt(0)
6906 ; VI-NEXT: s_setpc_b64 s[30:31]
6908 ; CI-LABEL: stack_8xv5f32:
6909 ; CI: ; %bb.0: ; %entry
6910 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6911 ; CI-NEXT: s_mov_b32 s4, s33
6912 ; CI-NEXT: s_mov_b32 s33, s32
6913 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1
6914 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6915 ; CI-NEXT: s_mov_b64 exec, s[8:9]
6916 ; CI-NEXT: s_addk_i32 s32, 0x400
6917 ; CI-NEXT: v_mov_b32_e32 v0, 0x40e00000
6918 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6919 ; CI-NEXT: v_mov_b32_e32 v0, 0x41000000
6920 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6921 ; CI-NEXT: v_mov_b32_e32 v0, 0x41100000
6922 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6923 ; CI-NEXT: v_mov_b32_e32 v0, 0x41200000
6924 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6925 ; CI-NEXT: v_mov_b32_e32 v0, 0x41300000
6926 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6927 ; CI-NEXT: v_mov_b32_e32 v0, 0x41400000
6928 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6929 ; CI-NEXT: v_mov_b32_e32 v0, 0x41500000
6930 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6931 ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000
6932 ; CI-NEXT: v_writelane_b32 v40, s4, 2
6933 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6934 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000
6935 ; CI-NEXT: v_writelane_b32 v40, s30, 0
6936 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6937 ; CI-NEXT: s_getpc_b64 s[4:5]
6938 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
6939 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
6940 ; CI-NEXT: v_mov_b32_e32 v0, 0
6941 ; CI-NEXT: v_mov_b32_e32 v1, 0
6942 ; CI-NEXT: v_mov_b32_e32 v2, 0
6943 ; CI-NEXT: v_mov_b32_e32 v3, 0
6944 ; CI-NEXT: v_mov_b32_e32 v4, 0
6945 ; CI-NEXT: v_mov_b32_e32 v5, 1.0
6946 ; CI-NEXT: v_mov_b32_e32 v6, 1.0
6947 ; CI-NEXT: v_mov_b32_e32 v7, 1.0
6948 ; CI-NEXT: v_mov_b32_e32 v8, 1.0
6949 ; CI-NEXT: v_mov_b32_e32 v9, 1.0
6950 ; CI-NEXT: v_mov_b32_e32 v10, 2.0
6951 ; CI-NEXT: v_mov_b32_e32 v11, 2.0
6952 ; CI-NEXT: v_mov_b32_e32 v12, 2.0
6953 ; CI-NEXT: v_mov_b32_e32 v13, 2.0
6954 ; CI-NEXT: v_mov_b32_e32 v14, 2.0
6955 ; CI-NEXT: v_mov_b32_e32 v15, 0x40400000
6956 ; CI-NEXT: v_mov_b32_e32 v16, 0x40400000
6957 ; CI-NEXT: v_mov_b32_e32 v17, 0x40400000
6958 ; CI-NEXT: v_mov_b32_e32 v18, 0x40400000
6959 ; CI-NEXT: v_mov_b32_e32 v19, 0x40400000
6960 ; CI-NEXT: v_mov_b32_e32 v20, 4.0
6961 ; CI-NEXT: v_mov_b32_e32 v21, 4.0
6962 ; CI-NEXT: v_mov_b32_e32 v22, 4.0
6963 ; CI-NEXT: v_mov_b32_e32 v23, 4.0
6964 ; CI-NEXT: v_mov_b32_e32 v24, 4.0
6965 ; CI-NEXT: v_mov_b32_e32 v25, 0x40a00000
6966 ; CI-NEXT: v_mov_b32_e32 v26, 0x40a00000
6967 ; CI-NEXT: v_mov_b32_e32 v27, 0x40a00000
6968 ; CI-NEXT: v_mov_b32_e32 v28, 0x40a00000
6969 ; CI-NEXT: v_mov_b32_e32 v29, 0x40a00000
6970 ; CI-NEXT: v_mov_b32_e32 v30, 0x40c00000
6971 ; CI-NEXT: v_writelane_b32 v40, s31, 1
6972 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6973 ; CI-NEXT: v_readlane_b32 s31, v40, 1
6974 ; CI-NEXT: v_readlane_b32 s30, v40, 0
6975 ; CI-NEXT: s_mov_b32 s32, s33
6976 ; CI-NEXT: v_readlane_b32 s4, v40, 2
6977 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1
6978 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6979 ; CI-NEXT: s_mov_b64 exec, s[6:7]
6980 ; CI-NEXT: s_mov_b32 s33, s4
6981 ; CI-NEXT: s_waitcnt vmcnt(0)
6982 ; CI-NEXT: s_setpc_b64 s[30:31]
6984 ; GFX9-LABEL: stack_8xv5f32:
6985 ; GFX9: ; %bb.0: ; %entry
6986 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6987 ; GFX9-NEXT: s_mov_b32 s4, s33
6988 ; GFX9-NEXT: s_mov_b32 s33, s32
6989 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
6990 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6991 ; GFX9-NEXT: s_mov_b64 exec, s[8:9]
6992 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6993 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40e00000
6994 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
6995 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000
6996 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6997 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41100000
6998 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6999 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41200000
7000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
7001 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000
7002 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
7003 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000
7004 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
7005 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000
7006 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
7007 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000
7008 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2
7009 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
7010 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000
7011 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7012 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
7013 ; GFX9-NEXT: s_getpc_b64 s[4:5]
7014 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
7015 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
7016 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
7017 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
7018 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
7019 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
7020 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
7021 ; GFX9-NEXT: v_mov_b32_e32 v5, 1.0
7022 ; GFX9-NEXT: v_mov_b32_e32 v6, 1.0
7023 ; GFX9-NEXT: v_mov_b32_e32 v7, 1.0
7024 ; GFX9-NEXT: v_mov_b32_e32 v8, 1.0
7025 ; GFX9-NEXT: v_mov_b32_e32 v9, 1.0
7026 ; GFX9-NEXT: v_mov_b32_e32 v10, 2.0
7027 ; GFX9-NEXT: v_mov_b32_e32 v11, 2.0
7028 ; GFX9-NEXT: v_mov_b32_e32 v12, 2.0
7029 ; GFX9-NEXT: v_mov_b32_e32 v13, 2.0
7030 ; GFX9-NEXT: v_mov_b32_e32 v14, 2.0
7031 ; GFX9-NEXT: v_mov_b32_e32 v15, 0x40400000
7032 ; GFX9-NEXT: v_mov_b32_e32 v16, 0x40400000
7033 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x40400000
7034 ; GFX9-NEXT: v_mov_b32_e32 v18, 0x40400000
7035 ; GFX9-NEXT: v_mov_b32_e32 v19, 0x40400000
7036 ; GFX9-NEXT: v_mov_b32_e32 v20, 4.0
7037 ; GFX9-NEXT: v_mov_b32_e32 v21, 4.0
7038 ; GFX9-NEXT: v_mov_b32_e32 v22, 4.0
7039 ; GFX9-NEXT: v_mov_b32_e32 v23, 4.0
7040 ; GFX9-NEXT: v_mov_b32_e32 v24, 4.0
7041 ; GFX9-NEXT: v_mov_b32_e32 v25, 0x40a00000
7042 ; GFX9-NEXT: v_mov_b32_e32 v26, 0x40a00000
7043 ; GFX9-NEXT: v_mov_b32_e32 v27, 0x40a00000
7044 ; GFX9-NEXT: v_mov_b32_e32 v28, 0x40a00000
7045 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000
7046 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000
7047 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7048 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
7049 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7050 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7051 ; GFX9-NEXT: s_mov_b32 s32, s33
7052 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
7053 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7054 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7055 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7056 ; GFX9-NEXT: s_mov_b32 s33, s4
7057 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7058 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7060 ; GFX11-LABEL: stack_8xv5f32:
7061 ; GFX11: ; %bb.0: ; %entry
7062 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7063 ; GFX11-NEXT: s_mov_b32 s0, s33
7064 ; GFX11-NEXT: s_mov_b32 s33, s32
7065 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7066 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7067 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7068 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7069 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40e00000
7070 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41000000
7071 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41100000
7072 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41200000
7073 ; GFX11-NEXT: v_mov_b32_e32 v8, 0x41700000
7074 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7075 ; GFX11-NEXT: v_mov_b32_e32 v4, 0x41300000
7076 ; GFX11-NEXT: v_mov_b32_e32 v5, 0x41400000
7077 ; GFX11-NEXT: v_dual_mov_b32 v6, 0x41500000 :: v_dual_mov_b32 v9, 1.0
7078 ; GFX11-NEXT: v_mov_b32_e32 v7, 0x41600000
7079 ; GFX11-NEXT: s_add_i32 s0, s32, 32
7080 ; GFX11-NEXT: s_add_i32 s1, s32, 16
7081 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7082 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
7083 ; GFX11-NEXT: scratch_store_b32 off, v8, s0
7084 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s1
7085 ; GFX11-NEXT: v_mov_b32_e32 v6, 1.0
7086 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
7087 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
7088 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1.0
7089 ; GFX11-NEXT: v_dual_mov_b32 v7, 1.0 :: v_dual_mov_b32 v8, 1.0
7090 ; GFX11-NEXT: v_dual_mov_b32 v11, 2.0 :: v_dual_mov_b32 v10, 2.0
7091 ; GFX11-NEXT: v_dual_mov_b32 v13, 2.0 :: v_dual_mov_b32 v12, 2.0
7092 ; GFX11-NEXT: v_dual_mov_b32 v15, 0x40400000 :: v_dual_mov_b32 v14, 2.0
7093 ; GFX11-NEXT: v_dual_mov_b32 v17, 0x40400000 :: v_dual_mov_b32 v16, 0x40400000
7094 ; GFX11-NEXT: v_dual_mov_b32 v19, 0x40400000 :: v_dual_mov_b32 v18, 0x40400000
7095 ; GFX11-NEXT: v_dual_mov_b32 v21, 4.0 :: v_dual_mov_b32 v20, 4.0
7096 ; GFX11-NEXT: v_dual_mov_b32 v23, 4.0 :: v_dual_mov_b32 v22, 4.0
7097 ; GFX11-NEXT: v_dual_mov_b32 v25, 0x40a00000 :: v_dual_mov_b32 v24, 4.0
7098 ; GFX11-NEXT: v_dual_mov_b32 v27, 0x40a00000 :: v_dual_mov_b32 v26, 0x40a00000
7099 ; GFX11-NEXT: v_dual_mov_b32 v29, 0x40a00000 :: v_dual_mov_b32 v28, 0x40a00000
7100 ; GFX11-NEXT: v_mov_b32_e32 v30, 0x40c00000
7101 ; GFX11-NEXT: s_getpc_b64 s[0:1]
7102 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4
7103 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12
7104 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7105 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7106 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7107 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7108 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7109 ; GFX11-NEXT: s_mov_b32 s32, s33
7110 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7111 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7112 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7113 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7114 ; GFX11-NEXT: s_mov_b32 s33, s0
7115 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7116 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7118 ; HSA-LABEL: stack_8xv5f32:
7119 ; HSA: ; %bb.0: ; %entry
7120 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7121 ; HSA-NEXT: s_mov_b32 s4, s33
7122 ; HSA-NEXT: s_mov_b32 s33, s32
7123 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1
7124 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7125 ; HSA-NEXT: s_mov_b64 exec, s[8:9]
7126 ; HSA-NEXT: s_addk_i32 s32, 0x400
7127 ; HSA-NEXT: v_mov_b32_e32 v0, 0x40e00000
7128 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32
7129 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41000000
7130 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
7131 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41100000
7132 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
7133 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41200000
7134 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
7135 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000
7136 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
7137 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41400000
7138 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
7139 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000
7140 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
7141 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000
7142 ; HSA-NEXT: v_writelane_b32 v40, s4, 2
7143 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
7144 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000
7145 ; HSA-NEXT: v_writelane_b32 v40, s30, 0
7146 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
7147 ; HSA-NEXT: s_getpc_b64 s[4:5]
7148 ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
7149 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
7150 ; HSA-NEXT: v_mov_b32_e32 v0, 0
7151 ; HSA-NEXT: v_mov_b32_e32 v1, 0
7152 ; HSA-NEXT: v_mov_b32_e32 v2, 0
7153 ; HSA-NEXT: v_mov_b32_e32 v3, 0
7154 ; HSA-NEXT: v_mov_b32_e32 v4, 0
7155 ; HSA-NEXT: v_mov_b32_e32 v5, 1.0
7156 ; HSA-NEXT: v_mov_b32_e32 v6, 1.0
7157 ; HSA-NEXT: v_mov_b32_e32 v7, 1.0
7158 ; HSA-NEXT: v_mov_b32_e32 v8, 1.0
7159 ; HSA-NEXT: v_mov_b32_e32 v9, 1.0
7160 ; HSA-NEXT: v_mov_b32_e32 v10, 2.0
7161 ; HSA-NEXT: v_mov_b32_e32 v11, 2.0
7162 ; HSA-NEXT: v_mov_b32_e32 v12, 2.0
7163 ; HSA-NEXT: v_mov_b32_e32 v13, 2.0
7164 ; HSA-NEXT: v_mov_b32_e32 v14, 2.0
7165 ; HSA-NEXT: v_mov_b32_e32 v15, 0x40400000
7166 ; HSA-NEXT: v_mov_b32_e32 v16, 0x40400000
7167 ; HSA-NEXT: v_mov_b32_e32 v17, 0x40400000
7168 ; HSA-NEXT: v_mov_b32_e32 v18, 0x40400000
7169 ; HSA-NEXT: v_mov_b32_e32 v19, 0x40400000
7170 ; HSA-NEXT: v_mov_b32_e32 v20, 4.0
7171 ; HSA-NEXT: v_mov_b32_e32 v21, 4.0
7172 ; HSA-NEXT: v_mov_b32_e32 v22, 4.0
7173 ; HSA-NEXT: v_mov_b32_e32 v23, 4.0
7174 ; HSA-NEXT: v_mov_b32_e32 v24, 4.0
7175 ; HSA-NEXT: v_mov_b32_e32 v25, 0x40a00000
7176 ; HSA-NEXT: v_mov_b32_e32 v26, 0x40a00000
7177 ; HSA-NEXT: v_mov_b32_e32 v27, 0x40a00000
7178 ; HSA-NEXT: v_mov_b32_e32 v28, 0x40a00000
7179 ; HSA-NEXT: v_mov_b32_e32 v29, 0x40a00000
7180 ; HSA-NEXT: v_mov_b32_e32 v30, 0x40c00000
7181 ; HSA-NEXT: v_writelane_b32 v40, s31, 1
7182 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
7183 ; HSA-NEXT: v_readlane_b32 s31, v40, 1
7184 ; HSA-NEXT: v_readlane_b32 s30, v40, 0
7185 ; HSA-NEXT: s_mov_b32 s32, s33
7186 ; HSA-NEXT: v_readlane_b32 s4, v40, 2
7187 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
7188 ; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7189 ; HSA-NEXT: s_mov_b64 exec, s[6:7]
7190 ; HSA-NEXT: s_mov_b32 s33, s4
7191 ; HSA-NEXT: s_waitcnt vmcnt(0)
7192 ; HSA-NEXT: s_setpc_b64 s[30:31]
; Arguments 0-5 are splats of 0.0 through 5.0; arguments 6 and 7 hold the
; ascending values 6.0-10.0 and 11.0-15.0, so every element that ends up in
; memory rather than a VGPR has a distinct value in the expected output.
7194 call void @external_void_func_8xv5f32(
7195 <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,
7196 <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
7197 <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
7198 <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
7199 <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
7200 <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
7201 <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
7202 <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
; External callees for the argument-passing tests. All are declared hidden
; and carry attribute group #0.
;
; Takes a <32 x i32> followed by a byval(double) pointer in address space 5
; with 16-byte alignment.
7206 declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0
; Takes a <32 x i32> followed by a plain double.
; NOTE(review): presumably the <32 x i32> uses up the argument registers so
; the double is passed on the stack, as the name suggests - confirm against
; the calling convention.
7207 declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0
; Twelve <3 x i32> parameters (36 elements in total).
7208 declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
7209 <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
; Eight <5 x i32> parameters (40 elements in total).
7210 declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
7211 <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
; Twelve <3 x float> parameters (36 elements in total).
7212 declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
7213 <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
; Eight <5 x float> parameters (40 elements in total).
7214 declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
7215 <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
; Attribute group #0 is nounwind plus the "amdgpu-no-*" string attributes
; listed below (dispatch id/ptr, implicitarg ptr, workgroup ids, workitem ids).
7217 attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
; NOTE(review): groups #1 and #2 are not referenced in this part of the file;
; check whether anything else in the file still uses them.
7218 attributes #1 = { nounwind readnone }
7219 attributes #2 = { nounwind noinline }
; Module flag setting "amdhsa_code_object_version" to 500.
7221 !llvm.module.flags = !{!0}
7222 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}