1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CI %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=HSA %s
; External callees used by the call tests in this file. Each one is declared
; hidden and carries attribute group #0 (the group's definition is not visible here).
; The callee name spells out the parameter type(s) and any signext/zeroext/byval/sret
; parameter attribute used in its signature.
8 declare hidden void @external_void_func_i1(i1) #0
9 declare hidden void @external_void_func_i1_signext(i1 signext) #0
10 declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0
12 declare hidden void @external_void_func_i8(i8) #0
13 declare hidden void @external_void_func_i8_signext(i8 signext) #0
14 declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0
16 declare hidden void @external_void_func_i16(i16) #0
17 declare hidden void @external_void_func_i16_signext(i16 signext) #0
18 declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0
20 declare hidden void @external_void_func_i32(i32) #0
21 declare hidden void @external_void_func_i64(i64) #0
22 declare hidden void @external_void_func_v2i64(<2 x i64>) #0
23 declare hidden void @external_void_func_v3i64(<3 x i64>) #0
24 declare hidden void @external_void_func_v4i64(<4 x i64>) #0
26 declare hidden void @external_void_func_f16(half) #0
27 declare hidden void @external_void_func_f32(float) #0
28 declare hidden void @external_void_func_f64(double) #0
29 declare hidden void @external_void_func_v2f32(<2 x float>) #0
30 declare hidden void @external_void_func_v2f64(<2 x double>) #0
31 declare hidden void @external_void_func_v3f32(<3 x float>) #0
32 declare hidden void @external_void_func_v3f64(<3 x double>) #0
33 declare hidden void @external_void_func_v5f32(<5 x float>) #0
35 declare hidden void @external_void_func_v2i16(<2 x i16>) #0
36 declare hidden void @external_void_func_v2f16(<2 x half>) #0
37 declare hidden void @external_void_func_v3i16(<3 x i16>) #0
38 declare hidden void @external_void_func_v3f16(<3 x half>) #0
39 declare hidden void @external_void_func_v4i16(<4 x i16>) #0
40 declare hidden void @external_void_func_v4f16(<4 x half>) #0
42 declare hidden void @external_void_func_v2i32(<2 x i32>) #0
43 declare hidden void @external_void_func_v3i32(<3 x i32>) #0
44 declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
45 declare hidden void @external_void_func_v4i32(<4 x i32>) #0
46 declare hidden void @external_void_func_v5i32(<5 x i32>) #0
47 declare hidden void @external_void_func_v8i32(<8 x i32>) #0
48 declare hidden void @external_void_func_v16i32(<16 x i32>) #0
49 declare hidden void @external_void_func_v32i32(<32 x i32>) #0
50 declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
52 ; Callee that both takes an argument and returns a value.
53 declare hidden i32 @external_i32_func_i32(i32) #0
56 declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0
57 declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0
58 declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
60 declare hidden void @external_void_func_v16i8(<16 x i8>) #0
62 ; FIXME: The i1 true argument is currently materialized as 1 in v0; it should be passed as -1.
; Kernel that calls @external_void_func_i1 with the constant i1 true.
; The check block for every RUN configuration expects the argument to be
; materialized in v0 as the literal 1 before the s_swappc_b64 call.
63 define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
64 ; VI-LABEL: test_call_external_void_func_i1_imm:
66 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
67 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
68 ; VI-NEXT: s_mov_b32 s38, -1
69 ; VI-NEXT: s_mov_b32 s39, 0xe80000
70 ; VI-NEXT: s_add_u32 s36, s36, s3
71 ; VI-NEXT: s_addc_u32 s37, s37, 0
72 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
73 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
74 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
75 ; VI-NEXT: v_mov_b32_e32 v0, 1
76 ; VI-NEXT: s_mov_b32 s32, 0
77 ; VI-NEXT: s_getpc_b64 s[4:5]
78 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
79 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
80 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
83 ; CI-LABEL: test_call_external_void_func_i1_imm:
85 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
86 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
87 ; CI-NEXT: s_mov_b32 s38, -1
88 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
89 ; CI-NEXT: s_add_u32 s36, s36, s3
90 ; CI-NEXT: s_addc_u32 s37, s37, 0
91 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
92 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
93 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
94 ; CI-NEXT: v_mov_b32_e32 v0, 1
95 ; CI-NEXT: s_mov_b32 s32, 0
96 ; CI-NEXT: s_getpc_b64 s[4:5]
97 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
98 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
99 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
102 ; GFX9-LABEL: test_call_external_void_func_i1_imm:
104 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
105 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
106 ; GFX9-NEXT: s_mov_b32 s38, -1
107 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
108 ; GFX9-NEXT: s_add_u32 s36, s36, s3
109 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
110 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
111 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
112 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
113 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
114 ; GFX9-NEXT: s_mov_b32 s32, 0
115 ; GFX9-NEXT: s_getpc_b64 s[4:5]
116 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
117 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
118 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
119 ; GFX9-NEXT: s_endpgm
121 ; GFX11-LABEL: test_call_external_void_func_i1_imm:
123 ; GFX11-NEXT: v_mov_b32_e32 v0, 1
124 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
125 ; GFX11-NEXT: s_mov_b32 s32, 0
126 ; GFX11-NEXT: s_getpc_b64 s[2:3]
127 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1@rel32@lo+4
128 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1@rel32@hi+12
129 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
130 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
131 ; GFX11-NEXT: s_endpgm
133 ; HSA-LABEL: test_call_external_void_func_i1_imm:
135 ; HSA-NEXT: s_add_i32 s6, s6, s9
136 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
137 ; HSA-NEXT: s_add_u32 s0, s0, s9
138 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
139 ; HSA-NEXT: s_addc_u32 s1, s1, 0
140 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
141 ; HSA-NEXT: v_mov_b32_e32 v0, 1
142 ; HSA-NEXT: s_mov_b32 s32, 0
143 ; HSA-NEXT: s_getpc_b64 s[8:9]
144 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1@rel32@lo+4
145 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1@rel32@hi+12
146 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
148 call void @external_void_func_i1(i1 true)
; Kernel that passes a volatile-loaded i1 to a callee whose parameter is marked signext.
; Every check block expects the loaded byte to be sign-extended from bit 0 with
; v_bfe_i32 v0, v0, 0, 1 before the s_swappc_b64 call.
; The unnamed i32 kernel argument is not referenced by the visible body.
152 define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
153 ; VI-LABEL: test_call_external_void_func_i1_signext:
155 ; VI-NEXT: s_mov_b32 s3, 0xf000
156 ; VI-NEXT: s_mov_b32 s2, -1
157 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
158 ; VI-NEXT: s_waitcnt vmcnt(0)
159 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
160 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
161 ; VI-NEXT: s_mov_b32 s38, -1
162 ; VI-NEXT: s_mov_b32 s39, 0xe80000
163 ; VI-NEXT: s_add_u32 s36, s36, s5
164 ; VI-NEXT: s_addc_u32 s37, s37, 0
165 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
166 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
167 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
168 ; VI-NEXT: s_mov_b32 s32, 0
169 ; VI-NEXT: s_getpc_b64 s[4:5]
170 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
171 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
172 ; VI-NEXT: v_bfe_i32 v0, v0, 0, 1
173 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
176 ; CI-LABEL: test_call_external_void_func_i1_signext:
178 ; CI-NEXT: s_mov_b32 s3, 0xf000
179 ; CI-NEXT: s_mov_b32 s2, -1
180 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
181 ; CI-NEXT: s_waitcnt vmcnt(0)
182 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
183 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
184 ; CI-NEXT: s_mov_b32 s38, -1
185 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
186 ; CI-NEXT: s_add_u32 s36, s36, s5
187 ; CI-NEXT: s_addc_u32 s37, s37, 0
188 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
189 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
190 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
191 ; CI-NEXT: s_mov_b32 s32, 0
192 ; CI-NEXT: s_getpc_b64 s[4:5]
193 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
194 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
195 ; CI-NEXT: v_bfe_i32 v0, v0, 0, 1
196 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
199 ; GFX9-LABEL: test_call_external_void_func_i1_signext:
201 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
202 ; GFX9-NEXT: s_mov_b32 s2, -1
203 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
204 ; GFX9-NEXT: s_waitcnt vmcnt(0)
205 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
206 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
207 ; GFX9-NEXT: s_mov_b32 s38, -1
208 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
209 ; GFX9-NEXT: s_add_u32 s36, s36, s5
210 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
211 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
212 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
213 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
214 ; GFX9-NEXT: s_mov_b32 s32, 0
215 ; GFX9-NEXT: s_getpc_b64 s[4:5]
216 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
217 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
218 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1
219 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
220 ; GFX9-NEXT: s_endpgm
222 ; GFX11-LABEL: test_call_external_void_func_i1_signext:
224 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
225 ; GFX11-NEXT: s_mov_b32 s2, -1
226 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
227 ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc
228 ; GFX11-NEXT: s_waitcnt vmcnt(0)
229 ; GFX11-NEXT: s_mov_b32 s32, 0
230 ; GFX11-NEXT: s_getpc_b64 s[2:3]
231 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1_signext@rel32@lo+4
232 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1_signext@rel32@hi+12
233 ; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 1
234 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
235 ; GFX11-NEXT: s_endpgm
237 ; HSA-LABEL: test_call_external_void_func_i1_signext:
239 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
240 ; HSA-NEXT: s_mov_b32 s6, -1
241 ; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc
242 ; HSA-NEXT: s_waitcnt vmcnt(0)
243 ; HSA-NEXT: s_add_i32 s8, s8, s11
244 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
245 ; HSA-NEXT: s_add_u32 s0, s0, s11
246 ; HSA-NEXT: s_addc_u32 s1, s1, 0
247 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
248 ; HSA-NEXT: s_mov_b32 s32, 0
249 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
250 ; HSA-NEXT: s_getpc_b64 s[8:9]
251 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1_signext@rel32@lo+4
252 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1_signext@rel32@hi+12
253 ; HSA-NEXT: v_bfe_i32 v0, v0, 0, 1
254 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; The volatile load from an undef addrspace(1) pointer supplies the i1 value that is passed.
256 %var = load volatile i1, ptr addrspace(1) undef
257 call void @external_void_func_i1_signext(i1 signext %var)
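261 ; FIXME: The load should be scheduled before s_getpc_b64. NOTE(review): the checks below already place the load first; confirm whether this FIXME is stale.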
; Kernel that passes a volatile-loaded i1 to a callee whose parameter is marked zeroext.
; Every check block expects the loaded byte to be masked to bit 0 with
; v_and_b32_e32 v0, 1, v0 before the s_swappc_b64 call.
; The unnamed i32 kernel argument is not referenced by the visible body.
262 define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
263 ; VI-LABEL: test_call_external_void_func_i1_zeroext:
265 ; VI-NEXT: s_mov_b32 s3, 0xf000
266 ; VI-NEXT: s_mov_b32 s2, -1
267 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
268 ; VI-NEXT: s_waitcnt vmcnt(0)
269 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
270 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
271 ; VI-NEXT: s_mov_b32 s38, -1
272 ; VI-NEXT: s_mov_b32 s39, 0xe80000
273 ; VI-NEXT: s_add_u32 s36, s36, s5
274 ; VI-NEXT: s_addc_u32 s37, s37, 0
275 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
276 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
277 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
278 ; VI-NEXT: s_mov_b32 s32, 0
279 ; VI-NEXT: s_getpc_b64 s[4:5]
280 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
281 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
282 ; VI-NEXT: v_and_b32_e32 v0, 1, v0
283 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
286 ; CI-LABEL: test_call_external_void_func_i1_zeroext:
288 ; CI-NEXT: s_mov_b32 s3, 0xf000
289 ; CI-NEXT: s_mov_b32 s2, -1
290 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
291 ; CI-NEXT: s_waitcnt vmcnt(0)
292 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
293 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
294 ; CI-NEXT: s_mov_b32 s38, -1
295 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
296 ; CI-NEXT: s_add_u32 s36, s36, s5
297 ; CI-NEXT: s_addc_u32 s37, s37, 0
298 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
299 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
300 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
301 ; CI-NEXT: s_mov_b32 s32, 0
302 ; CI-NEXT: s_getpc_b64 s[4:5]
303 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
304 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
305 ; CI-NEXT: v_and_b32_e32 v0, 1, v0
306 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
309 ; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
311 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
312 ; GFX9-NEXT: s_mov_b32 s2, -1
313 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
314 ; GFX9-NEXT: s_waitcnt vmcnt(0)
315 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
316 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
317 ; GFX9-NEXT: s_mov_b32 s38, -1
318 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
319 ; GFX9-NEXT: s_add_u32 s36, s36, s5
320 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
321 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
322 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
323 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
324 ; GFX9-NEXT: s_mov_b32 s32, 0
325 ; GFX9-NEXT: s_getpc_b64 s[4:5]
326 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
327 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
328 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
329 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
330 ; GFX9-NEXT: s_endpgm
332 ; GFX11-LABEL: test_call_external_void_func_i1_zeroext:
334 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
335 ; GFX11-NEXT: s_mov_b32 s2, -1
336 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
337 ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc
338 ; GFX11-NEXT: s_waitcnt vmcnt(0)
339 ; GFX11-NEXT: s_mov_b32 s32, 0
340 ; GFX11-NEXT: s_getpc_b64 s[2:3]
341 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1_zeroext@rel32@lo+4
342 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1_zeroext@rel32@hi+12
343 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
344 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
345 ; GFX11-NEXT: s_endpgm
347 ; HSA-LABEL: test_call_external_void_func_i1_zeroext:
349 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
350 ; HSA-NEXT: s_mov_b32 s6, -1
351 ; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc
352 ; HSA-NEXT: s_waitcnt vmcnt(0)
353 ; HSA-NEXT: s_add_i32 s8, s8, s11
354 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
355 ; HSA-NEXT: s_add_u32 s0, s0, s11
356 ; HSA-NEXT: s_addc_u32 s1, s1, 0
357 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
358 ; HSA-NEXT: s_mov_b32 s32, 0
359 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
360 ; HSA-NEXT: s_getpc_b64 s[8:9]
361 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1_zeroext@rel32@lo+4
362 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1_zeroext@rel32@hi+12
363 ; HSA-NEXT: v_and_b32_e32 v0, 1, v0
364 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; The volatile load from an undef addrspace(1) pointer supplies the i1 value that is passed.
366 %var = load volatile i1, ptr addrspace(1) undef
367 call void @external_void_func_i1_zeroext(i1 zeroext %var)
; Kernel that calls @external_void_func_i8 with the constant i8 123.
; Every check block expects the argument to be materialized in v0 as 0x7b
; before the s_swappc_b64 call.
; The unnamed i32 kernel argument is not referenced by the visible body.
371 define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
372 ; VI-LABEL: test_call_external_void_func_i8_imm:
374 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
375 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
376 ; VI-NEXT: s_mov_b32 s38, -1
377 ; VI-NEXT: s_mov_b32 s39, 0xe80000
378 ; VI-NEXT: s_add_u32 s36, s36, s5
379 ; VI-NEXT: s_addc_u32 s37, s37, 0
380 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
381 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
382 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
383 ; VI-NEXT: v_mov_b32_e32 v0, 0x7b
384 ; VI-NEXT: s_mov_b32 s32, 0
385 ; VI-NEXT: s_getpc_b64 s[4:5]
386 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
387 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
388 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
391 ; CI-LABEL: test_call_external_void_func_i8_imm:
393 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
394 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
395 ; CI-NEXT: s_mov_b32 s38, -1
396 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
397 ; CI-NEXT: s_add_u32 s36, s36, s5
398 ; CI-NEXT: s_addc_u32 s37, s37, 0
399 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
400 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
401 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
402 ; CI-NEXT: v_mov_b32_e32 v0, 0x7b
403 ; CI-NEXT: s_mov_b32 s32, 0
404 ; CI-NEXT: s_getpc_b64 s[4:5]
405 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
406 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
407 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
410 ; GFX9-LABEL: test_call_external_void_func_i8_imm:
412 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
413 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
414 ; GFX9-NEXT: s_mov_b32 s38, -1
415 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
416 ; GFX9-NEXT: s_add_u32 s36, s36, s5
417 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
418 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
419 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
420 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
421 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
422 ; GFX9-NEXT: s_mov_b32 s32, 0
423 ; GFX9-NEXT: s_getpc_b64 s[4:5]
424 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
425 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
426 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
427 ; GFX9-NEXT: s_endpgm
429 ; GFX11-LABEL: test_call_external_void_func_i8_imm:
431 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
432 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
433 ; GFX11-NEXT: s_mov_b32 s32, 0
434 ; GFX11-NEXT: s_getpc_b64 s[2:3]
435 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4
436 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12
437 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
438 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
439 ; GFX11-NEXT: s_endpgm
441 ; HSA-LABEL: test_call_external_void_func_i8_imm:
443 ; HSA-NEXT: s_add_i32 s8, s8, s11
444 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
445 ; HSA-NEXT: s_add_u32 s0, s0, s11
446 ; HSA-NEXT: s_addc_u32 s1, s1, 0
447 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
448 ; HSA-NEXT: v_mov_b32_e32 v0, 0x7b
449 ; HSA-NEXT: s_mov_b32 s32, 0
450 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
451 ; HSA-NEXT: s_getpc_b64 s[8:9]
452 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8@rel32@lo+4
453 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8@rel32@hi+12
454 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
456 call void @external_void_func_i8(i8 123)
460 ; FIXME: Don't wait (s_waitcnt) on the loaded value before the call.
; Kernel that passes a volatile-loaded i8 to a callee whose parameter is marked signext.
; The check blocks expect the value to be loaded into v0 with a signed byte load
; (buffer_load_sbyte, or buffer_load_i8 in the GFX11 checks), followed by s_waitcnt vmcnt(0).
; The unnamed i32 kernel argument is not referenced by the visible body.
461 define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
462 ; VI-LABEL: test_call_external_void_func_i8_signext:
464 ; VI-NEXT: s_mov_b32 s3, 0xf000
465 ; VI-NEXT: s_mov_b32 s2, -1
466 ; VI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
467 ; VI-NEXT: s_waitcnt vmcnt(0)
468 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
469 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
470 ; VI-NEXT: s_mov_b32 s38, -1
471 ; VI-NEXT: s_mov_b32 s39, 0xe80000
472 ; VI-NEXT: s_add_u32 s36, s36, s5
473 ; VI-NEXT: s_addc_u32 s37, s37, 0
474 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
475 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
476 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
477 ; VI-NEXT: s_mov_b32 s32, 0
478 ; VI-NEXT: s_getpc_b64 s[4:5]
479 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
480 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
481 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
484 ; CI-LABEL: test_call_external_void_func_i8_signext:
486 ; CI-NEXT: s_mov_b32 s3, 0xf000
487 ; CI-NEXT: s_mov_b32 s2, -1
488 ; CI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
489 ; CI-NEXT: s_waitcnt vmcnt(0)
490 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
491 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
492 ; CI-NEXT: s_mov_b32 s38, -1
493 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
494 ; CI-NEXT: s_add_u32 s36, s36, s5
495 ; CI-NEXT: s_addc_u32 s37, s37, 0
496 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
497 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
498 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
499 ; CI-NEXT: s_mov_b32 s32, 0
500 ; CI-NEXT: s_getpc_b64 s[4:5]
501 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
502 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
503 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
506 ; GFX9-LABEL: test_call_external_void_func_i8_signext:
508 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
509 ; GFX9-NEXT: s_mov_b32 s2, -1
510 ; GFX9-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
511 ; GFX9-NEXT: s_waitcnt vmcnt(0)
512 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
513 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
514 ; GFX9-NEXT: s_mov_b32 s38, -1
515 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
516 ; GFX9-NEXT: s_add_u32 s36, s36, s5
517 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
518 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
519 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
520 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
521 ; GFX9-NEXT: s_mov_b32 s32, 0
522 ; GFX9-NEXT: s_getpc_b64 s[4:5]
523 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
524 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
525 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
526 ; GFX9-NEXT: s_endpgm
528 ; GFX11-LABEL: test_call_external_void_func_i8_signext:
530 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
531 ; GFX11-NEXT: s_mov_b32 s2, -1
532 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
533 ; GFX11-NEXT: buffer_load_i8 v0, off, s[0:3], 0 glc dlc
534 ; GFX11-NEXT: s_waitcnt vmcnt(0)
535 ; GFX11-NEXT: s_mov_b32 s32, 0
536 ; GFX11-NEXT: s_getpc_b64 s[2:3]
537 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_signext@rel32@lo+4
538 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_signext@rel32@hi+12
539 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
540 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
541 ; GFX11-NEXT: s_endpgm
543 ; HSA-LABEL: test_call_external_void_func_i8_signext:
545 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
546 ; HSA-NEXT: s_mov_b32 s6, -1
547 ; HSA-NEXT: buffer_load_sbyte v0, off, s[4:7], 0 glc
548 ; HSA-NEXT: s_waitcnt vmcnt(0)
549 ; HSA-NEXT: s_add_i32 s8, s8, s11
550 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
551 ; HSA-NEXT: s_add_u32 s0, s0, s11
552 ; HSA-NEXT: s_addc_u32 s1, s1, 0
553 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
554 ; HSA-NEXT: s_mov_b32 s32, 0
555 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
556 ; HSA-NEXT: s_getpc_b64 s[8:9]
557 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8_signext@rel32@lo+4
558 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8_signext@rel32@hi+12
559 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; The volatile load from an undef addrspace(1) pointer supplies the i8 value that is passed.
561 %var = load volatile i8, ptr addrspace(1) undef
562 call void @external_void_func_i8_signext(i8 signext %var)
; Kernel that passes a volatile-loaded i8 to a callee whose parameter is marked zeroext.
; The check blocks expect the value to be loaded into v0 with an unsigned byte load
; (buffer_load_ubyte, or buffer_load_u8 in the GFX11 checks), followed by s_waitcnt vmcnt(0).
; The unnamed i32 kernel argument is not referenced by the visible body.
566 define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
567 ; VI-LABEL: test_call_external_void_func_i8_zeroext:
569 ; VI-NEXT: s_mov_b32 s3, 0xf000
570 ; VI-NEXT: s_mov_b32 s2, -1
571 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
572 ; VI-NEXT: s_waitcnt vmcnt(0)
573 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
574 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
575 ; VI-NEXT: s_mov_b32 s38, -1
576 ; VI-NEXT: s_mov_b32 s39, 0xe80000
577 ; VI-NEXT: s_add_u32 s36, s36, s5
578 ; VI-NEXT: s_addc_u32 s37, s37, 0
579 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
580 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
581 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
582 ; VI-NEXT: s_mov_b32 s32, 0
583 ; VI-NEXT: s_getpc_b64 s[4:5]
584 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
585 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
586 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
589 ; CI-LABEL: test_call_external_void_func_i8_zeroext:
591 ; CI-NEXT: s_mov_b32 s3, 0xf000
592 ; CI-NEXT: s_mov_b32 s2, -1
593 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
594 ; CI-NEXT: s_waitcnt vmcnt(0)
595 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
596 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
597 ; CI-NEXT: s_mov_b32 s38, -1
598 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
599 ; CI-NEXT: s_add_u32 s36, s36, s5
600 ; CI-NEXT: s_addc_u32 s37, s37, 0
601 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
602 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
603 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
604 ; CI-NEXT: s_mov_b32 s32, 0
605 ; CI-NEXT: s_getpc_b64 s[4:5]
606 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
607 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
608 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
611 ; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
613 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
614 ; GFX9-NEXT: s_mov_b32 s2, -1
615 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
616 ; GFX9-NEXT: s_waitcnt vmcnt(0)
617 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
618 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
619 ; GFX9-NEXT: s_mov_b32 s38, -1
620 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
621 ; GFX9-NEXT: s_add_u32 s36, s36, s5
622 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
623 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
624 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
625 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
626 ; GFX9-NEXT: s_mov_b32 s32, 0
627 ; GFX9-NEXT: s_getpc_b64 s[4:5]
628 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
629 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
630 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
631 ; GFX9-NEXT: s_endpgm
633 ; GFX11-LABEL: test_call_external_void_func_i8_zeroext:
635 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
636 ; GFX11-NEXT: s_mov_b32 s2, -1
637 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
638 ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc
639 ; GFX11-NEXT: s_waitcnt vmcnt(0)
640 ; GFX11-NEXT: s_mov_b32 s32, 0
641 ; GFX11-NEXT: s_getpc_b64 s[2:3]
642 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_zeroext@rel32@lo+4
643 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_zeroext@rel32@hi+12
644 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
645 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
646 ; GFX11-NEXT: s_endpgm
648 ; HSA-LABEL: test_call_external_void_func_i8_zeroext:
650 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
651 ; HSA-NEXT: s_mov_b32 s6, -1
652 ; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc
653 ; HSA-NEXT: s_waitcnt vmcnt(0)
654 ; HSA-NEXT: s_add_i32 s8, s8, s11
655 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
656 ; HSA-NEXT: s_add_u32 s0, s0, s11
657 ; HSA-NEXT: s_addc_u32 s1, s1, 0
658 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
659 ; HSA-NEXT: s_mov_b32 s32, 0
660 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
661 ; HSA-NEXT: s_getpc_b64 s[8:9]
662 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8_zeroext@rel32@lo+4
663 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8_zeroext@rel32@hi+12
664 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; The volatile load from an undef addrspace(1) pointer supplies the i8 value that is passed.
666 %var = load volatile i8, ptr addrspace(1) undef
667 call void @external_void_func_i8_zeroext(i8 zeroext %var)
; Kernel that calls @external_void_func_i16 with the constant i16 123.
; Every check block expects the argument to be materialized in v0 as 0x7b
; before the s_swappc_b64 call.
671 define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
672 ; VI-LABEL: test_call_external_void_func_i16_imm:
674 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
675 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
676 ; VI-NEXT: s_mov_b32 s38, -1
677 ; VI-NEXT: s_mov_b32 s39, 0xe80000
678 ; VI-NEXT: s_add_u32 s36, s36, s3
679 ; VI-NEXT: s_addc_u32 s37, s37, 0
680 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
681 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
682 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
683 ; VI-NEXT: v_mov_b32_e32 v0, 0x7b
684 ; VI-NEXT: s_mov_b32 s32, 0
685 ; VI-NEXT: s_getpc_b64 s[4:5]
686 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
687 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
688 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
691 ; CI-LABEL: test_call_external_void_func_i16_imm:
693 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
694 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
695 ; CI-NEXT: s_mov_b32 s38, -1
696 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
697 ; CI-NEXT: s_add_u32 s36, s36, s3
698 ; CI-NEXT: s_addc_u32 s37, s37, 0
699 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
700 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
701 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
702 ; CI-NEXT: v_mov_b32_e32 v0, 0x7b
703 ; CI-NEXT: s_mov_b32 s32, 0
704 ; CI-NEXT: s_getpc_b64 s[4:5]
705 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
706 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
707 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
710 ; GFX9-LABEL: test_call_external_void_func_i16_imm:
712 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
713 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
714 ; GFX9-NEXT: s_mov_b32 s38, -1
715 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
716 ; GFX9-NEXT: s_add_u32 s36, s36, s3
717 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
718 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
719 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
720 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
721 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
722 ; GFX9-NEXT: s_mov_b32 s32, 0
723 ; GFX9-NEXT: s_getpc_b64 s[4:5]
724 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
725 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
726 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
727 ; GFX9-NEXT: s_endpgm
729 ; GFX11-LABEL: test_call_external_void_func_i16_imm:
731 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
732 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
733 ; GFX11-NEXT: s_mov_b32 s32, 0
734 ; GFX11-NEXT: s_getpc_b64 s[2:3]
735 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4
736 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12
737 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
738 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
739 ; GFX11-NEXT: s_endpgm
741 ; HSA-LABEL: test_call_external_void_func_i16_imm:
743 ; HSA-NEXT: s_add_i32 s6, s6, s9
744 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
745 ; HSA-NEXT: s_add_u32 s0, s0, s9
746 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
747 ; HSA-NEXT: s_addc_u32 s1, s1, 0
748 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
749 ; HSA-NEXT: v_mov_b32_e32 v0, 0x7b
750 ; HSA-NEXT: s_mov_b32 s32, 0
751 ; HSA-NEXT: s_getpc_b64 s[8:9]
752 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16@rel32@lo+4
753 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16@rel32@hi+12
754 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
756 call void @external_void_func_i16(i16 123)
; Kernel taking one unnamed i32 argument. It volatile-loads an i16 from an
; undef addrspace(1) pointer and passes it to @external_void_func_i16_signext
; as a `signext` argument. The expectations below place the value in v0 using
; buffer_load_sshort (buffer_load_i16 in the GFX11 run) ahead of the
; s_swappc_b64 call. The assertion lines are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of the file).
760 define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
761 ; VI-LABEL: test_call_external_void_func_i16_signext:
763 ; VI-NEXT: s_mov_b32 s3, 0xf000
764 ; VI-NEXT: s_mov_b32 s2, -1
765 ; VI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
766 ; VI-NEXT: s_waitcnt vmcnt(0)
767 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
768 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
769 ; VI-NEXT: s_mov_b32 s38, -1
770 ; VI-NEXT: s_mov_b32 s39, 0xe80000
771 ; VI-NEXT: s_add_u32 s36, s36, s5
772 ; VI-NEXT: s_addc_u32 s37, s37, 0
773 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
774 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
775 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
776 ; VI-NEXT: s_mov_b32 s32, 0
777 ; VI-NEXT: s_getpc_b64 s[4:5]
778 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
779 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
780 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
783 ; CI-LABEL: test_call_external_void_func_i16_signext:
785 ; CI-NEXT: s_mov_b32 s3, 0xf000
786 ; CI-NEXT: s_mov_b32 s2, -1
787 ; CI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
788 ; CI-NEXT: s_waitcnt vmcnt(0)
789 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
790 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
791 ; CI-NEXT: s_mov_b32 s38, -1
792 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
793 ; CI-NEXT: s_add_u32 s36, s36, s5
794 ; CI-NEXT: s_addc_u32 s37, s37, 0
795 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
796 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
797 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
798 ; CI-NEXT: s_mov_b32 s32, 0
799 ; CI-NEXT: s_getpc_b64 s[4:5]
800 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
801 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
802 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
805 ; GFX9-LABEL: test_call_external_void_func_i16_signext:
807 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
808 ; GFX9-NEXT: s_mov_b32 s2, -1
809 ; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
810 ; GFX9-NEXT: s_waitcnt vmcnt(0)
811 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
812 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
813 ; GFX9-NEXT: s_mov_b32 s38, -1
814 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
815 ; GFX9-NEXT: s_add_u32 s36, s36, s5
816 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
817 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
818 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
819 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
820 ; GFX9-NEXT: s_mov_b32 s32, 0
821 ; GFX9-NEXT: s_getpc_b64 s[4:5]
822 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
823 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
824 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
825 ; GFX9-NEXT: s_endpgm
827 ; GFX11-LABEL: test_call_external_void_func_i16_signext:
829 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
830 ; GFX11-NEXT: s_mov_b32 s2, -1
831 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
832 ; GFX11-NEXT: buffer_load_i16 v0, off, s[0:3], 0 glc dlc
833 ; GFX11-NEXT: s_waitcnt vmcnt(0)
834 ; GFX11-NEXT: s_mov_b32 s32, 0
835 ; GFX11-NEXT: s_getpc_b64 s[2:3]
836 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16_signext@rel32@lo+4
837 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_signext@rel32@hi+12
838 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
839 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
840 ; GFX11-NEXT: s_endpgm
842 ; HSA-LABEL: test_call_external_void_func_i16_signext:
844 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
845 ; HSA-NEXT: s_mov_b32 s6, -1
846 ; HSA-NEXT: buffer_load_sshort v0, off, s[4:7], 0 glc
847 ; HSA-NEXT: s_waitcnt vmcnt(0)
848 ; HSA-NEXT: s_add_i32 s8, s8, s11
849 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
850 ; HSA-NEXT: s_add_u32 s0, s0, s11
851 ; HSA-NEXT: s_addc_u32 s1, s1, 0
852 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
853 ; HSA-NEXT: s_mov_b32 s32, 0
854 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
855 ; HSA-NEXT: s_getpc_b64 s[8:9]
856 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16_signext@rel32@lo+4
857 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16_signext@rel32@hi+12
858 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
860 %var = load volatile i16, ptr addrspace(1) undef
861 call void @external_void_func_i16_signext(i16 signext %var)
; Kernel taking one unnamed i32 argument. It volatile-loads an i16 from an
; undef addrspace(1) pointer and passes it to @external_void_func_i16_zeroext
; as a `zeroext` argument. The expectations below place the value in v0 using
; buffer_load_ushort (buffer_load_u16 in the GFX11 run) ahead of the
; s_swappc_b64 call. The assertion lines are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of the file).
865 define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
866 ; VI-LABEL: test_call_external_void_func_i16_zeroext:
868 ; VI-NEXT: s_mov_b32 s3, 0xf000
869 ; VI-NEXT: s_mov_b32 s2, -1
870 ; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
871 ; VI-NEXT: s_waitcnt vmcnt(0)
872 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
873 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
874 ; VI-NEXT: s_mov_b32 s38, -1
875 ; VI-NEXT: s_mov_b32 s39, 0xe80000
876 ; VI-NEXT: s_add_u32 s36, s36, s5
877 ; VI-NEXT: s_addc_u32 s37, s37, 0
878 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
879 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
880 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
881 ; VI-NEXT: s_mov_b32 s32, 0
882 ; VI-NEXT: s_getpc_b64 s[4:5]
883 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
884 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
885 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
888 ; CI-LABEL: test_call_external_void_func_i16_zeroext:
890 ; CI-NEXT: s_mov_b32 s3, 0xf000
891 ; CI-NEXT: s_mov_b32 s2, -1
892 ; CI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
893 ; CI-NEXT: s_waitcnt vmcnt(0)
894 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
895 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
896 ; CI-NEXT: s_mov_b32 s38, -1
897 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
898 ; CI-NEXT: s_add_u32 s36, s36, s5
899 ; CI-NEXT: s_addc_u32 s37, s37, 0
900 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
901 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
902 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
903 ; CI-NEXT: s_mov_b32 s32, 0
904 ; CI-NEXT: s_getpc_b64 s[4:5]
905 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
906 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
907 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
910 ; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
912 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
913 ; GFX9-NEXT: s_mov_b32 s2, -1
914 ; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
915 ; GFX9-NEXT: s_waitcnt vmcnt(0)
916 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
917 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
918 ; GFX9-NEXT: s_mov_b32 s38, -1
919 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
920 ; GFX9-NEXT: s_add_u32 s36, s36, s5
921 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
922 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
923 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
924 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
925 ; GFX9-NEXT: s_mov_b32 s32, 0
926 ; GFX9-NEXT: s_getpc_b64 s[4:5]
927 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
928 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
929 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
930 ; GFX9-NEXT: s_endpgm
932 ; GFX11-LABEL: test_call_external_void_func_i16_zeroext:
934 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
935 ; GFX11-NEXT: s_mov_b32 s2, -1
936 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
937 ; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0 glc dlc
938 ; GFX11-NEXT: s_waitcnt vmcnt(0)
939 ; GFX11-NEXT: s_mov_b32 s32, 0
940 ; GFX11-NEXT: s_getpc_b64 s[2:3]
941 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16_zeroext@rel32@lo+4
942 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_zeroext@rel32@hi+12
943 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
944 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
945 ; GFX11-NEXT: s_endpgm
947 ; HSA-LABEL: test_call_external_void_func_i16_zeroext:
949 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
950 ; HSA-NEXT: s_mov_b32 s6, -1
951 ; HSA-NEXT: buffer_load_ushort v0, off, s[4:7], 0 glc
952 ; HSA-NEXT: s_waitcnt vmcnt(0)
953 ; HSA-NEXT: s_add_i32 s8, s8, s11
954 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
955 ; HSA-NEXT: s_add_u32 s0, s0, s11
956 ; HSA-NEXT: s_addc_u32 s1, s1, 0
957 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
958 ; HSA-NEXT: s_mov_b32 s32, 0
959 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
960 ; HSA-NEXT: s_getpc_b64 s[8:9]
961 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16_zeroext@rel32@lo+4
962 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16_zeroext@rel32@hi+12
963 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
965 %var = load volatile i16, ptr addrspace(1) undef
966 call void @external_void_func_i16_zeroext(i16 zeroext %var)
; Kernel taking one unnamed i32 argument. It calls @external_void_func_i32
; with the constant i32 42. The expectations below materialize the constant
; with `v_mov_b32_e32 v0, 42` ahead of the s_swappc_b64 call. The assertion
; lines are autogenerated by utils/update_llc_test_checks.py (see the NOTE on
; the first line of the file).
970 define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
971 ; VI-LABEL: test_call_external_void_func_i32_imm:
973 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
974 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
975 ; VI-NEXT: s_mov_b32 s38, -1
976 ; VI-NEXT: s_mov_b32 s39, 0xe80000
977 ; VI-NEXT: s_add_u32 s36, s36, s5
978 ; VI-NEXT: s_addc_u32 s37, s37, 0
979 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
980 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
981 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
982 ; VI-NEXT: v_mov_b32_e32 v0, 42
983 ; VI-NEXT: s_mov_b32 s32, 0
984 ; VI-NEXT: s_getpc_b64 s[4:5]
985 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
986 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
987 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
990 ; CI-LABEL: test_call_external_void_func_i32_imm:
992 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
993 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
994 ; CI-NEXT: s_mov_b32 s38, -1
995 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
996 ; CI-NEXT: s_add_u32 s36, s36, s5
997 ; CI-NEXT: s_addc_u32 s37, s37, 0
998 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
999 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1000 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1001 ; CI-NEXT: v_mov_b32_e32 v0, 42
1002 ; CI-NEXT: s_mov_b32 s32, 0
1003 ; CI-NEXT: s_getpc_b64 s[4:5]
1004 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
1005 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
1006 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1009 ; GFX9-LABEL: test_call_external_void_func_i32_imm:
1011 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1012 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1013 ; GFX9-NEXT: s_mov_b32 s38, -1
1014 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1015 ; GFX9-NEXT: s_add_u32 s36, s36, s5
1016 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1017 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1018 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1019 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1020 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
1021 ; GFX9-NEXT: s_mov_b32 s32, 0
1022 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1023 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
1024 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
1025 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1026 ; GFX9-NEXT: s_endpgm
1028 ; GFX11-LABEL: test_call_external_void_func_i32_imm:
1030 ; GFX11-NEXT: v_mov_b32_e32 v0, 42
1031 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1032 ; GFX11-NEXT: s_mov_b32 s32, 0
1033 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1034 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i32@rel32@lo+4
1035 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i32@rel32@hi+12
1036 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1037 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1038 ; GFX11-NEXT: s_endpgm
1040 ; HSA-LABEL: test_call_external_void_func_i32_imm:
1042 ; HSA-NEXT: s_add_i32 s8, s8, s11
1043 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
1044 ; HSA-NEXT: s_add_u32 s0, s0, s11
1045 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1046 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1047 ; HSA-NEXT: v_mov_b32_e32 v0, 42
1048 ; HSA-NEXT: s_mov_b32 s32, 0
1049 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
1050 ; HSA-NEXT: s_getpc_b64 s[8:9]
1051 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i32@rel32@lo+4
1052 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i32@rel32@hi+12
1053 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1054 ; HSA-NEXT: s_endpgm
1055 call void @external_void_func_i32(i32 42)
; Argument-less kernel that calls @external_void_func_i64 with the constant
; i64 123. The expectations below split the constant across two VGPRs:
; v0 = 0x7b and v1 = 0 (a single v_dual_mov_b32 in the GFX11 run). The
; assertion lines are autogenerated by utils/update_llc_test_checks.py (see
; the NOTE on the first line of the file).
1059 define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
1060 ; VI-LABEL: test_call_external_void_func_i64_imm:
1062 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1063 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1064 ; VI-NEXT: s_mov_b32 s38, -1
1065 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1066 ; VI-NEXT: s_add_u32 s36, s36, s3
1067 ; VI-NEXT: s_addc_u32 s37, s37, 0
1068 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1069 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1070 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1071 ; VI-NEXT: v_mov_b32_e32 v0, 0x7b
1072 ; VI-NEXT: v_mov_b32_e32 v1, 0
1073 ; VI-NEXT: s_mov_b32 s32, 0
1074 ; VI-NEXT: s_getpc_b64 s[4:5]
1075 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1076 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1077 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1080 ; CI-LABEL: test_call_external_void_func_i64_imm:
1082 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1083 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1084 ; CI-NEXT: s_mov_b32 s38, -1
1085 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1086 ; CI-NEXT: s_add_u32 s36, s36, s3
1087 ; CI-NEXT: s_addc_u32 s37, s37, 0
1088 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1089 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1090 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1091 ; CI-NEXT: v_mov_b32_e32 v0, 0x7b
1092 ; CI-NEXT: v_mov_b32_e32 v1, 0
1093 ; CI-NEXT: s_mov_b32 s32, 0
1094 ; CI-NEXT: s_getpc_b64 s[4:5]
1095 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1096 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1097 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1100 ; GFX9-LABEL: test_call_external_void_func_i64_imm:
1102 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1103 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1104 ; GFX9-NEXT: s_mov_b32 s38, -1
1105 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1106 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1107 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1108 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1109 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1110 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1111 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
1112 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1113 ; GFX9-NEXT: s_mov_b32 s32, 0
1114 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1115 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1116 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1117 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1118 ; GFX9-NEXT: s_endpgm
1120 ; GFX11-LABEL: test_call_external_void_func_i64_imm:
1122 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
1123 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1124 ; GFX11-NEXT: s_mov_b32 s32, 0
1125 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1126 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i64@rel32@lo+4
1127 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i64@rel32@hi+12
1128 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1129 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1130 ; GFX11-NEXT: s_endpgm
1132 ; HSA-LABEL: test_call_external_void_func_i64_imm:
1134 ; HSA-NEXT: s_add_i32 s6, s6, s9
1135 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1136 ; HSA-NEXT: s_add_u32 s0, s0, s9
1137 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1138 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1139 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1140 ; HSA-NEXT: v_mov_b32_e32 v0, 0x7b
1141 ; HSA-NEXT: v_mov_b32_e32 v1, 0
1142 ; HSA-NEXT: s_mov_b32 s32, 0
1143 ; HSA-NEXT: s_getpc_b64 s[8:9]
1144 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i64@rel32@lo+4
1145 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i64@rel32@hi+12
1146 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1147 ; HSA-NEXT: s_endpgm
1148 call void @external_void_func_i64(i64 123)
; Argument-less kernel that loads a <2 x i64> from the null addrspace(1)
; pointer and passes it to @external_void_func_v2i64. The expectations below
; fetch the whole vector into v[0:3] with one buffer_load_dwordx4
; (buffer_load_b128 in the GFX11 run) ahead of the s_swappc_b64 call. The
; assertion lines are autogenerated by utils/update_llc_test_checks.py (see
; the NOTE on the first line of the file).
1152 define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
1153 ; VI-LABEL: test_call_external_void_func_v2i64:
1155 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1156 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1157 ; VI-NEXT: s_mov_b32 s38, -1
1158 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1159 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1160 ; VI-NEXT: s_mov_b32 s0, 0
1161 ; VI-NEXT: s_add_u32 s36, s36, s3
1162 ; VI-NEXT: s_mov_b32 s3, 0xf000
1163 ; VI-NEXT: s_mov_b32 s2, -1
1164 ; VI-NEXT: s_mov_b32 s1, s0
1165 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1166 ; VI-NEXT: s_addc_u32 s37, s37, 0
1167 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1168 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1169 ; VI-NEXT: s_mov_b32 s32, 0
1170 ; VI-NEXT: s_getpc_b64 s[4:5]
1171 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1172 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1173 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1176 ; CI-LABEL: test_call_external_void_func_v2i64:
1178 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1179 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1180 ; CI-NEXT: s_mov_b32 s38, -1
1181 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1182 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1183 ; CI-NEXT: s_mov_b32 s0, 0
1184 ; CI-NEXT: s_add_u32 s36, s36, s3
1185 ; CI-NEXT: s_mov_b32 s3, 0xf000
1186 ; CI-NEXT: s_mov_b32 s2, -1
1187 ; CI-NEXT: s_mov_b32 s1, s0
1188 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1189 ; CI-NEXT: s_addc_u32 s37, s37, 0
1190 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1191 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1192 ; CI-NEXT: s_mov_b32 s32, 0
1193 ; CI-NEXT: s_getpc_b64 s[4:5]
1194 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1195 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1196 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1199 ; GFX9-LABEL: test_call_external_void_func_v2i64:
1201 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1202 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1203 ; GFX9-NEXT: s_mov_b32 s38, -1
1204 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1205 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1206 ; GFX9-NEXT: s_mov_b32 s0, 0
1207 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1208 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
1209 ; GFX9-NEXT: s_mov_b32 s2, -1
1210 ; GFX9-NEXT: s_mov_b32 s1, s0
1211 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1212 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1213 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1214 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1215 ; GFX9-NEXT: s_mov_b32 s32, 0
1216 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1217 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1218 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1219 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1220 ; GFX9-NEXT: s_endpgm
1222 ; GFX11-LABEL: test_call_external_void_func_v2i64:
1224 ; GFX11-NEXT: s_mov_b32 s4, 0
1225 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
1226 ; GFX11-NEXT: s_mov_b32 s6, -1
1227 ; GFX11-NEXT: s_mov_b32 s5, s4
1228 ; GFX11-NEXT: s_mov_b32 s32, 0
1229 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
1230 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1231 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1232 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4
1233 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12
1234 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1235 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1236 ; GFX11-NEXT: s_endpgm
1238 ; HSA-LABEL: test_call_external_void_func_v2i64:
1240 ; HSA-NEXT: s_add_i32 s6, s6, s9
1241 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1242 ; HSA-NEXT: s_mov_b32 s8, 0
1243 ; HSA-NEXT: s_add_u32 s0, s0, s9
1244 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
1245 ; HSA-NEXT: s_mov_b32 s10, -1
1246 ; HSA-NEXT: s_mov_b32 s9, s8
1247 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1248 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1249 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1250 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1251 ; HSA-NEXT: s_mov_b32 s32, 0
1252 ; HSA-NEXT: s_getpc_b64 s[8:9]
1253 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i64@rel32@lo+4
1254 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64@rel32@hi+12
1255 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1256 ; HSA-NEXT: s_endpgm
1257 %val = load <2 x i64>, ptr addrspace(1) null
1258 call void @external_void_func_v2i64(<2 x i64> %val)
; Argument-less kernel that calls @external_void_func_v2i64 with the constant
; vector <i64 8589934593, i64 17179869187>. 8589934593 is 2 * 2^32 + 1 and
; 17179869187 is 4 * 2^32 + 3, so the four 32-bit pieces expected in v0..v3
; below are 1, 2, 3 and 4. The assertion lines are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of the file).
1262 define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
1263 ; VI-LABEL: test_call_external_void_func_v2i64_imm:
1265 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1266 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1267 ; VI-NEXT: s_mov_b32 s38, -1
1268 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1269 ; VI-NEXT: s_add_u32 s36, s36, s3
1270 ; VI-NEXT: s_addc_u32 s37, s37, 0
1271 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1272 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1273 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1274 ; VI-NEXT: v_mov_b32_e32 v0, 1
1275 ; VI-NEXT: v_mov_b32_e32 v1, 2
1276 ; VI-NEXT: v_mov_b32_e32 v2, 3
1277 ; VI-NEXT: v_mov_b32_e32 v3, 4
1278 ; VI-NEXT: s_mov_b32 s32, 0
1279 ; VI-NEXT: s_getpc_b64 s[4:5]
1280 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1281 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1282 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1285 ; CI-LABEL: test_call_external_void_func_v2i64_imm:
1287 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1288 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1289 ; CI-NEXT: s_mov_b32 s38, -1
1290 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1291 ; CI-NEXT: s_add_u32 s36, s36, s3
1292 ; CI-NEXT: s_addc_u32 s37, s37, 0
1293 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1294 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1295 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1296 ; CI-NEXT: v_mov_b32_e32 v0, 1
1297 ; CI-NEXT: v_mov_b32_e32 v1, 2
1298 ; CI-NEXT: v_mov_b32_e32 v2, 3
1299 ; CI-NEXT: v_mov_b32_e32 v3, 4
1300 ; CI-NEXT: s_mov_b32 s32, 0
1301 ; CI-NEXT: s_getpc_b64 s[4:5]
1302 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1303 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1304 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1307 ; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
1309 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1310 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1311 ; GFX9-NEXT: s_mov_b32 s38, -1
1312 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1313 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1314 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1315 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1316 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1317 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1318 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
1319 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
1320 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
1321 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
1322 ; GFX9-NEXT: s_mov_b32 s32, 0
1323 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1324 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1325 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1326 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1327 ; GFX9-NEXT: s_endpgm
1329 ; GFX11-LABEL: test_call_external_void_func_v2i64_imm:
1331 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
1332 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
1333 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1334 ; GFX11-NEXT: s_mov_b32 s32, 0
1335 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1336 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4
1337 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12
1338 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1339 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1340 ; GFX11-NEXT: s_endpgm
1342 ; HSA-LABEL: test_call_external_void_func_v2i64_imm:
1344 ; HSA-NEXT: s_add_i32 s6, s6, s9
1345 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1346 ; HSA-NEXT: s_add_u32 s0, s0, s9
1347 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1348 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1349 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1350 ; HSA-NEXT: v_mov_b32_e32 v0, 1
1351 ; HSA-NEXT: v_mov_b32_e32 v1, 2
1352 ; HSA-NEXT: v_mov_b32_e32 v2, 3
1353 ; HSA-NEXT: v_mov_b32_e32 v3, 4
1354 ; HSA-NEXT: s_mov_b32 s32, 0
1355 ; HSA-NEXT: s_getpc_b64 s[8:9]
1356 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i64@rel32@lo+4
1357 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64@rel32@hi+12
1358 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1359 ; HSA-NEXT: s_endpgm
1360 call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
; Argument-less kernel that builds a <3 x i64> and passes it to
; @external_void_func_v3i64. Elements 0 and 1 come from a <2 x i64> loaded
; from the null addrspace(1) pointer; element 2 is the constant 8589934593
; (2 * 2^32 + 1), selected by shuffle mask index 2. The expectations below
; load v[0:3] with one 128-bit buffer load and set v4 = 1, v5 = 2 for the
; constant element. The assertion lines are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of the file).
1364 define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
1365 ; VI-LABEL: test_call_external_void_func_v3i64:
1367 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1368 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1369 ; VI-NEXT: s_mov_b32 s38, -1
1370 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1371 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1372 ; VI-NEXT: s_mov_b32 s0, 0
1373 ; VI-NEXT: s_add_u32 s36, s36, s3
1374 ; VI-NEXT: s_mov_b32 s3, 0xf000
1375 ; VI-NEXT: s_mov_b32 s2, -1
1376 ; VI-NEXT: s_mov_b32 s1, s0
1377 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1378 ; VI-NEXT: s_addc_u32 s37, s37, 0
1379 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1380 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1381 ; VI-NEXT: v_mov_b32_e32 v4, 1
1382 ; VI-NEXT: v_mov_b32_e32 v5, 2
1383 ; VI-NEXT: s_mov_b32 s32, 0
1384 ; VI-NEXT: s_getpc_b64 s[4:5]
1385 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1386 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1387 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1390 ; CI-LABEL: test_call_external_void_func_v3i64:
1392 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1393 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1394 ; CI-NEXT: s_mov_b32 s38, -1
1395 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1396 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1397 ; CI-NEXT: s_mov_b32 s0, 0
1398 ; CI-NEXT: s_add_u32 s36, s36, s3
1399 ; CI-NEXT: s_mov_b32 s3, 0xf000
1400 ; CI-NEXT: s_mov_b32 s2, -1
1401 ; CI-NEXT: s_mov_b32 s1, s0
1402 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1403 ; CI-NEXT: s_addc_u32 s37, s37, 0
1404 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1405 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1406 ; CI-NEXT: v_mov_b32_e32 v4, 1
1407 ; CI-NEXT: v_mov_b32_e32 v5, 2
1408 ; CI-NEXT: s_mov_b32 s32, 0
1409 ; CI-NEXT: s_getpc_b64 s[4:5]
1410 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1411 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1412 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1415 ; GFX9-LABEL: test_call_external_void_func_v3i64:
1417 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1418 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1419 ; GFX9-NEXT: s_mov_b32 s38, -1
1420 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1421 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1422 ; GFX9-NEXT: s_mov_b32 s0, 0
1423 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1424 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
1425 ; GFX9-NEXT: s_mov_b32 s2, -1
1426 ; GFX9-NEXT: s_mov_b32 s1, s0
1427 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1428 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1429 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1430 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1431 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
1432 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
1433 ; GFX9-NEXT: s_mov_b32 s32, 0
1434 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1435 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1436 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1437 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1438 ; GFX9-NEXT: s_endpgm
1440 ; GFX11-LABEL: test_call_external_void_func_v3i64:
1442 ; GFX11-NEXT: s_mov_b32 s4, 0
1443 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
1444 ; GFX11-NEXT: s_mov_b32 s6, -1
1445 ; GFX11-NEXT: s_mov_b32 s5, s4
1446 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2
1447 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
1448 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1449 ; GFX11-NEXT: s_mov_b32 s32, 0
1450 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1451 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i64@rel32@lo+4
1452 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i64@rel32@hi+12
1453 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1454 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1455 ; GFX11-NEXT: s_endpgm
1457 ; HSA-LABEL: test_call_external_void_func_v3i64:
1459 ; HSA-NEXT: s_add_i32 s6, s6, s9
1460 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1461 ; HSA-NEXT: s_mov_b32 s8, 0
1462 ; HSA-NEXT: s_add_u32 s0, s0, s9
1463 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
1464 ; HSA-NEXT: s_mov_b32 s10, -1
1465 ; HSA-NEXT: s_mov_b32 s9, s8
1466 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1467 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1468 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1469 ; HSA-NEXT: v_mov_b32_e32 v4, 1
1470 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1471 ; HSA-NEXT: v_mov_b32_e32 v5, 2
1472 ; HSA-NEXT: s_mov_b32 s32, 0
1473 ; HSA-NEXT: s_getpc_b64 s[8:9]
1474 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i64@rel32@lo+4
1475 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i64@rel32@hi+12
1476 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1477 ; HSA-NEXT: s_endpgm
1478 %load = load <2 x i64>, ptr addrspace(1) null
1479 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
1481 call void @external_void_func_v3i64(<3 x i64> %val)
; Argument-less kernel that builds a <4 x i64> and passes it to
; @external_void_func_v4i64. Elements 0 and 1 come from a <2 x i64> loaded
; from the null addrspace(1) pointer; elements 2 and 3 are the constants
; 8589934593 (2 * 2^32 + 1) and 17179869187 (4 * 2^32 + 3). The expectations
; below load v[0:3] with one 128-bit buffer load and set v4..v7 to 1, 2, 3, 4
; for the constant elements. The assertion lines are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of the file).
1485 define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
1486 ; VI-LABEL: test_call_external_void_func_v4i64:
1488 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1489 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1490 ; VI-NEXT: s_mov_b32 s38, -1
1491 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1492 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1493 ; VI-NEXT: s_mov_b32 s0, 0
1494 ; VI-NEXT: s_add_u32 s36, s36, s3
1495 ; VI-NEXT: s_mov_b32 s3, 0xf000
1496 ; VI-NEXT: s_mov_b32 s2, -1
1497 ; VI-NEXT: s_mov_b32 s1, s0
1498 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1499 ; VI-NEXT: s_addc_u32 s37, s37, 0
1500 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1501 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1502 ; VI-NEXT: v_mov_b32_e32 v4, 1
1503 ; VI-NEXT: v_mov_b32_e32 v5, 2
1504 ; VI-NEXT: v_mov_b32_e32 v6, 3
1505 ; VI-NEXT: v_mov_b32_e32 v7, 4
1506 ; VI-NEXT: s_mov_b32 s32, 0
1507 ; VI-NEXT: s_getpc_b64 s[4:5]
1508 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1509 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1510 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1513 ; CI-LABEL: test_call_external_void_func_v4i64:
1515 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1516 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1517 ; CI-NEXT: s_mov_b32 s38, -1
1518 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1519 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1520 ; CI-NEXT: s_mov_b32 s0, 0
1521 ; CI-NEXT: s_add_u32 s36, s36, s3
1522 ; CI-NEXT: s_mov_b32 s3, 0xf000
1523 ; CI-NEXT: s_mov_b32 s2, -1
1524 ; CI-NEXT: s_mov_b32 s1, s0
1525 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1526 ; CI-NEXT: s_addc_u32 s37, s37, 0
1527 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1528 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1529 ; CI-NEXT: v_mov_b32_e32 v4, 1
1530 ; CI-NEXT: v_mov_b32_e32 v5, 2
1531 ; CI-NEXT: v_mov_b32_e32 v6, 3
1532 ; CI-NEXT: v_mov_b32_e32 v7, 4
1533 ; CI-NEXT: s_mov_b32 s32, 0
1534 ; CI-NEXT: s_getpc_b64 s[4:5]
1535 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1536 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1537 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1540 ; GFX9-LABEL: test_call_external_void_func_v4i64:
1542 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1543 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1544 ; GFX9-NEXT: s_mov_b32 s38, -1
1545 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1546 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1547 ; GFX9-NEXT: s_mov_b32 s0, 0
1548 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1549 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
1550 ; GFX9-NEXT: s_mov_b32 s2, -1
1551 ; GFX9-NEXT: s_mov_b32 s1, s0
1552 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1553 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1554 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1555 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1556 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
1557 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
1558 ; GFX9-NEXT: v_mov_b32_e32 v6, 3
1559 ; GFX9-NEXT: v_mov_b32_e32 v7, 4
1560 ; GFX9-NEXT: s_mov_b32 s32, 0
1561 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1562 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1563 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1564 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1565 ; GFX9-NEXT: s_endpgm
1567 ; GFX11-LABEL: test_call_external_void_func_v4i64:
1569 ; GFX11-NEXT: s_mov_b32 s4, 0
1570 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
1571 ; GFX11-NEXT: s_mov_b32 s6, -1
1572 ; GFX11-NEXT: s_mov_b32 s5, s4
1573 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2
1574 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
1575 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4
1576 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1577 ; GFX11-NEXT: s_mov_b32 s32, 0
1578 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1579 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i64@rel32@lo+4
1580 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i64@rel32@hi+12
1581 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1582 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1583 ; GFX11-NEXT: s_endpgm
1585 ; HSA-LABEL: test_call_external_void_func_v4i64:
1587 ; HSA-NEXT: s_add_i32 s6, s6, s9
1588 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1589 ; HSA-NEXT: s_mov_b32 s8, 0
1590 ; HSA-NEXT: s_add_u32 s0, s0, s9
1591 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
1592 ; HSA-NEXT: s_mov_b32 s10, -1
1593 ; HSA-NEXT: s_mov_b32 s9, s8
1594 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1595 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1596 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1597 ; HSA-NEXT: v_mov_b32_e32 v4, 1
1598 ; HSA-NEXT: v_mov_b32_e32 v5, 2
1599 ; HSA-NEXT: v_mov_b32_e32 v6, 3
1600 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1601 ; HSA-NEXT: v_mov_b32_e32 v7, 4
1602 ; HSA-NEXT: s_mov_b32 s32, 0
1603 ; HSA-NEXT: s_getpc_b64 s[8:9]
1604 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i64@rel32@lo+4
1605 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i64@rel32@hi+12
1606 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1607 ; HSA-NEXT: s_endpgm
1608 %load = load <2 x i64>, ptr addrspace(1) null
1609 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1610 call void @external_void_func_v4i64(<4 x i64> %val)
; Kernel that calls external_void_func_f16 with the constant half 4.0.
; The fiji, gfx900, gfx1100 and amdhsa check blocks expect the argument in v0
; as 0x4400 (the IEEE half bit pattern of 4.0); the hawaii check block expects
; v0 to be written with the literal 4.0 instead.
1614 define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
1615 ; VI-LABEL: test_call_external_void_func_f16_imm:
1617 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1618 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1619 ; VI-NEXT: s_mov_b32 s38, -1
1620 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1621 ; VI-NEXT: s_add_u32 s36, s36, s3
1622 ; VI-NEXT: s_addc_u32 s37, s37, 0
1623 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1624 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1625 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1626 ; VI-NEXT: v_mov_b32_e32 v0, 0x4400
1627 ; VI-NEXT: s_mov_b32 s32, 0
1628 ; VI-NEXT: s_getpc_b64 s[4:5]
1629 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1630 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1631 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1634 ; CI-LABEL: test_call_external_void_func_f16_imm:
1636 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1637 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1638 ; CI-NEXT: s_mov_b32 s38, -1
1639 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1640 ; CI-NEXT: s_add_u32 s36, s36, s3
1641 ; CI-NEXT: s_addc_u32 s37, s37, 0
1642 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1643 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1644 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1645 ; CI-NEXT: v_mov_b32_e32 v0, 4.0
1646 ; CI-NEXT: s_mov_b32 s32, 0
1647 ; CI-NEXT: s_getpc_b64 s[4:5]
1648 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1649 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1650 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1653 ; GFX9-LABEL: test_call_external_void_func_f16_imm:
1655 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1656 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1657 ; GFX9-NEXT: s_mov_b32 s38, -1
1658 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1659 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1660 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1661 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1662 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1663 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1664 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
1665 ; GFX9-NEXT: s_mov_b32 s32, 0
1666 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1667 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1668 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1669 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1670 ; GFX9-NEXT: s_endpgm
1672 ; GFX11-LABEL: test_call_external_void_func_f16_imm:
1674 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400
1675 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1676 ; GFX11-NEXT: s_mov_b32 s32, 0
1677 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1678 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4
1679 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12
1680 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1681 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1682 ; GFX11-NEXT: s_endpgm
1684 ; HSA-LABEL: test_call_external_void_func_f16_imm:
1686 ; HSA-NEXT: s_add_i32 s6, s6, s9
1687 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1688 ; HSA-NEXT: s_add_u32 s0, s0, s9
1689 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1690 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1691 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1692 ; HSA-NEXT: v_mov_b32_e32 v0, 0x4400
1693 ; HSA-NEXT: s_mov_b32 s32, 0
1694 ; HSA-NEXT: s_getpc_b64 s[8:9]
1695 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f16@rel32@lo+4
1696 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f16@rel32@hi+12
1697 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1698 ; HSA-NEXT: s_endpgm
1699 call void @external_void_func_f16(half 4.0)
; Kernel that calls external_void_func_f32 with the constant float 4.0.
; Every check block below expects the argument to be written with
; v_mov_b32_e32 v0, 4.0 ahead of the s_getpc_b64 / s_swappc_b64 call sequence.
1703 define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
1704 ; VI-LABEL: test_call_external_void_func_f32_imm:
1706 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1707 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1708 ; VI-NEXT: s_mov_b32 s38, -1
1709 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1710 ; VI-NEXT: s_add_u32 s36, s36, s3
1711 ; VI-NEXT: s_addc_u32 s37, s37, 0
1712 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1713 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1714 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1715 ; VI-NEXT: v_mov_b32_e32 v0, 4.0
1716 ; VI-NEXT: s_mov_b32 s32, 0
1717 ; VI-NEXT: s_getpc_b64 s[4:5]
1718 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1719 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1720 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1723 ; CI-LABEL: test_call_external_void_func_f32_imm:
1725 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1726 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1727 ; CI-NEXT: s_mov_b32 s38, -1
1728 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1729 ; CI-NEXT: s_add_u32 s36, s36, s3
1730 ; CI-NEXT: s_addc_u32 s37, s37, 0
1731 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1732 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1733 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1734 ; CI-NEXT: v_mov_b32_e32 v0, 4.0
1735 ; CI-NEXT: s_mov_b32 s32, 0
1736 ; CI-NEXT: s_getpc_b64 s[4:5]
1737 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1738 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1739 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1742 ; GFX9-LABEL: test_call_external_void_func_f32_imm:
1744 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1745 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1746 ; GFX9-NEXT: s_mov_b32 s38, -1
1747 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1748 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1749 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1750 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1751 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1752 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1753 ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
1754 ; GFX9-NEXT: s_mov_b32 s32, 0
1755 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1756 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1757 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1758 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1759 ; GFX9-NEXT: s_endpgm
1761 ; GFX11-LABEL: test_call_external_void_func_f32_imm:
1763 ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0
1764 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1765 ; GFX11-NEXT: s_mov_b32 s32, 0
1766 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1767 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f32@rel32@lo+4
1768 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f32@rel32@hi+12
1769 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1770 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1771 ; GFX11-NEXT: s_endpgm
1773 ; HSA-LABEL: test_call_external_void_func_f32_imm:
1775 ; HSA-NEXT: s_add_i32 s6, s6, s9
1776 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1777 ; HSA-NEXT: s_add_u32 s0, s0, s9
1778 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1779 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1780 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1781 ; HSA-NEXT: v_mov_b32_e32 v0, 4.0
1782 ; HSA-NEXT: s_mov_b32 s32, 0
1783 ; HSA-NEXT: s_getpc_b64 s[8:9]
1784 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f32@rel32@lo+4
1785 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f32@rel32@hi+12
1786 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1787 ; HSA-NEXT: s_endpgm
1788 call void @external_void_func_f32(float 4.0)
; Kernel that calls external_void_func_v2f32 with <float 1.0, float 2.0>.
; The check blocks expect the two elements in v0 (1.0) and v1 (2.0); the
; gfx1100 check block expects both writes as one v_dual_mov_b32 pair.
1792 define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
1793 ; VI-LABEL: test_call_external_void_func_v2f32_imm:
1795 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1796 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1797 ; VI-NEXT: s_mov_b32 s38, -1
1798 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1799 ; VI-NEXT: s_add_u32 s36, s36, s3
1800 ; VI-NEXT: s_addc_u32 s37, s37, 0
1801 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1802 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1803 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1804 ; VI-NEXT: v_mov_b32_e32 v0, 1.0
1805 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
1806 ; VI-NEXT: s_mov_b32 s32, 0
1807 ; VI-NEXT: s_getpc_b64 s[4:5]
1808 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1809 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1810 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1813 ; CI-LABEL: test_call_external_void_func_v2f32_imm:
1815 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1816 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1817 ; CI-NEXT: s_mov_b32 s38, -1
1818 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1819 ; CI-NEXT: s_add_u32 s36, s36, s3
1820 ; CI-NEXT: s_addc_u32 s37, s37, 0
1821 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1822 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1823 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1824 ; CI-NEXT: v_mov_b32_e32 v0, 1.0
1825 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
1826 ; CI-NEXT: s_mov_b32 s32, 0
1827 ; CI-NEXT: s_getpc_b64 s[4:5]
1828 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1829 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1830 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1833 ; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
1835 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1836 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1837 ; GFX9-NEXT: s_mov_b32 s38, -1
1838 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1839 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1840 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1841 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1842 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1843 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1844 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
1845 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
1846 ; GFX9-NEXT: s_mov_b32 s32, 0
1847 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1848 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1849 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1850 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1851 ; GFX9-NEXT: s_endpgm
1853 ; GFX11-LABEL: test_call_external_void_func_v2f32_imm:
1855 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
1856 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1857 ; GFX11-NEXT: s_mov_b32 s32, 0
1858 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1859 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f32@rel32@lo+4
1860 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f32@rel32@hi+12
1861 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1862 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1863 ; GFX11-NEXT: s_endpgm
1865 ; HSA-LABEL: test_call_external_void_func_v2f32_imm:
1867 ; HSA-NEXT: s_add_i32 s6, s6, s9
1868 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1869 ; HSA-NEXT: s_add_u32 s0, s0, s9
1870 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1871 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1872 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1873 ; HSA-NEXT: v_mov_b32_e32 v0, 1.0
1874 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0
1875 ; HSA-NEXT: s_mov_b32 s32, 0
1876 ; HSA-NEXT: s_getpc_b64 s[8:9]
1877 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f32@rel32@lo+4
1878 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f32@rel32@hi+12
1879 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1880 ; HSA-NEXT: s_endpgm
1881 call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
; Kernel that calls external_void_func_v3f32 with
; <float 1.0, float 2.0, float 4.0>.
; The check blocks expect the elements in v0, v1 and v2 in that order; the
; gfx1100 check block expects a v_dual_mov_b32 pair for v0/v1 followed by a
; plain v_mov_b32_e32 for v2.
1885 define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
1886 ; VI-LABEL: test_call_external_void_func_v3f32_imm:
1888 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1889 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1890 ; VI-NEXT: s_mov_b32 s38, -1
1891 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1892 ; VI-NEXT: s_add_u32 s36, s36, s3
1893 ; VI-NEXT: s_addc_u32 s37, s37, 0
1894 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1895 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1896 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1897 ; VI-NEXT: v_mov_b32_e32 v0, 1.0
1898 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
1899 ; VI-NEXT: v_mov_b32_e32 v2, 4.0
1900 ; VI-NEXT: s_mov_b32 s32, 0
1901 ; VI-NEXT: s_getpc_b64 s[4:5]
1902 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1903 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1904 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1907 ; CI-LABEL: test_call_external_void_func_v3f32_imm:
1909 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1910 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1911 ; CI-NEXT: s_mov_b32 s38, -1
1912 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
1913 ; CI-NEXT: s_add_u32 s36, s36, s3
1914 ; CI-NEXT: s_addc_u32 s37, s37, 0
1915 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
1916 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
1917 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
1918 ; CI-NEXT: v_mov_b32_e32 v0, 1.0
1919 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
1920 ; CI-NEXT: v_mov_b32_e32 v2, 4.0
1921 ; CI-NEXT: s_mov_b32 s32, 0
1922 ; CI-NEXT: s_getpc_b64 s[4:5]
1923 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1924 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1925 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
1928 ; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
1930 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1931 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1932 ; GFX9-NEXT: s_mov_b32 s38, -1
1933 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
1934 ; GFX9-NEXT: s_add_u32 s36, s36, s3
1935 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
1936 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
1937 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
1938 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
1939 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
1940 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
1941 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
1942 ; GFX9-NEXT: s_mov_b32 s32, 0
1943 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1944 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1945 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1946 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1947 ; GFX9-NEXT: s_endpgm
1949 ; GFX11-LABEL: test_call_external_void_func_v3f32_imm:
1951 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
1952 ; GFX11-NEXT: v_mov_b32_e32 v2, 4.0
1953 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
1954 ; GFX11-NEXT: s_mov_b32 s32, 0
1955 ; GFX11-NEXT: s_getpc_b64 s[2:3]
1956 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f32@rel32@lo+4
1957 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f32@rel32@hi+12
1958 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1959 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
1960 ; GFX11-NEXT: s_endpgm
1962 ; HSA-LABEL: test_call_external_void_func_v3f32_imm:
1964 ; HSA-NEXT: s_add_i32 s6, s6, s9
1965 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
1966 ; HSA-NEXT: s_add_u32 s0, s0, s9
1967 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
1968 ; HSA-NEXT: s_addc_u32 s1, s1, 0
1969 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
1970 ; HSA-NEXT: v_mov_b32_e32 v0, 1.0
1971 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0
1972 ; HSA-NEXT: v_mov_b32_e32 v2, 4.0
1973 ; HSA-NEXT: s_mov_b32 s32, 0
1974 ; HSA-NEXT: s_getpc_b64 s[8:9]
1975 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f32@rel32@lo+4
1976 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32@rel32@hi+12
1977 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
1978 ; HSA-NEXT: s_endpgm
1979 call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
; Kernel that calls external_void_func_v5f32 with
; <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>.
; The check blocks expect the five elements in v0 through v4 in that order;
; the gfx1100 check block expects two v_dual_mov_b32 pairs (v0/v1, v2/v3)
; followed by a plain v_mov_b32_e32 for v4.
1983 define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
1984 ; VI-LABEL: test_call_external_void_func_v5f32_imm:
1986 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1987 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1988 ; VI-NEXT: s_mov_b32 s38, -1
1989 ; VI-NEXT: s_mov_b32 s39, 0xe80000
1990 ; VI-NEXT: s_add_u32 s36, s36, s3
1991 ; VI-NEXT: s_addc_u32 s37, s37, 0
1992 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
1993 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
1994 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
1995 ; VI-NEXT: v_mov_b32_e32 v0, 1.0
1996 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
1997 ; VI-NEXT: v_mov_b32_e32 v2, 4.0
1998 ; VI-NEXT: v_mov_b32_e32 v3, -1.0
1999 ; VI-NEXT: v_mov_b32_e32 v4, 0.5
2000 ; VI-NEXT: s_mov_b32 s32, 0
2001 ; VI-NEXT: s_getpc_b64 s[4:5]
2002 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
2003 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
2004 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2007 ; CI-LABEL: test_call_external_void_func_v5f32_imm:
2009 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2010 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2011 ; CI-NEXT: s_mov_b32 s38, -1
2012 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2013 ; CI-NEXT: s_add_u32 s36, s36, s3
2014 ; CI-NEXT: s_addc_u32 s37, s37, 0
2015 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2016 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2017 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2018 ; CI-NEXT: v_mov_b32_e32 v0, 1.0
2019 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
2020 ; CI-NEXT: v_mov_b32_e32 v2, 4.0
2021 ; CI-NEXT: v_mov_b32_e32 v3, -1.0
2022 ; CI-NEXT: v_mov_b32_e32 v4, 0.5
2023 ; CI-NEXT: s_mov_b32 s32, 0
2024 ; CI-NEXT: s_getpc_b64 s[4:5]
2025 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
2026 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
2027 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2030 ; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
2032 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2033 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2034 ; GFX9-NEXT: s_mov_b32 s38, -1
2035 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2036 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2037 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2038 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2039 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2040 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2041 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
2042 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2043 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
2044 ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0
2045 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5
2046 ; GFX9-NEXT: s_mov_b32 s32, 0
2047 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2048 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
2049 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
2050 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2051 ; GFX9-NEXT: s_endpgm
2053 ; GFX11-LABEL: test_call_external_void_func_v5f32_imm:
2055 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2056 ; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0
2057 ; GFX11-NEXT: v_mov_b32_e32 v4, 0.5
2058 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2059 ; GFX11-NEXT: s_mov_b32 s32, 0
2060 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2061 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v5f32@rel32@lo+4
2062 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v5f32@rel32@hi+12
2063 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2064 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2065 ; GFX11-NEXT: s_endpgm
2067 ; HSA-LABEL: test_call_external_void_func_v5f32_imm:
2069 ; HSA-NEXT: s_add_i32 s6, s6, s9
2070 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2071 ; HSA-NEXT: s_add_u32 s0, s0, s9
2072 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2073 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2074 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2075 ; HSA-NEXT: v_mov_b32_e32 v0, 1.0
2076 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0
2077 ; HSA-NEXT: v_mov_b32_e32 v2, 4.0
2078 ; HSA-NEXT: v_mov_b32_e32 v3, -1.0
2079 ; HSA-NEXT: v_mov_b32_e32 v4, 0.5
2080 ; HSA-NEXT: s_mov_b32 s32, 0
2081 ; HSA-NEXT: s_getpc_b64 s[8:9]
2082 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v5f32@rel32@lo+4
2083 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v5f32@rel32@hi+12
2084 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2085 ; HSA-NEXT: s_endpgm
2086 call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
; Kernel that calls external_void_func_f64 with the constant double 4.0.
; The check blocks expect the value split over two registers: v0 = 0 and
; v1 = 0x40100000, i.e. the low and high 32 bits of 0x4010000000000000, the
; IEEE double bit pattern of 4.0.
2090 define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
2091 ; VI-LABEL: test_call_external_void_func_f64_imm:
2093 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2094 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2095 ; VI-NEXT: s_mov_b32 s38, -1
2096 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2097 ; VI-NEXT: s_add_u32 s36, s36, s3
2098 ; VI-NEXT: s_addc_u32 s37, s37, 0
2099 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2100 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2101 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2102 ; VI-NEXT: v_mov_b32_e32 v0, 0
2103 ; VI-NEXT: v_mov_b32_e32 v1, 0x40100000
2104 ; VI-NEXT: s_mov_b32 s32, 0
2105 ; VI-NEXT: s_getpc_b64 s[4:5]
2106 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
2107 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
2108 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2111 ; CI-LABEL: test_call_external_void_func_f64_imm:
2113 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2114 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2115 ; CI-NEXT: s_mov_b32 s38, -1
2116 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2117 ; CI-NEXT: s_add_u32 s36, s36, s3
2118 ; CI-NEXT: s_addc_u32 s37, s37, 0
2119 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2120 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2121 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2122 ; CI-NEXT: v_mov_b32_e32 v0, 0
2123 ; CI-NEXT: v_mov_b32_e32 v1, 0x40100000
2124 ; CI-NEXT: s_mov_b32 s32, 0
2125 ; CI-NEXT: s_getpc_b64 s[4:5]
2126 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
2127 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
2128 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2131 ; GFX9-LABEL: test_call_external_void_func_f64_imm:
2133 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2134 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2135 ; GFX9-NEXT: s_mov_b32 s38, -1
2136 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2137 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2138 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2139 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2140 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2141 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2142 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2143 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
2144 ; GFX9-NEXT: s_mov_b32 s32, 0
2145 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2146 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
2147 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
2148 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2149 ; GFX9-NEXT: s_endpgm
2151 ; GFX11-LABEL: test_call_external_void_func_f64_imm:
2153 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000
2154 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2155 ; GFX11-NEXT: s_mov_b32 s32, 0
2156 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2157 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f64@rel32@lo+4
2158 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f64@rel32@hi+12
2159 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2160 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2161 ; GFX11-NEXT: s_endpgm
2163 ; HSA-LABEL: test_call_external_void_func_f64_imm:
2165 ; HSA-NEXT: s_add_i32 s6, s6, s9
2166 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2167 ; HSA-NEXT: s_add_u32 s0, s0, s9
2168 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2169 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2170 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2171 ; HSA-NEXT: v_mov_b32_e32 v0, 0
2172 ; HSA-NEXT: v_mov_b32_e32 v1, 0x40100000
2173 ; HSA-NEXT: s_mov_b32 s32, 0
2174 ; HSA-NEXT: s_getpc_b64 s[8:9]
2175 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f64@rel32@lo+4
2176 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f64@rel32@hi+12
2177 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2178 ; HSA-NEXT: s_endpgm
2179 call void @external_void_func_f64(double 4.0)
; Kernel that calls external_void_func_v2f64 with <double 2.0, double 4.0>.
; The check blocks expect each element split over a register pair:
; v0/v1 = 0 / 2.0 for the first element and v2/v3 = 0 / 0x40100000 for the
; second. The "2.0" operand is presumably the high word 0x40000000 of double
; 2.0, which shares its bit pattern with float 2.0 -- TODO confirm against the
; assembly printer.
2183 define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
2184 ; VI-LABEL: test_call_external_void_func_v2f64_imm:
2186 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2187 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2188 ; VI-NEXT: s_mov_b32 s38, -1
2189 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2190 ; VI-NEXT: s_add_u32 s36, s36, s3
2191 ; VI-NEXT: s_addc_u32 s37, s37, 0
2192 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2193 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2194 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2195 ; VI-NEXT: v_mov_b32_e32 v0, 0
2196 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
2197 ; VI-NEXT: v_mov_b32_e32 v2, 0
2198 ; VI-NEXT: v_mov_b32_e32 v3, 0x40100000
2199 ; VI-NEXT: s_mov_b32 s32, 0
2200 ; VI-NEXT: s_getpc_b64 s[4:5]
2201 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2202 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2203 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2206 ; CI-LABEL: test_call_external_void_func_v2f64_imm:
2208 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2209 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2210 ; CI-NEXT: s_mov_b32 s38, -1
2211 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2212 ; CI-NEXT: s_add_u32 s36, s36, s3
2213 ; CI-NEXT: s_addc_u32 s37, s37, 0
2214 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2215 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2216 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2217 ; CI-NEXT: v_mov_b32_e32 v0, 0
2218 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
2219 ; CI-NEXT: v_mov_b32_e32 v2, 0
2220 ; CI-NEXT: v_mov_b32_e32 v3, 0x40100000
2221 ; CI-NEXT: s_mov_b32 s32, 0
2222 ; CI-NEXT: s_getpc_b64 s[4:5]
2223 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2224 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2225 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2228 ; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
2230 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2231 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2232 ; GFX9-NEXT: s_mov_b32 s38, -1
2233 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2234 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2235 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2236 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2237 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2238 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2239 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2240 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2241 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
2242 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
2243 ; GFX9-NEXT: s_mov_b32 s32, 0
2244 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2245 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2246 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2247 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2248 ; GFX9-NEXT: s_endpgm
2250 ; GFX11-LABEL: test_call_external_void_func_v2f64_imm:
2252 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2253 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2254 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2255 ; GFX11-NEXT: s_mov_b32 s32, 0
2256 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2257 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f64@rel32@lo+4
2258 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f64@rel32@hi+12
2259 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2260 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2261 ; GFX11-NEXT: s_endpgm
2263 ; HSA-LABEL: test_call_external_void_func_v2f64_imm:
2265 ; HSA-NEXT: s_add_i32 s6, s6, s9
2266 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2267 ; HSA-NEXT: s_add_u32 s0, s0, s9
2268 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2269 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2270 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2271 ; HSA-NEXT: v_mov_b32_e32 v0, 0
2272 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0
2273 ; HSA-NEXT: v_mov_b32_e32 v2, 0
2274 ; HSA-NEXT: v_mov_b32_e32 v3, 0x40100000
2275 ; HSA-NEXT: s_mov_b32 s32, 0
2276 ; HSA-NEXT: s_getpc_b64 s[8:9]
2277 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f64@rel32@lo+4
2278 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64@rel32@hi+12
2279 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2280 ; HSA-NEXT: s_endpgm
2281 call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
; Kernel that calls external_void_func_v3f64 with
; <double 2.0, double 4.0, double 8.0>.
; The check blocks expect each element split over a register pair:
; v0/v1 = 0 / 2.0, v2/v3 = 0 / 0x40100000 and v4/v5 = 0 / 0x40200000.
; 0x40100000 and 0x40200000 are the high 32 bits of the double bit patterns of
; 4.0 and 8.0; the "2.0" operand is presumably the high word 0x40000000 of
; double 2.0 shown in its float spelling -- TODO confirm against the assembly
; printer.
2285 define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
2286 ; VI-LABEL: test_call_external_void_func_v3f64_imm:
2288 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2289 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2290 ; VI-NEXT: s_mov_b32 s38, -1
2291 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2292 ; VI-NEXT: s_add_u32 s36, s36, s3
2293 ; VI-NEXT: s_addc_u32 s37, s37, 0
2294 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2295 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2296 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2297 ; VI-NEXT: v_mov_b32_e32 v0, 0
2298 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
2299 ; VI-NEXT: v_mov_b32_e32 v2, 0
2300 ; VI-NEXT: v_mov_b32_e32 v3, 0x40100000
2301 ; VI-NEXT: v_mov_b32_e32 v4, 0
2302 ; VI-NEXT: v_mov_b32_e32 v5, 0x40200000
2303 ; VI-NEXT: s_mov_b32 s32, 0
2304 ; VI-NEXT: s_getpc_b64 s[4:5]
2305 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2306 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2307 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2310 ; CI-LABEL: test_call_external_void_func_v3f64_imm:
2312 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2313 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2314 ; CI-NEXT: s_mov_b32 s38, -1
2315 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2316 ; CI-NEXT: s_add_u32 s36, s36, s3
2317 ; CI-NEXT: s_addc_u32 s37, s37, 0
2318 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2319 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2320 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2321 ; CI-NEXT: v_mov_b32_e32 v0, 0
2322 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
2323 ; CI-NEXT: v_mov_b32_e32 v2, 0
2324 ; CI-NEXT: v_mov_b32_e32 v3, 0x40100000
2325 ; CI-NEXT: v_mov_b32_e32 v4, 0
2326 ; CI-NEXT: v_mov_b32_e32 v5, 0x40200000
2327 ; CI-NEXT: s_mov_b32 s32, 0
2328 ; CI-NEXT: s_getpc_b64 s[4:5]
2329 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2330 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2331 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2334 ; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
2336 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2337 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2338 ; GFX9-NEXT: s_mov_b32 s38, -1
2339 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2340 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2341 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2342 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2343 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2344 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2345 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2346 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2347 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
2348 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
2349 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
2350 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000
2351 ; GFX9-NEXT: s_mov_b32 s32, 0
2352 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2353 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2354 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2355 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2356 ; GFX9-NEXT: s_endpgm
2358 ; GFX11-LABEL: test_call_external_void_func_v3f64_imm:
2360 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2361 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2362 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000
2363 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2364 ; GFX11-NEXT: s_mov_b32 s32, 0
2365 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2366 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f64@rel32@lo+4
2367 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f64@rel32@hi+12
2368 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2369 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2370 ; GFX11-NEXT: s_endpgm
2372 ; HSA-LABEL: test_call_external_void_func_v3f64_imm:
2374 ; HSA-NEXT: s_add_i32 s6, s6, s9
2375 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2376 ; HSA-NEXT: s_add_u32 s0, s0, s9
2377 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2378 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2379 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2380 ; HSA-NEXT: v_mov_b32_e32 v0, 0
2381 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0
2382 ; HSA-NEXT: v_mov_b32_e32 v2, 0
2383 ; HSA-NEXT: v_mov_b32_e32 v3, 0x40100000
2384 ; HSA-NEXT: v_mov_b32_e32 v4, 0
2385 ; HSA-NEXT: v_mov_b32_e32 v5, 0x40200000
2386 ; HSA-NEXT: s_mov_b32 s32, 0
2387 ; HSA-NEXT: s_getpc_b64 s[8:9]
2388 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f64@rel32@lo+4
2389 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f64@rel32@hi+12
2390 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2391 ; HSA-NEXT: s_endpgm
2392 call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
; Loads a <2 x i16> from an undef addrspace(1) pointer and passes it to
; @external_void_func_v2i16. In the checks below, the hawaii run derives v1
; from the loaded dword with a 16-bit right shift before the call; the other
; runs hand the loaded dword to the callee in v0 without further instructions.
2396 define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
2397 ; VI-LABEL: test_call_external_void_func_v2i16:
2399 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2400 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2401 ; VI-NEXT: s_mov_b32 s38, -1
2402 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2403 ; VI-NEXT: s_add_u32 s36, s36, s3
2404 ; VI-NEXT: s_mov_b32 s3, 0xf000
2405 ; VI-NEXT: s_mov_b32 s2, -1
2406 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0
2407 ; VI-NEXT: s_addc_u32 s37, s37, 0
2408 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2409 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2410 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2411 ; VI-NEXT: s_mov_b32 s32, 0
2412 ; VI-NEXT: s_getpc_b64 s[4:5]
2413 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2414 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2415 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2418 ; CI-LABEL: test_call_external_void_func_v2i16:
2420 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2421 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2422 ; CI-NEXT: s_mov_b32 s38, -1
2423 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2424 ; CI-NEXT: s_add_u32 s36, s36, s3
2425 ; CI-NEXT: s_mov_b32 s3, 0xf000
2426 ; CI-NEXT: s_mov_b32 s2, -1
2427 ; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0
2428 ; CI-NEXT: s_addc_u32 s37, s37, 0
2429 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2430 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2431 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2432 ; CI-NEXT: s_mov_b32 s32, 0
2433 ; CI-NEXT: s_getpc_b64 s[4:5]
2434 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2435 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2436 ; CI-NEXT: s_waitcnt vmcnt(0)
2437 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2438 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2441 ; GFX9-LABEL: test_call_external_void_func_v2i16:
2443 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2444 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2445 ; GFX9-NEXT: s_mov_b32 s38, -1
2446 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2447 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2448 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
2449 ; GFX9-NEXT: s_mov_b32 s2, -1
2450 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0
2451 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2452 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2453 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2454 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2455 ; GFX9-NEXT: s_mov_b32 s32, 0
2456 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2457 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2458 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2459 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2460 ; GFX9-NEXT: s_endpgm
2462 ; GFX11-LABEL: test_call_external_void_func_v2i16:
2464 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
2465 ; GFX11-NEXT: s_mov_b32 s2, -1
2466 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2467 ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
2468 ; GFX11-NEXT: s_mov_b32 s32, 0
2469 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2470 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i16@rel32@lo+4
2471 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i16@rel32@hi+12
2472 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2473 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2474 ; GFX11-NEXT: s_endpgm
2476 ; HSA-LABEL: test_call_external_void_func_v2i16:
2478 ; HSA-NEXT: s_add_i32 s6, s6, s9
2479 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2480 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2481 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
2482 ; HSA-NEXT: s_mov_b32 s6, -1
2483 ; HSA-NEXT: buffer_load_dword v0, off, s[4:7], 0
2484 ; HSA-NEXT: s_add_u32 s0, s0, s9
2485 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2486 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2487 ; HSA-NEXT: s_mov_b32 s32, 0
2488 ; HSA-NEXT: s_getpc_b64 s[8:9]
2489 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i16@rel32@lo+4
2490 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i16@rel32@hi+12
2491 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2492 ; HSA-NEXT: s_endpgm
2493 %val = load <2 x i16>, ptr addrspace(1) undef
2494 call void @external_void_func_v2i16(<2 x i16> %val)
; Loads a <3 x i16> from an undef addrspace(1) pointer and passes it to
; @external_void_func_v3i16. In the checks below, the hawaii run loads into
; v[2:3] and then fills v0, v1 and v2 (one v_alignbit_b32 plus two moves)
; before the call; the other runs load straight into v[0:1] and call.
2498 define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
2499 ; VI-LABEL: test_call_external_void_func_v3i16:
2501 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2502 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2503 ; VI-NEXT: s_mov_b32 s38, -1
2504 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2505 ; VI-NEXT: s_add_u32 s36, s36, s3
2506 ; VI-NEXT: s_mov_b32 s3, 0xf000
2507 ; VI-NEXT: s_mov_b32 s2, -1
2508 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2509 ; VI-NEXT: s_addc_u32 s37, s37, 0
2510 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2511 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2512 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2513 ; VI-NEXT: s_mov_b32 s32, 0
2514 ; VI-NEXT: s_getpc_b64 s[4:5]
2515 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2516 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2517 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2520 ; CI-LABEL: test_call_external_void_func_v3i16:
2522 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2523 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2524 ; CI-NEXT: s_mov_b32 s38, -1
2525 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2526 ; CI-NEXT: s_add_u32 s36, s36, s3
2527 ; CI-NEXT: s_mov_b32 s3, 0xf000
2528 ; CI-NEXT: s_mov_b32 s2, -1
2529 ; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0
2530 ; CI-NEXT: s_addc_u32 s37, s37, 0
2531 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2532 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2533 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2534 ; CI-NEXT: s_mov_b32 s32, 0
2535 ; CI-NEXT: s_getpc_b64 s[4:5]
2536 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2537 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2538 ; CI-NEXT: s_waitcnt vmcnt(0)
2539 ; CI-NEXT: v_alignbit_b32 v1, v3, v2, 16
2540 ; CI-NEXT: v_mov_b32_e32 v0, v2
2541 ; CI-NEXT: v_mov_b32_e32 v2, v3
2542 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2545 ; GFX9-LABEL: test_call_external_void_func_v3i16:
2547 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2548 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2549 ; GFX9-NEXT: s_mov_b32 s38, -1
2550 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2551 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2552 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
2553 ; GFX9-NEXT: s_mov_b32 s2, -1
2554 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2555 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2556 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2557 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2558 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2559 ; GFX9-NEXT: s_mov_b32 s32, 0
2560 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2561 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2562 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2563 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2564 ; GFX9-NEXT: s_endpgm
2566 ; GFX11-LABEL: test_call_external_void_func_v3i16:
2568 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
2569 ; GFX11-NEXT: s_mov_b32 s2, -1
2570 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2571 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
2572 ; GFX11-NEXT: s_mov_b32 s32, 0
2573 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2574 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i16@rel32@lo+4
2575 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i16@rel32@hi+12
2576 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2577 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2578 ; GFX11-NEXT: s_endpgm
2580 ; HSA-LABEL: test_call_external_void_func_v3i16:
2582 ; HSA-NEXT: s_add_i32 s6, s6, s9
2583 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2584 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2585 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
2586 ; HSA-NEXT: s_mov_b32 s6, -1
2587 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2588 ; HSA-NEXT: s_add_u32 s0, s0, s9
2589 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2590 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2591 ; HSA-NEXT: s_mov_b32 s32, 0
2592 ; HSA-NEXT: s_getpc_b64 s[8:9]
2593 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i16@rel32@lo+4
2594 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i16@rel32@hi+12
2595 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2596 ; HSA-NEXT: s_endpgm
2597 %val = load <3 x i16>, ptr addrspace(1) undef
2598 call void @external_void_func_v3i16(<3 x i16> %val)
; Loads a <3 x half> from an undef addrspace(1) pointer and passes it to
; @external_void_func_v3f16. In the checks below, the hawaii run loads into
; v[1:2] and emits three v_cvt_f32_f16 conversions (plus a 16-bit shift) that
; write v0, v1 and v2 before the call; the other runs load straight into
; v[0:1] and call.
2602 define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
2603 ; VI-LABEL: test_call_external_void_func_v3f16:
2605 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2606 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2607 ; VI-NEXT: s_mov_b32 s38, -1
2608 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2609 ; VI-NEXT: s_add_u32 s36, s36, s3
2610 ; VI-NEXT: s_mov_b32 s3, 0xf000
2611 ; VI-NEXT: s_mov_b32 s2, -1
2612 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2613 ; VI-NEXT: s_addc_u32 s37, s37, 0
2614 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2615 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2616 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2617 ; VI-NEXT: s_mov_b32 s32, 0
2618 ; VI-NEXT: s_getpc_b64 s[4:5]
2619 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2620 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2621 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2624 ; CI-LABEL: test_call_external_void_func_v3f16:
2626 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2627 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2628 ; CI-NEXT: s_mov_b32 s38, -1
2629 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2630 ; CI-NEXT: s_add_u32 s36, s36, s3
2631 ; CI-NEXT: s_mov_b32 s3, 0xf000
2632 ; CI-NEXT: s_mov_b32 s2, -1
2633 ; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
2634 ; CI-NEXT: s_addc_u32 s37, s37, 0
2635 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2636 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2637 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2638 ; CI-NEXT: s_mov_b32 s32, 0
2639 ; CI-NEXT: s_getpc_b64 s[4:5]
2640 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2641 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2642 ; CI-NEXT: s_waitcnt vmcnt(0)
2643 ; CI-NEXT: v_cvt_f32_f16_e32 v0, v1
2644 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
2645 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
2646 ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
2647 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2650 ; GFX9-LABEL: test_call_external_void_func_v3f16:
2652 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2653 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2654 ; GFX9-NEXT: s_mov_b32 s38, -1
2655 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2656 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2657 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
2658 ; GFX9-NEXT: s_mov_b32 s2, -1
2659 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2660 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2661 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2662 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2663 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2664 ; GFX9-NEXT: s_mov_b32 s32, 0
2665 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2666 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2667 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2668 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2669 ; GFX9-NEXT: s_endpgm
2671 ; GFX11-LABEL: test_call_external_void_func_v3f16:
2673 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
2674 ; GFX11-NEXT: s_mov_b32 s2, -1
2675 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2676 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
2677 ; GFX11-NEXT: s_mov_b32 s32, 0
2678 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2679 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16@rel32@lo+4
2680 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16@rel32@hi+12
2681 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2682 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2683 ; GFX11-NEXT: s_endpgm
2685 ; HSA-LABEL: test_call_external_void_func_v3f16:
2687 ; HSA-NEXT: s_add_i32 s6, s6, s9
2688 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2689 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2690 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
2691 ; HSA-NEXT: s_mov_b32 s6, -1
2692 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2693 ; HSA-NEXT: s_add_u32 s0, s0, s9
2694 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2695 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2696 ; HSA-NEXT: s_mov_b32 s32, 0
2697 ; HSA-NEXT: s_getpc_b64 s[8:9]
2698 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f16@rel32@lo+4
2699 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f16@rel32@hi+12
2700 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2701 ; HSA-NEXT: s_endpgm
2702 %val = load <3 x half>, ptr addrspace(1) undef
2703 call void @external_void_func_v3f16(<3 x half> %val)
; Passes the constant <3 x i16> <1, 2, 3> to @external_void_func_v3i16.
; In the checks below, the hawaii run materializes one element per register
; (v0 = 1, v1 = 2, v2 = 3); the other runs materialize v0 = 0x20001 and
; v1 = 3 before the call.
2707 define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
2708 ; VI-LABEL: test_call_external_void_func_v3i16_imm:
2710 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2711 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2712 ; VI-NEXT: s_mov_b32 s38, -1
2713 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2714 ; VI-NEXT: s_add_u32 s36, s36, s3
2715 ; VI-NEXT: s_addc_u32 s37, s37, 0
2716 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2717 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2718 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2719 ; VI-NEXT: v_mov_b32_e32 v0, 0x20001
2720 ; VI-NEXT: v_mov_b32_e32 v1, 3
2721 ; VI-NEXT: s_mov_b32 s32, 0
2722 ; VI-NEXT: s_getpc_b64 s[4:5]
2723 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2724 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2725 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2728 ; CI-LABEL: test_call_external_void_func_v3i16_imm:
2730 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2731 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2732 ; CI-NEXT: s_mov_b32 s38, -1
2733 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2734 ; CI-NEXT: s_add_u32 s36, s36, s3
2735 ; CI-NEXT: s_addc_u32 s37, s37, 0
2736 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2737 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2738 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2739 ; CI-NEXT: v_mov_b32_e32 v0, 1
2740 ; CI-NEXT: v_mov_b32_e32 v1, 2
2741 ; CI-NEXT: v_mov_b32_e32 v2, 3
2742 ; CI-NEXT: s_mov_b32 s32, 0
2743 ; CI-NEXT: s_getpc_b64 s[4:5]
2744 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2745 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2746 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2749 ; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
2751 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2752 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2753 ; GFX9-NEXT: s_mov_b32 s38, -1
2754 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2755 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2756 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2757 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2758 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2759 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2760 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
2761 ; GFX9-NEXT: v_mov_b32_e32 v1, 3
2762 ; GFX9-NEXT: s_mov_b32 s32, 0
2763 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2764 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2765 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2766 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2767 ; GFX9-NEXT: s_endpgm
2769 ; GFX11-LABEL: test_call_external_void_func_v3i16_imm:
2771 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3
2772 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2773 ; GFX11-NEXT: s_mov_b32 s32, 0
2774 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2775 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i16@rel32@lo+4
2776 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i16@rel32@hi+12
2777 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2778 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2779 ; GFX11-NEXT: s_endpgm
2781 ; HSA-LABEL: test_call_external_void_func_v3i16_imm:
2783 ; HSA-NEXT: s_add_i32 s6, s6, s9
2784 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2785 ; HSA-NEXT: s_add_u32 s0, s0, s9
2786 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2787 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2788 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2789 ; HSA-NEXT: v_mov_b32_e32 v0, 0x20001
2790 ; HSA-NEXT: v_mov_b32_e32 v1, 3
2791 ; HSA-NEXT: s_mov_b32 s32, 0
2792 ; HSA-NEXT: s_getpc_b64 s[8:9]
2793 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i16@rel32@lo+4
2794 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i16@rel32@hi+12
2795 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2796 ; HSA-NEXT: s_endpgm
2797 call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
; Passes the constant <3 x half> <1.0, 2.0, 4.0> to @external_void_func_v3f16.
; In the checks below, the hawaii run materializes v0 = 1.0, v1 = 2.0 and
; v2 = 4.0; the other runs materialize v0 = 0x40003c00 and v1 = 0x4400
; before the call.
2801 define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
2802 ; VI-LABEL: test_call_external_void_func_v3f16_imm:
2804 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2805 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2806 ; VI-NEXT: s_mov_b32 s38, -1
2807 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2808 ; VI-NEXT: s_add_u32 s36, s36, s3
2809 ; VI-NEXT: s_addc_u32 s37, s37, 0
2810 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2811 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2812 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2813 ; VI-NEXT: v_mov_b32_e32 v0, 0x40003c00
2814 ; VI-NEXT: v_mov_b32_e32 v1, 0x4400
2815 ; VI-NEXT: s_mov_b32 s32, 0
2816 ; VI-NEXT: s_getpc_b64 s[4:5]
2817 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2818 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2819 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2822 ; CI-LABEL: test_call_external_void_func_v3f16_imm:
2824 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2825 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2826 ; CI-NEXT: s_mov_b32 s38, -1
2827 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2828 ; CI-NEXT: s_add_u32 s36, s36, s3
2829 ; CI-NEXT: s_addc_u32 s37, s37, 0
2830 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2831 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2832 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2833 ; CI-NEXT: v_mov_b32_e32 v0, 1.0
2834 ; CI-NEXT: v_mov_b32_e32 v1, 2.0
2835 ; CI-NEXT: v_mov_b32_e32 v2, 4.0
2836 ; CI-NEXT: s_mov_b32 s32, 0
2837 ; CI-NEXT: s_getpc_b64 s[4:5]
2838 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2839 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2840 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2843 ; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
2845 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2846 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2847 ; GFX9-NEXT: s_mov_b32 s38, -1
2848 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2849 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2850 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2851 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2852 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2853 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2854 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00
2855 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400
2856 ; GFX9-NEXT: s_mov_b32 s32, 0
2857 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2858 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2859 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2860 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2861 ; GFX9-NEXT: s_endpgm
2863 ; GFX11-LABEL: test_call_external_void_func_v3f16_imm:
2865 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00
2866 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400
2867 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2868 ; GFX11-NEXT: s_mov_b32 s32, 0
2869 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2870 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16@rel32@lo+4
2871 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16@rel32@hi+12
2872 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2873 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2874 ; GFX11-NEXT: s_endpgm
2876 ; HSA-LABEL: test_call_external_void_func_v3f16_imm:
2878 ; HSA-NEXT: s_add_i32 s6, s6, s9
2879 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2880 ; HSA-NEXT: s_add_u32 s0, s0, s9
2881 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2882 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2883 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2884 ; HSA-NEXT: v_mov_b32_e32 v0, 0x40003c00
2885 ; HSA-NEXT: v_mov_b32_e32 v1, 0x4400
2886 ; HSA-NEXT: s_mov_b32 s32, 0
2887 ; HSA-NEXT: s_getpc_b64 s[8:9]
2888 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f16@rel32@lo+4
2889 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f16@rel32@hi+12
2890 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2891 ; HSA-NEXT: s_endpgm
2892 call void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
; Loads a <4 x i16> from an undef addrspace(1) pointer and passes it to
; @external_void_func_v4i16. In the checks below, the hawaii run follows the
; load with two 16-bit right shifts and two moves that write v1, v2 and v3
; before the call; the other runs load into v[0:1] and call directly.
2896 define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
2897 ; VI-LABEL: test_call_external_void_func_v4i16:
2899 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2900 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2901 ; VI-NEXT: s_mov_b32 s38, -1
2902 ; VI-NEXT: s_mov_b32 s39, 0xe80000
2903 ; VI-NEXT: s_add_u32 s36, s36, s3
2904 ; VI-NEXT: s_mov_b32 s3, 0xf000
2905 ; VI-NEXT: s_mov_b32 s2, -1
2906 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2907 ; VI-NEXT: s_addc_u32 s37, s37, 0
2908 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
2909 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
2910 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
2911 ; VI-NEXT: s_mov_b32 s32, 0
2912 ; VI-NEXT: s_getpc_b64 s[4:5]
2913 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2914 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2915 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2918 ; CI-LABEL: test_call_external_void_func_v4i16:
2920 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2921 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2922 ; CI-NEXT: s_mov_b32 s38, -1
2923 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
2924 ; CI-NEXT: s_add_u32 s36, s36, s3
2925 ; CI-NEXT: s_mov_b32 s3, 0xf000
2926 ; CI-NEXT: s_mov_b32 s2, -1
2927 ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2928 ; CI-NEXT: s_addc_u32 s37, s37, 0
2929 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
2930 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
2931 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
2932 ; CI-NEXT: s_mov_b32 s32, 0
2933 ; CI-NEXT: s_getpc_b64 s[4:5]
2934 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2935 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2936 ; CI-NEXT: s_waitcnt vmcnt(0)
2937 ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
2938 ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v1
2939 ; CI-NEXT: v_mov_b32_e32 v2, v1
2940 ; CI-NEXT: v_mov_b32_e32 v1, v4
2941 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
2944 ; GFX9-LABEL: test_call_external_void_func_v4i16:
2946 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2947 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2948 ; GFX9-NEXT: s_mov_b32 s38, -1
2949 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
2950 ; GFX9-NEXT: s_add_u32 s36, s36, s3
2951 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
2952 ; GFX9-NEXT: s_mov_b32 s2, -1
2953 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2954 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
2955 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
2956 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
2957 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
2958 ; GFX9-NEXT: s_mov_b32 s32, 0
2959 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2960 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2961 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2962 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2963 ; GFX9-NEXT: s_endpgm
2965 ; GFX11-LABEL: test_call_external_void_func_v4i16:
2967 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
2968 ; GFX11-NEXT: s_mov_b32 s2, -1
2969 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
2970 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
2971 ; GFX11-NEXT: s_mov_b32 s32, 0
2972 ; GFX11-NEXT: s_getpc_b64 s[2:3]
2973 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i16@rel32@lo+4
2974 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i16@rel32@hi+12
2975 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2976 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
2977 ; GFX11-NEXT: s_endpgm
2979 ; HSA-LABEL: test_call_external_void_func_v4i16:
2981 ; HSA-NEXT: s_add_i32 s6, s6, s9
2982 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
2983 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
2984 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
2985 ; HSA-NEXT: s_mov_b32 s6, -1
2986 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2987 ; HSA-NEXT: s_add_u32 s0, s0, s9
2988 ; HSA-NEXT: s_addc_u32 s1, s1, 0
2989 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
2990 ; HSA-NEXT: s_mov_b32 s32, 0
2991 ; HSA-NEXT: s_getpc_b64 s[8:9]
2992 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i16@rel32@lo+4
2993 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i16@rel32@hi+12
2994 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
2995 ; HSA-NEXT: s_endpgm
2996 %val = load <4 x i16>, ptr addrspace(1) undef
2997 call void @external_void_func_v4i16(<4 x i16> %val)
; Passes the constant <4 x i16> <1, 2, 3, 4> to @external_void_func_v4i16.
; In the checks below, the hawaii run materializes one element per register
; (v0 = 1, v1 = 2, v2 = 3, v3 = 4); the other runs materialize
; v0 = 0x20001 and v1 = 0x40003 before the call.
3001 define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
3002 ; VI-LABEL: test_call_external_void_func_v4i16_imm:
3004 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3005 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3006 ; VI-NEXT: s_mov_b32 s38, -1
3007 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3008 ; VI-NEXT: s_add_u32 s36, s36, s3
3009 ; VI-NEXT: s_addc_u32 s37, s37, 0
3010 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3011 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3012 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3013 ; VI-NEXT: v_mov_b32_e32 v0, 0x20001
3014 ; VI-NEXT: v_mov_b32_e32 v1, 0x40003
3015 ; VI-NEXT: s_mov_b32 s32, 0
3016 ; VI-NEXT: s_getpc_b64 s[4:5]
3017 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
3018 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
3019 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3022 ; CI-LABEL: test_call_external_void_func_v4i16_imm:
3024 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3025 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3026 ; CI-NEXT: s_mov_b32 s38, -1
3027 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3028 ; CI-NEXT: s_add_u32 s36, s36, s3
3029 ; CI-NEXT: s_addc_u32 s37, s37, 0
3030 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3031 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3032 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3033 ; CI-NEXT: v_mov_b32_e32 v0, 1
3034 ; CI-NEXT: v_mov_b32_e32 v1, 2
3035 ; CI-NEXT: v_mov_b32_e32 v2, 3
3036 ; CI-NEXT: v_mov_b32_e32 v3, 4
3037 ; CI-NEXT: s_mov_b32 s32, 0
3038 ; CI-NEXT: s_getpc_b64 s[4:5]
3039 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
3040 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
3041 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3044 ; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
3046 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3047 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3048 ; GFX9-NEXT: s_mov_b32 s38, -1
3049 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3050 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3051 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3052 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3053 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3054 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3055 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
3056 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003
3057 ; GFX9-NEXT: s_mov_b32 s32, 0
3058 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3059 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
3060 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
3061 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3062 ; GFX9-NEXT: s_endpgm
3064 ; GFX11-LABEL: test_call_external_void_func_v4i16_imm:
3066 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001
3067 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003
3068 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3069 ; GFX11-NEXT: s_mov_b32 s32, 0
3070 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3071 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i16@rel32@lo+4
3072 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i16@rel32@hi+12
3073 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3074 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3075 ; GFX11-NEXT: s_endpgm
3077 ; HSA-LABEL: test_call_external_void_func_v4i16_imm:
3079 ; HSA-NEXT: s_add_i32 s6, s6, s9
3080 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3081 ; HSA-NEXT: s_add_u32 s0, s0, s9
3082 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3083 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3084 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3085 ; HSA-NEXT: v_mov_b32_e32 v0, 0x20001
3086 ; HSA-NEXT: v_mov_b32_e32 v1, 0x40003
3087 ; HSA-NEXT: s_mov_b32 s32, 0
3088 ; HSA-NEXT: s_getpc_b64 s[8:9]
3089 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i16@rel32@lo+4
3090 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i16@rel32@hi+12
3091 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3092 ; HSA-NEXT: s_endpgm
3093 call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
; Loads a <2 x half> from an undef addrspace(1) pointer and passes it to
; @external_void_func_v2f16. In the checks below, the hawaii run loads into v1
; and emits two v_cvt_f32_f16 conversions (with a 16-bit shift between them)
; that write v0 and v1 before the call; the other runs load into v0 and call
; directly.
3097 define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
3098 ; VI-LABEL: test_call_external_void_func_v2f16:
3100 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3101 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3102 ; VI-NEXT: s_mov_b32 s38, -1
3103 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3104 ; VI-NEXT: s_add_u32 s36, s36, s3
3105 ; VI-NEXT: s_mov_b32 s3, 0xf000
3106 ; VI-NEXT: s_mov_b32 s2, -1
3107 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0
3108 ; VI-NEXT: s_addc_u32 s37, s37, 0
3109 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3110 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3111 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3112 ; VI-NEXT: s_mov_b32 s32, 0
3113 ; VI-NEXT: s_getpc_b64 s[4:5]
3114 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
3115 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
3116 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3119 ; CI-LABEL: test_call_external_void_func_v2f16:
3121 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3122 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3123 ; CI-NEXT: s_mov_b32 s38, -1
3124 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3125 ; CI-NEXT: s_add_u32 s36, s36, s3
3126 ; CI-NEXT: s_mov_b32 s3, 0xf000
3127 ; CI-NEXT: s_mov_b32 s2, -1
3128 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0
3129 ; CI-NEXT: s_addc_u32 s37, s37, 0
3130 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3131 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3132 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3133 ; CI-NEXT: s_mov_b32 s32, 0
3134 ; CI-NEXT: s_getpc_b64 s[4:5]
3135 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
3136 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
3137 ; CI-NEXT: s_waitcnt vmcnt(0)
3138 ; CI-NEXT: v_cvt_f32_f16_e32 v0, v1
3139 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3140 ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
3141 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3144 ; GFX9-LABEL: test_call_external_void_func_v2f16:
3146 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3147 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3148 ; GFX9-NEXT: s_mov_b32 s38, -1
3149 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3150 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3151 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
3152 ; GFX9-NEXT: s_mov_b32 s2, -1
3153 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0
3154 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3155 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3156 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3157 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3158 ; GFX9-NEXT: s_mov_b32 s32, 0
3159 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3160 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
3161 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
3162 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3163 ; GFX9-NEXT: s_endpgm
3165 ; GFX11-LABEL: test_call_external_void_func_v2f16:
3167 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
3168 ; GFX11-NEXT: s_mov_b32 s2, -1
3169 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3170 ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
3171 ; GFX11-NEXT: s_mov_b32 s32, 0
3172 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3173 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f16@rel32@lo+4
3174 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f16@rel32@hi+12
3175 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3176 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3177 ; GFX11-NEXT: s_endpgm
3179 ; HSA-LABEL: test_call_external_void_func_v2f16:
3181 ; HSA-NEXT: s_add_i32 s6, s6, s9
3182 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3183 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3184 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
3185 ; HSA-NEXT: s_mov_b32 s6, -1
3186 ; HSA-NEXT: buffer_load_dword v0, off, s[4:7], 0
3187 ; HSA-NEXT: s_add_u32 s0, s0, s9
3188 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3189 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3190 ; HSA-NEXT: s_mov_b32 s32, 0
3191 ; HSA-NEXT: s_getpc_b64 s[8:9]
3192 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f16@rel32@lo+4
3193 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f16@rel32@hi+12
3194 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3195 ; HSA-NEXT: s_endpgm
3196 %val = load <2 x half>, ptr addrspace(1) undef
3197 call void @external_void_func_v2f16(<2 x half> %val)
; Kernel that loads a <2 x i32> from an undef addrspace(1) pointer and passes
; it by value to external_void_func_v2i32. In every checked run the value is
; fetched with one 64-bit buffer load into v[0:1] ahead of the s_swappc_b64
; call to the callee.
3201 define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
3202 ; VI-LABEL: test_call_external_void_func_v2i32:
3204 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3205 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3206 ; VI-NEXT: s_mov_b32 s38, -1
3207 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3208 ; VI-NEXT: s_add_u32 s36, s36, s3
3209 ; VI-NEXT: s_mov_b32 s3, 0xf000
3210 ; VI-NEXT: s_mov_b32 s2, -1
3211 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
3212 ; VI-NEXT: s_addc_u32 s37, s37, 0
3213 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3214 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3215 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3216 ; VI-NEXT: s_mov_b32 s32, 0
3217 ; VI-NEXT: s_getpc_b64 s[4:5]
3218 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3219 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3220 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3223 ; CI-LABEL: test_call_external_void_func_v2i32:
3225 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3226 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3227 ; CI-NEXT: s_mov_b32 s38, -1
3228 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3229 ; CI-NEXT: s_add_u32 s36, s36, s3
3230 ; CI-NEXT: s_mov_b32 s3, 0xf000
3231 ; CI-NEXT: s_mov_b32 s2, -1
3232 ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
3233 ; CI-NEXT: s_addc_u32 s37, s37, 0
3234 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3235 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3236 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3237 ; CI-NEXT: s_mov_b32 s32, 0
3238 ; CI-NEXT: s_getpc_b64 s[4:5]
3239 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3240 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3241 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3244 ; GFX9-LABEL: test_call_external_void_func_v2i32:
3246 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3247 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3248 ; GFX9-NEXT: s_mov_b32 s38, -1
3249 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3250 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3251 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
3252 ; GFX9-NEXT: s_mov_b32 s2, -1
3253 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
3254 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3255 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3256 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3257 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3258 ; GFX9-NEXT: s_mov_b32 s32, 0
3259 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3260 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3261 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3262 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3263 ; GFX9-NEXT: s_endpgm
3265 ; GFX11-LABEL: test_call_external_void_func_v2i32:
3267 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
3268 ; GFX11-NEXT: s_mov_b32 s2, -1
3269 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3270 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
3271 ; GFX11-NEXT: s_mov_b32 s32, 0
3272 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3273 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32@rel32@lo+4
3274 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32@rel32@hi+12
3275 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3276 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3277 ; GFX11-NEXT: s_endpgm
3279 ; HSA-LABEL: test_call_external_void_func_v2i32:
3281 ; HSA-NEXT: s_add_i32 s6, s6, s9
3282 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3283 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3284 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
3285 ; HSA-NEXT: s_mov_b32 s6, -1
3286 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
3287 ; HSA-NEXT: s_add_u32 s0, s0, s9
3288 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3289 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3290 ; HSA-NEXT: s_mov_b32 s32, 0
3291 ; HSA-NEXT: s_getpc_b64 s[8:9]
3292 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i32@rel32@lo+4
3293 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i32@rel32@hi+12
3294 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3295 ; HSA-NEXT: s_endpgm
3296 %val = load <2 x i32>, ptr addrspace(1) undef
3297 call void @external_void_func_v2i32(<2 x i32> %val)
; Kernel that calls external_void_func_v2i32 with the constant vector
; <i32 1, i32 2>. No memory load is expected; the checks require the two
; constants to be moved into v0 and v1 (one v_dual_mov_b32 in the gfx1100 run)
; before the s_swappc_b64 call.
3301 define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
3302 ; VI-LABEL: test_call_external_void_func_v2i32_imm:
3304 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3305 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3306 ; VI-NEXT: s_mov_b32 s38, -1
3307 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3308 ; VI-NEXT: s_add_u32 s36, s36, s3
3309 ; VI-NEXT: s_addc_u32 s37, s37, 0
3310 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3311 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3312 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3313 ; VI-NEXT: v_mov_b32_e32 v0, 1
3314 ; VI-NEXT: v_mov_b32_e32 v1, 2
3315 ; VI-NEXT: s_mov_b32 s32, 0
3316 ; VI-NEXT: s_getpc_b64 s[4:5]
3317 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3318 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3319 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3322 ; CI-LABEL: test_call_external_void_func_v2i32_imm:
3324 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3325 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3326 ; CI-NEXT: s_mov_b32 s38, -1
3327 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3328 ; CI-NEXT: s_add_u32 s36, s36, s3
3329 ; CI-NEXT: s_addc_u32 s37, s37, 0
3330 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3331 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3332 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3333 ; CI-NEXT: v_mov_b32_e32 v0, 1
3334 ; CI-NEXT: v_mov_b32_e32 v1, 2
3335 ; CI-NEXT: s_mov_b32 s32, 0
3336 ; CI-NEXT: s_getpc_b64 s[4:5]
3337 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3338 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3339 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3342 ; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
3344 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3345 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3346 ; GFX9-NEXT: s_mov_b32 s38, -1
3347 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3348 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3349 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3350 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3351 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3352 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3353 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
3354 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
3355 ; GFX9-NEXT: s_mov_b32 s32, 0
3356 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3357 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3358 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3359 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3360 ; GFX9-NEXT: s_endpgm
3362 ; GFX11-LABEL: test_call_external_void_func_v2i32_imm:
3364 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
3365 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3366 ; GFX11-NEXT: s_mov_b32 s32, 0
3367 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3368 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32@rel32@lo+4
3369 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32@rel32@hi+12
3370 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3371 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3372 ; GFX11-NEXT: s_endpgm
3374 ; HSA-LABEL: test_call_external_void_func_v2i32_imm:
3376 ; HSA-NEXT: s_add_i32 s6, s6, s9
3377 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3378 ; HSA-NEXT: s_add_u32 s0, s0, s9
3379 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3380 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3381 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3382 ; HSA-NEXT: v_mov_b32_e32 v0, 1
3383 ; HSA-NEXT: v_mov_b32_e32 v1, 2
3384 ; HSA-NEXT: s_mov_b32 s32, 0
3385 ; HSA-NEXT: s_getpc_b64 s[8:9]
3386 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i32@rel32@lo+4
3387 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i32@rel32@hi+12
3388 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3389 ; HSA-NEXT: s_endpgm
3390 call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
; Kernel that calls external_void_func_v3i32 with the constant vector
; <i32 3, i32 4, i32 5>; the checks require the constants in v0, v1 and v2.
; The kernel declares one unnamed i32 argument that the body never reads.
; NOTE(review) - the checks add s5 (s11 in the amdhsa run) into the scratch
; descriptor base where the argument-less kernels in this file add s3 (s9);
; presumably the extra kernel argument shifts the incoming SGPRs - confirm.
3394 define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
3395 ; VI-LABEL: test_call_external_void_func_v3i32_imm:
3397 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3398 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3399 ; VI-NEXT: s_mov_b32 s38, -1
3400 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3401 ; VI-NEXT: s_add_u32 s36, s36, s5
3402 ; VI-NEXT: s_addc_u32 s37, s37, 0
3403 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3404 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3405 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3406 ; VI-NEXT: v_mov_b32_e32 v0, 3
3407 ; VI-NEXT: v_mov_b32_e32 v1, 4
3408 ; VI-NEXT: v_mov_b32_e32 v2, 5
3409 ; VI-NEXT: s_mov_b32 s32, 0
3410 ; VI-NEXT: s_getpc_b64 s[4:5]
3411 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3412 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3413 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3416 ; CI-LABEL: test_call_external_void_func_v3i32_imm:
3418 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3419 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3420 ; CI-NEXT: s_mov_b32 s38, -1
3421 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3422 ; CI-NEXT: s_add_u32 s36, s36, s5
3423 ; CI-NEXT: s_addc_u32 s37, s37, 0
3424 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3425 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3426 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3427 ; CI-NEXT: v_mov_b32_e32 v0, 3
3428 ; CI-NEXT: v_mov_b32_e32 v1, 4
3429 ; CI-NEXT: v_mov_b32_e32 v2, 5
3430 ; CI-NEXT: s_mov_b32 s32, 0
3431 ; CI-NEXT: s_getpc_b64 s[4:5]
3432 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3433 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3434 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3437 ; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
3439 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3440 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3441 ; GFX9-NEXT: s_mov_b32 s38, -1
3442 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3443 ; GFX9-NEXT: s_add_u32 s36, s36, s5
3444 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3445 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3446 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3447 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3448 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
3449 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
3450 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
3451 ; GFX9-NEXT: s_mov_b32 s32, 0
3452 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3453 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3454 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3455 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3456 ; GFX9-NEXT: s_endpgm
3458 ; GFX11-LABEL: test_call_external_void_func_v3i32_imm:
3460 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
3461 ; GFX11-NEXT: v_mov_b32_e32 v2, 5
3462 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3463 ; GFX11-NEXT: s_mov_b32 s32, 0
3464 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3465 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i32@rel32@lo+4
3466 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i32@rel32@hi+12
3467 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3468 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3469 ; GFX11-NEXT: s_endpgm
3471 ; HSA-LABEL: test_call_external_void_func_v3i32_imm:
3473 ; HSA-NEXT: s_add_i32 s8, s8, s11
3474 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
3475 ; HSA-NEXT: s_add_u32 s0, s0, s11
3476 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3477 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3478 ; HSA-NEXT: v_mov_b32_e32 v0, 3
3479 ; HSA-NEXT: v_mov_b32_e32 v1, 4
3480 ; HSA-NEXT: v_mov_b32_e32 v2, 5
3481 ; HSA-NEXT: s_mov_b32 s32, 0
3482 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
3483 ; HSA-NEXT: s_getpc_b64 s[8:9]
3484 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i32@rel32@lo+4
3485 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32@rel32@hi+12
3486 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3487 ; HSA-NEXT: s_endpgm
3488 call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
; Kernel that calls external_void_func_v3i32_i32 with the constant vector
; <i32 3, i32 4, i32 5> followed by the scalar i32 6. The checks require the
; scalar to land in v3, directly after the three vector elements in v0-v2.
; The kernel declares one unnamed i32 argument that the body never reads.
3492 define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
3493 ; VI-LABEL: test_call_external_void_func_v3i32_i32:
3495 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3496 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3497 ; VI-NEXT: s_mov_b32 s38, -1
3498 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3499 ; VI-NEXT: s_add_u32 s36, s36, s5
3500 ; VI-NEXT: s_addc_u32 s37, s37, 0
3501 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3502 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3503 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3504 ; VI-NEXT: v_mov_b32_e32 v0, 3
3505 ; VI-NEXT: v_mov_b32_e32 v1, 4
3506 ; VI-NEXT: v_mov_b32_e32 v2, 5
3507 ; VI-NEXT: v_mov_b32_e32 v3, 6
3508 ; VI-NEXT: s_mov_b32 s32, 0
3509 ; VI-NEXT: s_getpc_b64 s[4:5]
3510 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3511 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3512 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3515 ; CI-LABEL: test_call_external_void_func_v3i32_i32:
3517 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3518 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3519 ; CI-NEXT: s_mov_b32 s38, -1
3520 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3521 ; CI-NEXT: s_add_u32 s36, s36, s5
3522 ; CI-NEXT: s_addc_u32 s37, s37, 0
3523 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3524 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3525 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3526 ; CI-NEXT: v_mov_b32_e32 v0, 3
3527 ; CI-NEXT: v_mov_b32_e32 v1, 4
3528 ; CI-NEXT: v_mov_b32_e32 v2, 5
3529 ; CI-NEXT: v_mov_b32_e32 v3, 6
3530 ; CI-NEXT: s_mov_b32 s32, 0
3531 ; CI-NEXT: s_getpc_b64 s[4:5]
3532 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3533 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3534 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3537 ; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
3539 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3540 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3541 ; GFX9-NEXT: s_mov_b32 s38, -1
3542 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3543 ; GFX9-NEXT: s_add_u32 s36, s36, s5
3544 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3545 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3546 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3547 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3548 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
3549 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
3550 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
3551 ; GFX9-NEXT: v_mov_b32_e32 v3, 6
3552 ; GFX9-NEXT: s_mov_b32 s32, 0
3553 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3554 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3555 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3556 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3557 ; GFX9-NEXT: s_endpgm
3559 ; GFX11-LABEL: test_call_external_void_func_v3i32_i32:
3561 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
3562 ; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6
3563 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3564 ; GFX11-NEXT: s_mov_b32 s32, 0
3565 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3566 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i32_i32@rel32@lo+4
3567 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i32_i32@rel32@hi+12
3568 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3569 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3570 ; GFX11-NEXT: s_endpgm
3572 ; HSA-LABEL: test_call_external_void_func_v3i32_i32:
3574 ; HSA-NEXT: s_add_i32 s8, s8, s11
3575 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
3576 ; HSA-NEXT: s_add_u32 s0, s0, s11
3577 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3578 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3579 ; HSA-NEXT: v_mov_b32_e32 v0, 3
3580 ; HSA-NEXT: v_mov_b32_e32 v1, 4
3581 ; HSA-NEXT: v_mov_b32_e32 v2, 5
3582 ; HSA-NEXT: v_mov_b32_e32 v3, 6
3583 ; HSA-NEXT: s_mov_b32 s32, 0
3584 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
3585 ; HSA-NEXT: s_getpc_b64 s[8:9]
3586 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_i32@rel32@lo+4
3587 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32@rel32@hi+12
3588 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3589 ; HSA-NEXT: s_endpgm
3590 call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
; Kernel that loads a <4 x i32> from an undef addrspace(1) pointer and passes
; it by value to external_void_func_v4i32. In every checked run the value is
; fetched with one 128-bit buffer load into v[0:3] ahead of the s_swappc_b64
; call to the callee.
3594 define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
3595 ; VI-LABEL: test_call_external_void_func_v4i32:
3597 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3598 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3599 ; VI-NEXT: s_mov_b32 s38, -1
3600 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3601 ; VI-NEXT: s_add_u32 s36, s36, s3
3602 ; VI-NEXT: s_mov_b32 s3, 0xf000
3603 ; VI-NEXT: s_mov_b32 s2, -1
3604 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3605 ; VI-NEXT: s_addc_u32 s37, s37, 0
3606 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3607 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3608 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3609 ; VI-NEXT: s_mov_b32 s32, 0
3610 ; VI-NEXT: s_getpc_b64 s[4:5]
3611 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3612 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3613 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3616 ; CI-LABEL: test_call_external_void_func_v4i32:
3618 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3619 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3620 ; CI-NEXT: s_mov_b32 s38, -1
3621 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3622 ; CI-NEXT: s_add_u32 s36, s36, s3
3623 ; CI-NEXT: s_mov_b32 s3, 0xf000
3624 ; CI-NEXT: s_mov_b32 s2, -1
3625 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3626 ; CI-NEXT: s_addc_u32 s37, s37, 0
3627 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3628 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3629 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3630 ; CI-NEXT: s_mov_b32 s32, 0
3631 ; CI-NEXT: s_getpc_b64 s[4:5]
3632 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3633 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3634 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3637 ; GFX9-LABEL: test_call_external_void_func_v4i32:
3639 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3640 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3641 ; GFX9-NEXT: s_mov_b32 s38, -1
3642 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3643 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3644 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
3645 ; GFX9-NEXT: s_mov_b32 s2, -1
3646 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3647 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3648 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3649 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3650 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3651 ; GFX9-NEXT: s_mov_b32 s32, 0
3652 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3653 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3654 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3655 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3656 ; GFX9-NEXT: s_endpgm
3658 ; GFX11-LABEL: test_call_external_void_func_v4i32:
3660 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
3661 ; GFX11-NEXT: s_mov_b32 s2, -1
3662 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3663 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
3664 ; GFX11-NEXT: s_mov_b32 s32, 0
3665 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3666 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i32@rel32@lo+4
3667 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i32@rel32@hi+12
3668 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3669 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3670 ; GFX11-NEXT: s_endpgm
3672 ; HSA-LABEL: test_call_external_void_func_v4i32:
3674 ; HSA-NEXT: s_add_i32 s6, s6, s9
3675 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3676 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3677 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
3678 ; HSA-NEXT: s_mov_b32 s6, -1
3679 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
3680 ; HSA-NEXT: s_add_u32 s0, s0, s9
3681 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3682 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3683 ; HSA-NEXT: s_mov_b32 s32, 0
3684 ; HSA-NEXT: s_getpc_b64 s[8:9]
3685 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i32@rel32@lo+4
3686 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32@rel32@hi+12
3687 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3688 ; HSA-NEXT: s_endpgm
3689 %val = load <4 x i32>, ptr addrspace(1) undef
3690 call void @external_void_func_v4i32(<4 x i32> %val)
; Kernel that calls external_void_func_v4i32 with the constant vector
; <i32 1, i32 2, i32 3, i32 4>. No memory load is expected; the checks require
; the four constants in v0-v3 (two v_dual_mov_b32 in the gfx1100 run) before
; the s_swappc_b64 call.
3694 define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
3695 ; VI-LABEL: test_call_external_void_func_v4i32_imm:
3697 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3698 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3699 ; VI-NEXT: s_mov_b32 s38, -1
3700 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3701 ; VI-NEXT: s_add_u32 s36, s36, s3
3702 ; VI-NEXT: s_addc_u32 s37, s37, 0
3703 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3704 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3705 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3706 ; VI-NEXT: v_mov_b32_e32 v0, 1
3707 ; VI-NEXT: v_mov_b32_e32 v1, 2
3708 ; VI-NEXT: v_mov_b32_e32 v2, 3
3709 ; VI-NEXT: v_mov_b32_e32 v3, 4
3710 ; VI-NEXT: s_mov_b32 s32, 0
3711 ; VI-NEXT: s_getpc_b64 s[4:5]
3712 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3713 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3714 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3717 ; CI-LABEL: test_call_external_void_func_v4i32_imm:
3719 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3720 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3721 ; CI-NEXT: s_mov_b32 s38, -1
3722 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3723 ; CI-NEXT: s_add_u32 s36, s36, s3
3724 ; CI-NEXT: s_addc_u32 s37, s37, 0
3725 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3726 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3727 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3728 ; CI-NEXT: v_mov_b32_e32 v0, 1
3729 ; CI-NEXT: v_mov_b32_e32 v1, 2
3730 ; CI-NEXT: v_mov_b32_e32 v2, 3
3731 ; CI-NEXT: v_mov_b32_e32 v3, 4
3732 ; CI-NEXT: s_mov_b32 s32, 0
3733 ; CI-NEXT: s_getpc_b64 s[4:5]
3734 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3735 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3736 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3739 ; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
3741 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3742 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3743 ; GFX9-NEXT: s_mov_b32 s38, -1
3744 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3745 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3746 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3747 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3748 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3749 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3750 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
3751 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
3752 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
3753 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
3754 ; GFX9-NEXT: s_mov_b32 s32, 0
3755 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3756 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3757 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3758 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3759 ; GFX9-NEXT: s_endpgm
3761 ; GFX11-LABEL: test_call_external_void_func_v4i32_imm:
3763 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
3764 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
3765 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3766 ; GFX11-NEXT: s_mov_b32 s32, 0
3767 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3768 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i32@rel32@lo+4
3769 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i32@rel32@hi+12
3770 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3771 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3772 ; GFX11-NEXT: s_endpgm
3774 ; HSA-LABEL: test_call_external_void_func_v4i32_imm:
3776 ; HSA-NEXT: s_add_i32 s6, s6, s9
3777 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3778 ; HSA-NEXT: s_add_u32 s0, s0, s9
3779 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3780 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3781 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3782 ; HSA-NEXT: v_mov_b32_e32 v0, 1
3783 ; HSA-NEXT: v_mov_b32_e32 v1, 2
3784 ; HSA-NEXT: v_mov_b32_e32 v2, 3
3785 ; HSA-NEXT: v_mov_b32_e32 v3, 4
3786 ; HSA-NEXT: s_mov_b32 s32, 0
3787 ; HSA-NEXT: s_getpc_b64 s[8:9]
3788 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i32@rel32@lo+4
3789 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32@rel32@hi+12
3790 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3791 ; HSA-NEXT: s_endpgm
3792 call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
; Kernel that calls external_void_func_v5i32 with the constant vector
; <i32 1, i32 2, i32 3, i32 4, i32 5>. No memory load is expected; the checks
; require the five constants in v0-v4, one element per VGPR, before the
; s_swappc_b64 call.
3796 define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
3797 ; VI-LABEL: test_call_external_void_func_v5i32_imm:
3799 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3800 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3801 ; VI-NEXT: s_mov_b32 s38, -1
3802 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3803 ; VI-NEXT: s_add_u32 s36, s36, s3
3804 ; VI-NEXT: s_addc_u32 s37, s37, 0
3805 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3806 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3807 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3808 ; VI-NEXT: v_mov_b32_e32 v0, 1
3809 ; VI-NEXT: v_mov_b32_e32 v1, 2
3810 ; VI-NEXT: v_mov_b32_e32 v2, 3
3811 ; VI-NEXT: v_mov_b32_e32 v3, 4
3812 ; VI-NEXT: v_mov_b32_e32 v4, 5
3813 ; VI-NEXT: s_mov_b32 s32, 0
3814 ; VI-NEXT: s_getpc_b64 s[4:5]
3815 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3816 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3817 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3820 ; CI-LABEL: test_call_external_void_func_v5i32_imm:
3822 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3823 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3824 ; CI-NEXT: s_mov_b32 s38, -1
3825 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3826 ; CI-NEXT: s_add_u32 s36, s36, s3
3827 ; CI-NEXT: s_addc_u32 s37, s37, 0
3828 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3829 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3830 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3831 ; CI-NEXT: v_mov_b32_e32 v0, 1
3832 ; CI-NEXT: v_mov_b32_e32 v1, 2
3833 ; CI-NEXT: v_mov_b32_e32 v2, 3
3834 ; CI-NEXT: v_mov_b32_e32 v3, 4
3835 ; CI-NEXT: v_mov_b32_e32 v4, 5
3836 ; CI-NEXT: s_mov_b32 s32, 0
3837 ; CI-NEXT: s_getpc_b64 s[4:5]
3838 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3839 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3840 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3843 ; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
3845 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3846 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3847 ; GFX9-NEXT: s_mov_b32 s38, -1
3848 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3849 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3850 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3851 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3852 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3853 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3854 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
3855 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
3856 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
3857 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
3858 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
3859 ; GFX9-NEXT: s_mov_b32 s32, 0
3860 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3861 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3862 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3863 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3864 ; GFX9-NEXT: s_endpgm
3866 ; GFX11-LABEL: test_call_external_void_func_v5i32_imm:
3868 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
3869 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
3870 ; GFX11-NEXT: v_mov_b32_e32 v4, 5
3871 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3872 ; GFX11-NEXT: s_mov_b32 s32, 0
3873 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3874 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v5i32@rel32@lo+4
3875 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v5i32@rel32@hi+12
3876 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3877 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3878 ; GFX11-NEXT: s_endpgm
3880 ; HSA-LABEL: test_call_external_void_func_v5i32_imm:
3882 ; HSA-NEXT: s_add_i32 s6, s6, s9
3883 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3884 ; HSA-NEXT: s_add_u32 s0, s0, s9
3885 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
3886 ; HSA-NEXT: s_addc_u32 s1, s1, 0
3887 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
3888 ; HSA-NEXT: v_mov_b32_e32 v0, 1
3889 ; HSA-NEXT: v_mov_b32_e32 v1, 2
3890 ; HSA-NEXT: v_mov_b32_e32 v2, 3
3891 ; HSA-NEXT: v_mov_b32_e32 v3, 4
3892 ; HSA-NEXT: v_mov_b32_e32 v4, 5
3893 ; HSA-NEXT: s_mov_b32 s32, 0
3894 ; HSA-NEXT: s_getpc_b64 s[8:9]
3895 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v5i32@rel32@lo+4
3896 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v5i32@rel32@hi+12
3897 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
3898 ; HSA-NEXT: s_endpgm
3899 call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
; Kernel that first loads an addrspace(1) pointer from an undef addrspace(4)
; pointer, then loads a <8 x i32> through it and passes the vector by value to
; external_void_func_v8i32. The checks expect a scalar 64-bit load of the
; pointer, an s_waitcnt lgkmcnt(0), and then two 128-bit buffer loads (offsets
; 0 and 16) that fill v[0:3] and v[4:7] before the s_swappc_b64 call.
3903 define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
3904 ; VI-LABEL: test_call_external_void_func_v8i32:
3906 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3907 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
3908 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
3909 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3910 ; VI-NEXT: s_mov_b32 s38, -1
3911 ; VI-NEXT: s_mov_b32 s39, 0xe80000
3912 ; VI-NEXT: s_add_u32 s36, s36, s3
3913 ; VI-NEXT: s_mov_b32 s3, 0xf000
3914 ; VI-NEXT: s_mov_b32 s2, -1
3915 ; VI-NEXT: s_waitcnt lgkmcnt(0)
3916 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3917 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
3918 ; VI-NEXT: s_addc_u32 s37, s37, 0
3919 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
3920 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
3921 ; VI-NEXT: s_mov_b32 s32, 0
3922 ; VI-NEXT: s_getpc_b64 s[4:5]
3923 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3924 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3925 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3928 ; CI-LABEL: test_call_external_void_func_v8i32:
3930 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3931 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
3932 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
3933 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3934 ; CI-NEXT: s_mov_b32 s38, -1
3935 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
3936 ; CI-NEXT: s_add_u32 s36, s36, s3
3937 ; CI-NEXT: s_mov_b32 s3, 0xf000
3938 ; CI-NEXT: s_mov_b32 s2, -1
3939 ; CI-NEXT: s_waitcnt lgkmcnt(0)
3940 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3941 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
3942 ; CI-NEXT: s_addc_u32 s37, s37, 0
3943 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
3944 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
3945 ; CI-NEXT: s_mov_b32 s32, 0
3946 ; CI-NEXT: s_getpc_b64 s[4:5]
3947 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3948 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3949 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
3952 ; GFX9-LABEL: test_call_external_void_func_v8i32:
3954 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3955 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
3956 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
3957 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3958 ; GFX9-NEXT: s_mov_b32 s38, -1
3959 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
3960 ; GFX9-NEXT: s_add_u32 s36, s36, s3
3961 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
3962 ; GFX9-NEXT: s_mov_b32 s2, -1
3963 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3964 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3965 ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
3966 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
3967 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
3968 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
3969 ; GFX9-NEXT: s_mov_b32 s32, 0
3970 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3971 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3972 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3973 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3974 ; GFX9-NEXT: s_endpgm
3976 ; GFX11-LABEL: test_call_external_void_func_v8i32:
3978 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
3979 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
3980 ; GFX11-NEXT: s_mov_b32 s6, -1
3981 ; GFX11-NEXT: s_mov_b32 s32, 0
3982 ; GFX11-NEXT: s_getpc_b64 s[2:3]
3983 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v8i32@rel32@lo+4
3984 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v8i32@rel32@hi+12
3985 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3986 ; GFX11-NEXT: s_clause 0x1
3987 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
3988 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
3989 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
3990 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
3991 ; GFX11-NEXT: s_endpgm
3993 ; HSA-LABEL: test_call_external_void_func_v8i32:
3995 ; HSA-NEXT: s_add_i32 s6, s6, s9
3996 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
3997 ; HSA-NEXT: s_add_u32 s0, s0, s9
3998 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
3999 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
4000 ; HSA-NEXT: s_mov_b32 s10, -1
4001 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4002 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
4003 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4004 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4005 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4006 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4007 ; HSA-NEXT: s_mov_b32 s32, 0
4008 ; HSA-NEXT: s_getpc_b64 s[8:9]
4009 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v8i32@rel32@lo+4
4010 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v8i32@rel32@hi+12
4011 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4012 ; HSA-NEXT: s_endpgm
4013 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
4014 %val = load <8 x i32>, ptr addrspace(1) %ptr
4015 call void @external_void_func_v8i32(<8 x i32> %val)
; Calls @external_void_func_v8i32 with the constant vector <1, 2, ..., 8>.
; The expected output for each check prefix below materializes the eight
; elements in v0-v7 with immediate moves and then calls the callee through
; s_swappc_b64.
4019 define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
4020 ; VI-LABEL: test_call_external_void_func_v8i32_imm:
4022 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4023 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4024 ; VI-NEXT: s_mov_b32 s38, -1
4025 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4026 ; VI-NEXT: s_add_u32 s36, s36, s3
4027 ; VI-NEXT: s_addc_u32 s37, s37, 0
4028 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4029 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4030 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4031 ; VI-NEXT: v_mov_b32_e32 v0, 1
4032 ; VI-NEXT: v_mov_b32_e32 v1, 2
4033 ; VI-NEXT: v_mov_b32_e32 v2, 3
4034 ; VI-NEXT: v_mov_b32_e32 v3, 4
4035 ; VI-NEXT: v_mov_b32_e32 v4, 5
4036 ; VI-NEXT: v_mov_b32_e32 v5, 6
4037 ; VI-NEXT: v_mov_b32_e32 v6, 7
4038 ; VI-NEXT: v_mov_b32_e32 v7, 8
4039 ; VI-NEXT: s_mov_b32 s32, 0
4040 ; VI-NEXT: s_getpc_b64 s[4:5]
4041 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
4042 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
4043 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4046 ; CI-LABEL: test_call_external_void_func_v8i32_imm:
4048 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4049 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4050 ; CI-NEXT: s_mov_b32 s38, -1
4051 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4052 ; CI-NEXT: s_add_u32 s36, s36, s3
4053 ; CI-NEXT: s_addc_u32 s37, s37, 0
4054 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4055 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4056 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4057 ; CI-NEXT: v_mov_b32_e32 v0, 1
4058 ; CI-NEXT: v_mov_b32_e32 v1, 2
4059 ; CI-NEXT: v_mov_b32_e32 v2, 3
4060 ; CI-NEXT: v_mov_b32_e32 v3, 4
4061 ; CI-NEXT: v_mov_b32_e32 v4, 5
4062 ; CI-NEXT: v_mov_b32_e32 v5, 6
4063 ; CI-NEXT: v_mov_b32_e32 v6, 7
4064 ; CI-NEXT: v_mov_b32_e32 v7, 8
4065 ; CI-NEXT: s_mov_b32 s32, 0
4066 ; CI-NEXT: s_getpc_b64 s[4:5]
4067 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
4068 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
4069 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4072 ; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
4074 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4075 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4076 ; GFX9-NEXT: s_mov_b32 s38, -1
4077 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4078 ; GFX9-NEXT: s_add_u32 s36, s36, s3
4079 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4080 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4081 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4082 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4083 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
4084 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
4085 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
4086 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
4087 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
4088 ; GFX9-NEXT: v_mov_b32_e32 v5, 6
4089 ; GFX9-NEXT: v_mov_b32_e32 v6, 7
4090 ; GFX9-NEXT: v_mov_b32_e32 v7, 8
4091 ; GFX9-NEXT: s_mov_b32 s32, 0
4092 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4093 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
4094 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
4095 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4096 ; GFX9-NEXT: s_endpgm
4098 ; GFX11-LABEL: test_call_external_void_func_v8i32_imm:
4100 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
4101 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
4102 ; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6
4103 ; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8
4104 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4105 ; GFX11-NEXT: s_mov_b32 s32, 0
4106 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4107 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v8i32@rel32@lo+4
4108 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v8i32@rel32@hi+12
4109 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4110 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4111 ; GFX11-NEXT: s_endpgm
4113 ; HSA-LABEL: test_call_external_void_func_v8i32_imm:
4115 ; HSA-NEXT: s_add_i32 s6, s6, s9
4116 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
4117 ; HSA-NEXT: s_add_u32 s0, s0, s9
4118 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4119 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4120 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4121 ; HSA-NEXT: v_mov_b32_e32 v0, 1
4122 ; HSA-NEXT: v_mov_b32_e32 v1, 2
4123 ; HSA-NEXT: v_mov_b32_e32 v2, 3
4124 ; HSA-NEXT: v_mov_b32_e32 v3, 4
4125 ; HSA-NEXT: v_mov_b32_e32 v4, 5
4126 ; HSA-NEXT: v_mov_b32_e32 v5, 6
4127 ; HSA-NEXT: v_mov_b32_e32 v6, 7
4128 ; HSA-NEXT: v_mov_b32_e32 v7, 8
4129 ; HSA-NEXT: s_mov_b32 s32, 0
4130 ; HSA-NEXT: s_getpc_b64 s[8:9]
4131 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v8i32@rel32@lo+4
4132 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v8i32@rel32@hi+12
4133 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4134 ; HSA-NEXT: s_endpgm
4135 call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
; Loads a <16 x i32> through an addrspace(1) pointer that is itself loaded
; from an undef addrspace(4) pointer, and passes the vector to
; @external_void_func_v16i32.
; The expected output for each check prefix below fills v[0:15] with four
; 128-bit buffer loads before the s_swappc_b64 call.
4139 define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
4140 ; VI-LABEL: test_call_external_void_func_v16i32:
4142 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4143 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4144 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4145 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4146 ; VI-NEXT: s_mov_b32 s38, -1
4147 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4148 ; VI-NEXT: s_add_u32 s36, s36, s3
4149 ; VI-NEXT: s_mov_b32 s3, 0xf000
4150 ; VI-NEXT: s_mov_b32 s2, -1
4151 ; VI-NEXT: s_waitcnt lgkmcnt(0)
4152 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
4153 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
4154 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
4155 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
4156 ; VI-NEXT: s_addc_u32 s37, s37, 0
4157 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4158 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4159 ; VI-NEXT: s_mov_b32 s32, 0
4160 ; VI-NEXT: s_getpc_b64 s[4:5]
4161 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
4162 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
4163 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4166 ; CI-LABEL: test_call_external_void_func_v16i32:
4168 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4169 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4170 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4171 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4172 ; CI-NEXT: s_mov_b32 s38, -1
4173 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4174 ; CI-NEXT: s_add_u32 s36, s36, s3
4175 ; CI-NEXT: s_mov_b32 s3, 0xf000
4176 ; CI-NEXT: s_mov_b32 s2, -1
4177 ; CI-NEXT: s_waitcnt lgkmcnt(0)
4178 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
4179 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
4180 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
4181 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
4182 ; CI-NEXT: s_addc_u32 s37, s37, 0
4183 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4184 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4185 ; CI-NEXT: s_mov_b32 s32, 0
4186 ; CI-NEXT: s_getpc_b64 s[4:5]
4187 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
4188 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
4189 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4192 ; GFX9-LABEL: test_call_external_void_func_v16i32:
4194 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4195 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4196 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4197 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4198 ; GFX9-NEXT: s_mov_b32 s38, -1
4199 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4200 ; GFX9-NEXT: s_add_u32 s36, s36, s3
4201 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
4202 ; GFX9-NEXT: s_mov_b32 s2, -1
4203 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4204 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
4205 ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
4206 ; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
4207 ; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
4208 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4209 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4210 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4211 ; GFX9-NEXT: s_mov_b32 s32, 0
4212 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4213 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
4214 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
4215 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4216 ; GFX9-NEXT: s_endpgm
4218 ; GFX11-LABEL: test_call_external_void_func_v16i32:
4220 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
4221 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
4222 ; GFX11-NEXT: s_mov_b32 s6, -1
4223 ; GFX11-NEXT: s_mov_b32 s32, 0
4224 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4225 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v16i32@rel32@lo+4
4226 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v16i32@rel32@hi+12
4227 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4228 ; GFX11-NEXT: s_clause 0x3
4229 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
4230 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
4231 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
4232 ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
4233 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4234 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4235 ; GFX11-NEXT: s_endpgm
4237 ; HSA-LABEL: test_call_external_void_func_v16i32:
4239 ; HSA-NEXT: s_add_i32 s6, s6, s9
4240 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
4241 ; HSA-NEXT: s_add_u32 s0, s0, s9
4242 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4243 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
4244 ; HSA-NEXT: s_mov_b32 s10, -1
4245 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4246 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
4247 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4248 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4249 ; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
4250 ; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
4251 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4252 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4253 ; HSA-NEXT: s_mov_b32 s32, 0
4254 ; HSA-NEXT: s_getpc_b64 s[8:9]
4255 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v16i32@rel32@lo+4
4256 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v16i32@rel32@hi+12
4257 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4258 ; HSA-NEXT: s_endpgm
4259 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
4260 %val = load <16 x i32>, ptr addrspace(1) %ptr
4261 call void @external_void_func_v16i32(<16 x i32> %val)
; Loads a <32 x i32> through an addrspace(1) pointer that is itself loaded
; from an undef addrspace(4) pointer, and passes the vector to
; @external_void_func_v32i32.
; The expected output for each check prefix below fills v[0:31] with eight
; 128-bit buffer loads and, before the s_swappc_b64 call, stores v31 to
; scratch using s32 as the offset.
; NOTE(review): presumably the last vector element has to be passed on the
; stack because v31 is not available for arguments - confirm against the
; AMDGPU calling convention.
4265 define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
4266 ; VI-LABEL: test_call_external_void_func_v32i32:
4268 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4269 ; VI-NEXT: s_mov_b32 s7, 0xf000
4270 ; VI-NEXT: s_mov_b32 s6, -1
4271 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4272 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4273 ; VI-NEXT: s_waitcnt lgkmcnt(0)
4274 ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4275 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4276 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4277 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4278 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4279 ; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4280 ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4281 ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4282 ; VI-NEXT: s_mov_b32 s38, -1
4283 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4284 ; VI-NEXT: s_add_u32 s36, s36, s3
4285 ; VI-NEXT: s_addc_u32 s37, s37, 0
4286 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4287 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4288 ; VI-NEXT: s_mov_b32 s32, 0
4289 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4290 ; VI-NEXT: s_getpc_b64 s[8:9]
4291 ; VI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
4292 ; VI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
4293 ; VI-NEXT: s_waitcnt vmcnt(7)
4294 ; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32
4295 ; VI-NEXT: s_swappc_b64 s[30:31], s[8:9]
4298 ; CI-LABEL: test_call_external_void_func_v32i32:
4300 ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4301 ; CI-NEXT: s_mov_b32 s7, 0xf000
4302 ; CI-NEXT: s_mov_b32 s6, -1
4303 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4304 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4305 ; CI-NEXT: s_waitcnt lgkmcnt(0)
4306 ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4307 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4308 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4309 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4310 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4311 ; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4312 ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4313 ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4314 ; CI-NEXT: s_mov_b32 s38, -1
4315 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4316 ; CI-NEXT: s_add_u32 s36, s36, s3
4317 ; CI-NEXT: s_addc_u32 s37, s37, 0
4318 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4319 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4320 ; CI-NEXT: s_mov_b32 s32, 0
4321 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4322 ; CI-NEXT: s_getpc_b64 s[8:9]
4323 ; CI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
4324 ; CI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
4325 ; CI-NEXT: s_waitcnt vmcnt(7)
4326 ; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32
4327 ; CI-NEXT: s_swappc_b64 s[30:31], s[8:9]
4330 ; GFX9-LABEL: test_call_external_void_func_v32i32:
4332 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4333 ; GFX9-NEXT: s_mov_b32 s7, 0xf000
4334 ; GFX9-NEXT: s_mov_b32 s6, -1
4335 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4336 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4337 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4338 ; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4339 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4340 ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4341 ; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4342 ; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4343 ; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4344 ; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4345 ; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4346 ; GFX9-NEXT: s_mov_b32 s38, -1
4347 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4348 ; GFX9-NEXT: s_add_u32 s36, s36, s3
4349 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4350 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4351 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4352 ; GFX9-NEXT: s_mov_b32 s32, 0
4353 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4354 ; GFX9-NEXT: s_getpc_b64 s[8:9]
4355 ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
4356 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
4357 ; GFX9-NEXT: s_waitcnt vmcnt(7)
4358 ; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32
4359 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
4360 ; GFX9-NEXT: s_endpgm
4362 ; GFX11-LABEL: test_call_external_void_func_v32i32:
4364 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
4365 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
4366 ; GFX11-NEXT: s_mov_b32 s6, -1
4367 ; GFX11-NEXT: s_mov_b32 s32, 0
4368 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4369 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v32i32@rel32@lo+4
4370 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v32i32@rel32@hi+12
4371 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4372 ; GFX11-NEXT: s_clause 0x7
4373 ; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112
4374 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
4375 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
4376 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
4377 ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
4378 ; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64
4379 ; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80
4380 ; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96
4381 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4382 ; GFX11-NEXT: s_waitcnt vmcnt(7)
4383 ; GFX11-NEXT: scratch_store_b32 off, v31, s32
4384 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4385 ; GFX11-NEXT: s_endpgm
4387 ; HSA-LABEL: test_call_external_void_func_v32i32:
4389 ; HSA-NEXT: s_add_i32 s6, s6, s9
4390 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
4391 ; HSA-NEXT: s_add_u32 s0, s0, s9
4392 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4393 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
4394 ; HSA-NEXT: s_mov_b32 s10, -1
4395 ; HSA-NEXT: s_mov_b32 s32, 0
4396 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
4397 ; HSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112
4398 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4399 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4400 ; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
4401 ; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
4402 ; HSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64
4403 ; HSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80
4404 ; HSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96
4405 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4406 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4407 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4408 ; HSA-NEXT: s_getpc_b64 s[12:13]
4409 ; HSA-NEXT: s_add_u32 s12, s12, external_void_func_v32i32@rel32@lo+4
4410 ; HSA-NEXT: s_addc_u32 s13, s13, external_void_func_v32i32@rel32@hi+12
4411 ; HSA-NEXT: s_waitcnt vmcnt(7)
4412 ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32
4413 ; HSA-NEXT: s_swappc_b64 s[30:31], s[12:13]
4414 ; HSA-NEXT: s_endpgm
4415 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
4416 %val = load <32 x i32>, ptr addrspace(1) %ptr
4417 call void @external_void_func_v32i32(<32 x i32> %val)
; Passes a <32 x i32> followed by an extra i32 to
; @external_void_func_v32i32_i32.
; The vector is loaded through an addrspace(1) pointer read from an undef
; addrspace(4) pointer; the i32 is loaded from an undef addrspace(1) pointer.
; The unnamed i32 kernel argument is not referenced by the instructions
; visible in this body.
; The expected output for each check prefix below loads the vector into
; v[0:31] and the scalar into v32, then stores v31 to scratch at offset s32
; and v32 at s32 + 4 before the s_swappc_b64 call.
4421 define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
4422 ; VI-LABEL: test_call_external_void_func_v32i32_i32:
4424 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4425 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4426 ; VI-NEXT: s_mov_b32 s38, -1
4427 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4428 ; VI-NEXT: s_add_u32 s36, s36, s5
4429 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4430 ; VI-NEXT: s_mov_b32 s7, 0xf000
4431 ; VI-NEXT: s_mov_b32 s6, -1
4432 ; VI-NEXT: s_addc_u32 s37, s37, 0
4433 ; VI-NEXT: s_waitcnt lgkmcnt(0)
4434 ; VI-NEXT: buffer_load_dword v32, off, s[4:7], 0
4435 ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4436 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4437 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4438 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4439 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4440 ; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4441 ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4442 ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4443 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4444 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4445 ; VI-NEXT: s_mov_b32 s32, 0
4446 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4447 ; VI-NEXT: s_getpc_b64 s[4:5]
4448 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4449 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4450 ; VI-NEXT: s_waitcnt vmcnt(8)
4451 ; VI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
4452 ; VI-NEXT: s_waitcnt vmcnt(8)
4453 ; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32
4454 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4457 ; CI-LABEL: test_call_external_void_func_v32i32_i32:
4459 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4460 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4461 ; CI-NEXT: s_mov_b32 s38, -1
4462 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4463 ; CI-NEXT: s_add_u32 s36, s36, s5
4464 ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4465 ; CI-NEXT: s_mov_b32 s7, 0xf000
4466 ; CI-NEXT: s_mov_b32 s6, -1
4467 ; CI-NEXT: s_addc_u32 s37, s37, 0
4468 ; CI-NEXT: s_waitcnt lgkmcnt(0)
4469 ; CI-NEXT: buffer_load_dword v32, off, s[4:7], 0
4470 ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4471 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4472 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4473 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4474 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4475 ; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4476 ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4477 ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4478 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4479 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4480 ; CI-NEXT: s_mov_b32 s32, 0
4481 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4482 ; CI-NEXT: s_getpc_b64 s[4:5]
4483 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4484 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4485 ; CI-NEXT: s_waitcnt vmcnt(8)
4486 ; CI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
4487 ; CI-NEXT: s_waitcnt vmcnt(8)
4488 ; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32
4489 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4492 ; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
4494 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4495 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4496 ; GFX9-NEXT: s_mov_b32 s38, -1
4497 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4498 ; GFX9-NEXT: s_add_u32 s36, s36, s5
4499 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
4500 ; GFX9-NEXT: s_mov_b32 s7, 0xf000
4501 ; GFX9-NEXT: s_mov_b32 s6, -1
4502 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4503 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4504 ; GFX9-NEXT: buffer_load_dword v32, off, s[4:7], 0
4505 ; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4506 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4507 ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4508 ; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4509 ; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4510 ; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4511 ; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4512 ; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4513 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4514 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4515 ; GFX9-NEXT: s_mov_b32 s32, 0
4516 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4517 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4518 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4519 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4520 ; GFX9-NEXT: s_waitcnt vmcnt(8)
4521 ; GFX9-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
4522 ; GFX9-NEXT: s_waitcnt vmcnt(8)
4523 ; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32
4524 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4525 ; GFX9-NEXT: s_endpgm
4527 ; GFX11-LABEL: test_call_external_void_func_v32i32_i32:
4529 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
4530 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
4531 ; GFX11-NEXT: s_mov_b32 s6, -1
4532 ; GFX11-NEXT: s_mov_b32 s32, 0
4533 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4534 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v32i32_i32@rel32@lo+4
4535 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v32i32_i32@rel32@hi+12
4536 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4537 ; GFX11-NEXT: s_clause 0x8
4538 ; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112
4539 ; GFX11-NEXT: buffer_load_b32 v32, off, s[4:7], 0
4540 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
4541 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
4542 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
4543 ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
4544 ; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64
4545 ; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80
4546 ; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96
4547 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4548 ; GFX11-NEXT: s_add_i32 s4, s32, 4
4549 ; GFX11-NEXT: s_waitcnt vmcnt(8)
4550 ; GFX11-NEXT: scratch_store_b32 off, v31, s32
4551 ; GFX11-NEXT: s_waitcnt vmcnt(7)
4552 ; GFX11-NEXT: scratch_store_b32 off, v32, s4
4553 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4554 ; GFX11-NEXT: s_endpgm
4556 ; HSA-LABEL: test_call_external_void_func_v32i32_i32:
4558 ; HSA-NEXT: s_add_i32 s8, s8, s11
4559 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
4560 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
4561 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4562 ; HSA-NEXT: s_add_u32 s0, s0, s11
4563 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
4564 ; HSA-NEXT: s_mov_b32 s10, -1
4565 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
4566 ; HSA-NEXT: buffer_load_dword v32, off, s[8:11], 0
4567 ; HSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112
4568 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4569 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4570 ; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
4571 ; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
4572 ; HSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64
4573 ; HSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80
4574 ; HSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96
4575 ; HSA-NEXT: s_mov_b32 s32, 0
4576 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4577 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4578 ; HSA-NEXT: s_getpc_b64 s[8:9]
4579 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v32i32_i32@rel32@lo+4
4580 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32_i32@rel32@hi+12
4581 ; HSA-NEXT: s_waitcnt vmcnt(8)
4582 ; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
4583 ; HSA-NEXT: s_waitcnt vmcnt(8)
4584 ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32
4585 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4586 ; HSA-NEXT: s_endpgm
4587 %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
4588 %val0 = load <32 x i32>, ptr addrspace(1) %ptr0
4589 %val1 = load i32, ptr addrspace(1) undef
4590 call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
; Calls @external_i32_func_i32 with the constant 42 and writes the returned
; i32 to %out with a volatile store.
; The expected output for each check prefix below places 42 in v0 before the
; s_swappc_b64 call and, after the call, stores v0 through the s[36:39]
; buffer descriptor whose low half is loaded into s[36:37] ahead of the call.
4594 define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
4595 ; VI-LABEL: test_call_external_i32_func_i32_imm:
4597 ; VI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
4598 ; VI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
4599 ; VI-NEXT: s_mov_b32 s42, -1
4600 ; VI-NEXT: s_mov_b32 s43, 0xe80000
4601 ; VI-NEXT: s_add_u32 s40, s40, s5
4602 ; VI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
4603 ; VI-NEXT: s_addc_u32 s41, s41, 0
4604 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4605 ; VI-NEXT: s_mov_b64 s[0:1], s[40:41]
4606 ; VI-NEXT: s_mov_b64 s[2:3], s[42:43]
4607 ; VI-NEXT: v_mov_b32_e32 v0, 42
4608 ; VI-NEXT: s_mov_b32 s32, 0
4609 ; VI-NEXT: s_mov_b32 s39, 0xf000
4610 ; VI-NEXT: s_mov_b32 s38, -1
4611 ; VI-NEXT: s_getpc_b64 s[4:5]
4612 ; VI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4613 ; VI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4614 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4615 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0
4616 ; VI-NEXT: s_waitcnt vmcnt(0)
4619 ; CI-LABEL: test_call_external_i32_func_i32_imm:
4621 ; CI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
4622 ; CI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
4623 ; CI-NEXT: s_mov_b32 s42, -1
4624 ; CI-NEXT: s_mov_b32 s43, 0xe8f000
4625 ; CI-NEXT: s_add_u32 s40, s40, s5
4626 ; CI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x9
4627 ; CI-NEXT: s_addc_u32 s41, s41, 0
4628 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4629 ; CI-NEXT: s_mov_b64 s[0:1], s[40:41]
4630 ; CI-NEXT: s_mov_b64 s[2:3], s[42:43]
4631 ; CI-NEXT: v_mov_b32_e32 v0, 42
4632 ; CI-NEXT: s_mov_b32 s32, 0
4633 ; CI-NEXT: s_mov_b32 s39, 0xf000
4634 ; CI-NEXT: s_mov_b32 s38, -1
4635 ; CI-NEXT: s_getpc_b64 s[4:5]
4636 ; CI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4637 ; CI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4638 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4639 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0
4640 ; CI-NEXT: s_waitcnt vmcnt(0)
4643 ; GFX9-LABEL: test_call_external_i32_func_i32_imm:
4645 ; GFX9-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
4646 ; GFX9-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
4647 ; GFX9-NEXT: s_mov_b32 s42, -1
4648 ; GFX9-NEXT: s_mov_b32 s43, 0xe00000
4649 ; GFX9-NEXT: s_add_u32 s40, s40, s5
4650 ; GFX9-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
4651 ; GFX9-NEXT: s_addc_u32 s41, s41, 0
4652 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4653 ; GFX9-NEXT: s_mov_b64 s[0:1], s[40:41]
4654 ; GFX9-NEXT: s_mov_b64 s[2:3], s[42:43]
4655 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
4656 ; GFX9-NEXT: s_mov_b32 s32, 0
4657 ; GFX9-NEXT: s_mov_b32 s39, 0xf000
4658 ; GFX9-NEXT: s_mov_b32 s38, -1
4659 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4660 ; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4661 ; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4662 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4663 ; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0
4664 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4665 ; GFX9-NEXT: s_endpgm
4667 ; GFX11-LABEL: test_call_external_i32_func_i32_imm:
4669 ; GFX11-NEXT: s_load_b64 s[36:37], s[2:3], 0x24
4670 ; GFX11-NEXT: v_mov_b32_e32 v0, 42
4671 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4672 ; GFX11-NEXT: s_mov_b32 s32, 0
4673 ; GFX11-NEXT: s_mov_b32 s39, 0x31016000
4674 ; GFX11-NEXT: s_mov_b32 s38, -1
4675 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4676 ; GFX11-NEXT: s_add_u32 s2, s2, external_i32_func_i32@rel32@lo+4
4677 ; GFX11-NEXT: s_addc_u32 s3, s3, external_i32_func_i32@rel32@hi+12
4678 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4679 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4680 ; GFX11-NEXT: buffer_store_b32 v0, off, s[36:39], 0 dlc
4681 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4682 ; GFX11-NEXT: s_nop 0
4683 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4684 ; GFX11-NEXT: s_endpgm
4686 ; HSA-LABEL: test_call_external_i32_func_i32_imm:
4688 ; HSA-NEXT: s_add_i32 s8, s8, s11
4689 ; HSA-NEXT: s_load_dwordx2 s[36:37], s[6:7], 0x0
4690 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
4691 ; HSA-NEXT: s_add_u32 s0, s0, s11
4692 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4693 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4694 ; HSA-NEXT: v_mov_b32_e32 v0, 42
4695 ; HSA-NEXT: s_mov_b32 s32, 0
4696 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
4697 ; HSA-NEXT: s_mov_b32 s39, 0x1100f000
4698 ; HSA-NEXT: s_mov_b32 s38, -1
4699 ; HSA-NEXT: s_getpc_b64 s[8:9]
4700 ; HSA-NEXT: s_add_u32 s8, s8, external_i32_func_i32@rel32@lo+4
4701 ; HSA-NEXT: s_addc_u32 s9, s9, external_i32_func_i32@rel32@hi+12
4702 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4703 ; HSA-NEXT: buffer_store_dword v0, off, s[36:39], 0
4704 ; HSA-NEXT: s_waitcnt vmcnt(0)
4705 ; HSA-NEXT: s_endpgm
4706 %val = call i32 @external_i32_func_i32(i32 42)
4707 store volatile i32 %val, ptr addrspace(1) %out
; Kernel test: loads a { i8, i32 } aggregate through an addrspace(1) pointer and
; passes it by value to @external_void_func_struct_i8_i32. The pointer that is
; dereferenced is itself loaded from an undef addrspace(4) pointer, so only the
; shape of the generated code is being checked, not any concrete data.
; For every run configuration the assertions below expect the i8 field to be
; fetched with a byte load into v0 and the i32 field with a dword load at
; offset 4 into v1 before the s_swappc_b64 call.
; NOTE(review): the assertions are autogenerated (see the first line of the
; file); regenerate them with the update script instead of editing by hand.
4711 define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
4712 ; VI-LABEL: test_call_external_void_func_struct_i8_i32:
4714 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4715 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4716 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4717 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4718 ; VI-NEXT: s_mov_b32 s38, -1
4719 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4720 ; VI-NEXT: s_add_u32 s36, s36, s3
4721 ; VI-NEXT: s_mov_b32 s3, 0xf000
4722 ; VI-NEXT: s_mov_b32 s2, -1
4723 ; VI-NEXT: s_waitcnt lgkmcnt(0)
4724 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
4725 ; VI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
4726 ; VI-NEXT: s_addc_u32 s37, s37, 0
4727 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4728 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4729 ; VI-NEXT: s_mov_b32 s32, 0
4730 ; VI-NEXT: s_getpc_b64 s[4:5]
4731 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4732 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4733 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4736 ; CI-LABEL: test_call_external_void_func_struct_i8_i32:
4738 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4739 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4740 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4741 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4742 ; CI-NEXT: s_mov_b32 s38, -1
4743 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4744 ; CI-NEXT: s_add_u32 s36, s36, s3
4745 ; CI-NEXT: s_mov_b32 s3, 0xf000
4746 ; CI-NEXT: s_mov_b32 s2, -1
4747 ; CI-NEXT: s_waitcnt lgkmcnt(0)
4748 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
4749 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
4750 ; CI-NEXT: s_addc_u32 s37, s37, 0
4751 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4752 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4753 ; CI-NEXT: s_mov_b32 s32, 0
4754 ; CI-NEXT: s_getpc_b64 s[4:5]
4755 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4756 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4757 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4760 ; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
4762 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4763 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4764 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4765 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4766 ; GFX9-NEXT: s_mov_b32 s38, -1
4767 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4768 ; GFX9-NEXT: s_add_u32 s36, s36, s3
4769 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
4770 ; GFX9-NEXT: s_mov_b32 s2, -1
4771 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4772 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
4773 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
4774 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4775 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4776 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4777 ; GFX9-NEXT: s_mov_b32 s32, 0
4778 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4779 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4780 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4781 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4782 ; GFX9-NEXT: s_endpgm
4784 ; GFX11-LABEL: test_call_external_void_func_struct_i8_i32:
4786 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
4787 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
4788 ; GFX11-NEXT: s_mov_b32 s6, -1
4789 ; GFX11-NEXT: s_mov_b32 s32, 0
4790 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4791 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_struct_i8_i32@rel32@lo+4
4792 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_struct_i8_i32@rel32@hi+12
4793 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
4794 ; GFX11-NEXT: s_clause 0x1
4795 ; GFX11-NEXT: buffer_load_u8 v0, off, s[4:7], 0
4796 ; GFX11-NEXT: buffer_load_b32 v1, off, s[4:7], 0 offset:4
4797 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4798 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4799 ; GFX11-NEXT: s_endpgm
4801 ; HSA-LABEL: test_call_external_void_func_struct_i8_i32:
4803 ; HSA-NEXT: s_add_i32 s6, s6, s9
4804 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
4805 ; HSA-NEXT: s_add_u32 s0, s0, s9
4806 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
4807 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
4808 ; HSA-NEXT: s_mov_b32 s10, -1
4809 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4810 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
4811 ; HSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4812 ; HSA-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:4
4813 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4814 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4815 ; HSA-NEXT: s_mov_b32 s32, 0
4816 ; HSA-NEXT: s_getpc_b64 s[8:9]
4817 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_struct_i8_i32@rel32@lo+4
4818 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_struct_i8_i32@rel32@hi+12
4819 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4820 ; HSA-NEXT: s_endpgm
; The source pointer comes from an undef address, so its value is arbitrary;
; the load exists only to give the call a non-constant struct operand.
4821 %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
4822 %val = load { i8, i32 }, ptr addrspace(1) %ptr0
4823 call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
; Kernel test: builds a { i8, i32 } value in an addrspace(5) alloca (i8 field
; set to 3, i32 field set to 8) and passes that alloca to
; @external_void_func_byval_struct_i8_i32 as a byval argument.
; The assertions below expect, in order: the two field stores to scratch at
; offsets 8 and 12, a reload of the eight bytes, and a copy of the reloaded
; data to the location addressed by s32 before the s_swappc_b64 call. The
; buffer-based runs use two dword loads/stores; the GFX11 run uses the
; scratch_* instructions with a single 64-bit load and store.
; NOTE(review): the assertions are autogenerated (see the first line of the
; file); regenerate them with the update script instead of editing by hand.
4827 define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
4828 ; VI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4830 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4831 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4832 ; VI-NEXT: s_mov_b32 s38, -1
4833 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4834 ; VI-NEXT: s_add_u32 s36, s36, s3
4835 ; VI-NEXT: s_addc_u32 s37, s37, 0
4836 ; VI-NEXT: v_mov_b32_e32 v0, 3
4837 ; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0 offset:8
4838 ; VI-NEXT: v_mov_b32_e32 v0, 8
4839 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:12
4840 ; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:12
4841 ; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:8
4842 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4843 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4844 ; VI-NEXT: s_movk_i32 s32, 0x400
4845 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4846 ; VI-NEXT: s_getpc_b64 s[4:5]
4847 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4848 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4849 ; VI-NEXT: s_waitcnt vmcnt(1)
4850 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
4851 ; VI-NEXT: s_waitcnt vmcnt(1)
4852 ; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32
4853 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4856 ; CI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4858 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4859 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4860 ; CI-NEXT: s_mov_b32 s38, -1
4861 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
4862 ; CI-NEXT: s_add_u32 s36, s36, s3
4863 ; CI-NEXT: s_addc_u32 s37, s37, 0
4864 ; CI-NEXT: v_mov_b32_e32 v0, 3
4865 ; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0 offset:8
4866 ; CI-NEXT: v_mov_b32_e32 v0, 8
4867 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:12
4868 ; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:12
4869 ; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:8
4870 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
4871 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
4872 ; CI-NEXT: s_movk_i32 s32, 0x400
4873 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
4874 ; CI-NEXT: s_getpc_b64 s[4:5]
4875 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4876 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4877 ; CI-NEXT: s_waitcnt vmcnt(1)
4878 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
4879 ; CI-NEXT: s_waitcnt vmcnt(1)
4880 ; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32
4881 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4884 ; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4886 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4887 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4888 ; GFX9-NEXT: s_mov_b32 s38, -1
4889 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
4890 ; GFX9-NEXT: s_add_u32 s36, s36, s3
4891 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
4892 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
4893 ; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0 offset:8
4894 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
4895 ; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:12
4896 ; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:12
4897 ; GFX9-NEXT: s_nop 0
4898 ; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:8
4899 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
4900 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
4901 ; GFX9-NEXT: s_movk_i32 s32, 0x400
4902 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
4903 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4904 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4905 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4906 ; GFX9-NEXT: s_waitcnt vmcnt(1)
4907 ; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
4908 ; GFX9-NEXT: s_waitcnt vmcnt(1)
4909 ; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32
4910 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4911 ; GFX9-NEXT: s_endpgm
4913 ; GFX11-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4915 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
4916 ; GFX11-NEXT: s_mov_b32 s32, 16
4917 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
4918 ; GFX11-NEXT: s_getpc_b64 s[2:3]
4919 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_byval_struct_i8_i32@rel32@lo+4
4920 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_byval_struct_i8_i32@rel32@hi+12
4921 ; GFX11-NEXT: s_clause 0x1
4922 ; GFX11-NEXT: scratch_store_b8 off, v0, off offset:8
4923 ; GFX11-NEXT: scratch_store_b32 off, v1, off offset:12
4924 ; GFX11-NEXT: scratch_load_b64 v[0:1], off, off offset:8
4925 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4926 ; GFX11-NEXT: scratch_store_b64 off, v[0:1], s32
4927 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
4928 ; GFX11-NEXT: s_endpgm
4930 ; HSA-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4932 ; HSA-NEXT: s_add_i32 s6, s6, s9
4933 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
4934 ; HSA-NEXT: s_add_u32 s0, s0, s9
4935 ; HSA-NEXT: s_addc_u32 s1, s1, 0
4936 ; HSA-NEXT: v_mov_b32_e32 v0, 3
4937 ; HSA-NEXT: buffer_store_byte v0, off, s[0:3], 0 offset:8
4938 ; HSA-NEXT: v_mov_b32_e32 v0, 8
4939 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
4940 ; HSA-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:12
4941 ; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8
4942 ; HSA-NEXT: s_movk_i32 s32, 0x400
4943 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
4944 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
4945 ; HSA-NEXT: s_getpc_b64 s[8:9]
4946 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_byval_struct_i8_i32@rel32@lo+4
4947 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_byval_struct_i8_i32@rel32@hi+12
4948 ; HSA-NEXT: s_waitcnt vmcnt(1)
4949 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
4950 ; HSA-NEXT: s_waitcnt vmcnt(1)
4951 ; HSA-NEXT: buffer_store_dword v1, off, s[0:3], s32
4952 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
4953 ; HSA-NEXT: s_endpgm
; Initialize the struct field by field so both a byte store and a dword store
; show up in the output before the byval copy.
4954 %val = alloca { i8, i32 }, align 8, addrspace(5)
4955 %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0
4956 %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1
4957 store i8 3, ptr addrspace(5) %gep0
4958 store i32 8, ptr addrspace(5) %gep1
4959 call void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val)
; Kernel test: calls a function that takes an output-struct pointer together
; with a byval input struct. %in.val is initialized to { 3, 8 } and passed
; byval; %out.val is passed as the first pointer argument. After the call both
; fields of %out.val are read back and written with volatile stores to an undef
; addrspace(1) pointer.
; The assertions below expect the byval copy relative to s32, the constant 16
; in v0 immediately before the s_swappc_b64 call, and loads from scratch
; offsets 16 and 20 after it, which is consistent with %out.val being placed at
; offset 16 and its address travelling in v0.
; NOTE(review): the call site carries no sret attribute on %out.val; presumably
; it is supplied by the declaration of the callee, which is outside this part
; of the file - confirm before relying on it.
; NOTE(review): the assertions are autogenerated (see the first line of the
; file); regenerate them with the update script instead of editing by hand.
4963 define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
4964 ; VI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
4966 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4967 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4968 ; VI-NEXT: s_mov_b32 s38, -1
4969 ; VI-NEXT: s_mov_b32 s39, 0xe80000
4970 ; VI-NEXT: s_add_u32 s36, s36, s5
4971 ; VI-NEXT: s_addc_u32 s37, s37, 0
4972 ; VI-NEXT: v_mov_b32_e32 v0, 3
4973 ; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0 offset:8
4974 ; VI-NEXT: v_mov_b32_e32 v0, 8
4975 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:12
4976 ; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:12
4977 ; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:8
4978 ; VI-NEXT: s_movk_i32 s32, 0x800
4979 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
4980 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
4981 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
4982 ; VI-NEXT: s_getpc_b64 s[4:5]
4983 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
4984 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
4985 ; VI-NEXT: s_waitcnt vmcnt(1)
4986 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
4987 ; VI-NEXT: s_waitcnt vmcnt(1)
4988 ; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32
4989 ; VI-NEXT: v_mov_b32_e32 v0, 16
4990 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
4991 ; VI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:16
4992 ; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:20
4993 ; VI-NEXT: s_mov_b32 s3, 0xf000
4994 ; VI-NEXT: s_mov_b32 s2, -1
4995 ; VI-NEXT: s_waitcnt vmcnt(1)
4996 ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
4997 ; VI-NEXT: s_waitcnt vmcnt(0)
4998 ; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0
4999 ; VI-NEXT: s_waitcnt vmcnt(0)
5002 ; CI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5004 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5005 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5006 ; CI-NEXT: s_mov_b32 s38, -1
5007 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
5008 ; CI-NEXT: s_add_u32 s36, s36, s5
5009 ; CI-NEXT: s_addc_u32 s37, s37, 0
5010 ; CI-NEXT: v_mov_b32_e32 v0, 3
5011 ; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0 offset:8
5012 ; CI-NEXT: v_mov_b32_e32 v0, 8
5013 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:12
5014 ; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:12
5015 ; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:8
5016 ; CI-NEXT: s_movk_i32 s32, 0x800
5017 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
5018 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
5019 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
5020 ; CI-NEXT: s_getpc_b64 s[4:5]
5021 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5022 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5023 ; CI-NEXT: s_waitcnt vmcnt(1)
5024 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
5025 ; CI-NEXT: s_waitcnt vmcnt(1)
5026 ; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32
5027 ; CI-NEXT: v_mov_b32_e32 v0, 16
5028 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5029 ; CI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:16
5030 ; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:20
5031 ; CI-NEXT: s_mov_b32 s3, 0xf000
5032 ; CI-NEXT: s_mov_b32 s2, -1
5033 ; CI-NEXT: s_waitcnt vmcnt(1)
5034 ; CI-NEXT: buffer_store_byte v0, off, s[0:3], 0
5035 ; CI-NEXT: s_waitcnt vmcnt(0)
5036 ; CI-NEXT: buffer_store_dword v1, off, s[0:3], 0
5037 ; CI-NEXT: s_waitcnt vmcnt(0)
5040 ; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5042 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5043 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5044 ; GFX9-NEXT: s_mov_b32 s38, -1
5045 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
5046 ; GFX9-NEXT: s_add_u32 s36, s36, s5
5047 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
5048 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
5049 ; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0 offset:8
5050 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
5051 ; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:12
5052 ; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:12
5053 ; GFX9-NEXT: s_nop 0
5054 ; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:8
5055 ; GFX9-NEXT: s_movk_i32 s32, 0x800
5056 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
5057 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
5058 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
5059 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5060 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5061 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5062 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5063 ; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
5064 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5065 ; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32
5066 ; GFX9-NEXT: v_mov_b32_e32 v0, 16
5067 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
5068 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:16
5069 ; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:20
5070 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
5071 ; GFX9-NEXT: s_mov_b32 s2, -1
5072 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5073 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
5074 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5075 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], 0
5076 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5077 ; GFX9-NEXT: s_endpgm
5079 ; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5081 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
5082 ; GFX11-NEXT: s_mov_b32 s32, 32
5083 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
5084 ; GFX11-NEXT: s_getpc_b64 s[2:3]
5085 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5086 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5087 ; GFX11-NEXT: s_clause 0x1
5088 ; GFX11-NEXT: scratch_store_b8 off, v0, off offset:8
5089 ; GFX11-NEXT: scratch_store_b32 off, v1, off offset:12
5090 ; GFX11-NEXT: scratch_load_b64 v[0:1], off, off offset:8
5091 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5092 ; GFX11-NEXT: scratch_store_b64 off, v[0:1], s32
5093 ; GFX11-NEXT: v_mov_b32_e32 v0, 16
5094 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
5095 ; GFX11-NEXT: s_clause 0x1
5096 ; GFX11-NEXT: scratch_load_u8 v0, off, off offset:16
5097 ; GFX11-NEXT: scratch_load_b32 v1, off, off offset:20
5098 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000
5099 ; GFX11-NEXT: s_mov_b32 s2, -1
5100 ; GFX11-NEXT: s_waitcnt vmcnt(1)
5101 ; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc
5102 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
5103 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5104 ; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0 dlc
5105 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
5106 ; GFX11-NEXT: s_nop 0
5107 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5108 ; GFX11-NEXT: s_endpgm
5110 ; HSA-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5112 ; HSA-NEXT: s_add_i32 s8, s8, s11
5113 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
5114 ; HSA-NEXT: s_add_u32 s0, s0, s11
5115 ; HSA-NEXT: s_addc_u32 s1, s1, 0
5116 ; HSA-NEXT: v_mov_b32_e32 v0, 3
5117 ; HSA-NEXT: buffer_store_byte v0, off, s[0:3], 0 offset:8
5118 ; HSA-NEXT: v_mov_b32_e32 v0, 8
5119 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
5120 ; HSA-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:12
5121 ; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8
5122 ; HSA-NEXT: s_movk_i32 s32, 0x800
5123 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
5124 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
5125 ; HSA-NEXT: s_getpc_b64 s[8:9]
5126 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5127 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5128 ; HSA-NEXT: s_waitcnt vmcnt(1)
5129 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
5130 ; HSA-NEXT: s_waitcnt vmcnt(1)
5131 ; HSA-NEXT: buffer_store_dword v1, off, s[0:3], s32
5132 ; HSA-NEXT: v_mov_b32_e32 v0, 16
5133 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
5134 ; HSA-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:16
5135 ; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:20
5136 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000
5137 ; HSA-NEXT: s_mov_b32 s6, -1
5138 ; HSA-NEXT: s_waitcnt vmcnt(1)
5139 ; HSA-NEXT: buffer_store_byte v0, off, s[4:7], 0
5140 ; HSA-NEXT: s_waitcnt vmcnt(0)
5141 ; HSA-NEXT: buffer_store_dword v1, off, s[4:7], 0
5142 ; HSA-NEXT: s_waitcnt vmcnt(0)
5143 ; HSA-NEXT: s_endpgm
; Two separate allocas: %in.val is the byval source, %out.val receives the
; callee's result.
5144 %in.val = alloca { i8, i32 }, align 8, addrspace(5)
5145 %out.val = alloca { i8, i32 }, align 8, addrspace(5)
5146 %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
5147 %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
5148 store i8 3, ptr addrspace(5) %in.gep0
5149 store i32 8, ptr addrspace(5) %in.gep1
5150 call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) byval({ i8, i32 }) %in.val)
5151 %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
5152 %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
5153 %out.val0 = load i8, ptr addrspace(5) %out.gep0
5154 %out.val1 = load i32, ptr addrspace(5) %out.gep1
; The volatile stores consume both read-back fields; the destination is undef,
; so only the presence and order of the stores is meaningful.
5156 store volatile i8 %out.val0, ptr addrspace(1) undef
5157 store volatile i32 %out.val1, ptr addrspace(1) undef
; Kernel test: loads a <16 x i8> vector through an addrspace(1) pointer and
; passes it by value to @external_void_func_v16i8. The pointer is itself loaded
; from an undef addrspace(4) pointer, so only the shape of the generated code is
; being checked.
; The assertions below expect a single 128-bit load into v[0:3], followed by
; right shifts by 8, 16 and 24 of each loaded dword plus register moves, so
; that v0 through v15 each hold one element position before the s_swappc_b64
; call. v16-v18 serve as temporaries for the pieces of the first dword because
; v1-v3 still hold loaded data at that point. No masking instructions are
; expected.
; NOTE(review): the assertions are autogenerated (see the first line of the
; file); regenerate them with the update script instead of editing by hand.
5161 define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
5162 ; VI-LABEL: test_call_external_void_func_v16i8:
5164 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5165 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
5166 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
5167 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5168 ; VI-NEXT: s_mov_b32 s38, -1
5169 ; VI-NEXT: s_mov_b32 s39, 0xe80000
5170 ; VI-NEXT: s_add_u32 s36, s36, s3
5171 ; VI-NEXT: s_mov_b32 s3, 0xf000
5172 ; VI-NEXT: s_mov_b32 s2, -1
5173 ; VI-NEXT: s_waitcnt lgkmcnt(0)
5174 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
5175 ; VI-NEXT: s_addc_u32 s37, s37, 0
5176 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
5177 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
5178 ; VI-NEXT: s_mov_b32 s32, 0
5179 ; VI-NEXT: s_getpc_b64 s[4:5]
5180 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
5181 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
5182 ; VI-NEXT: s_waitcnt vmcnt(0)
5183 ; VI-NEXT: v_lshrrev_b32_e32 v16, 8, v0
5184 ; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v0
5185 ; VI-NEXT: v_lshrrev_b32_e32 v18, 24, v0
5186 ; VI-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5187 ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5188 ; VI-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5189 ; VI-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5190 ; VI-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5191 ; VI-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5192 ; VI-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5193 ; VI-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5194 ; VI-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5195 ; VI-NEXT: v_mov_b32_e32 v4, v1
5196 ; VI-NEXT: v_mov_b32_e32 v8, v2
5197 ; VI-NEXT: v_mov_b32_e32 v12, v3
5198 ; VI-NEXT: v_mov_b32_e32 v1, v16
5199 ; VI-NEXT: v_mov_b32_e32 v2, v17
5200 ; VI-NEXT: v_mov_b32_e32 v3, v18
5201 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5204 ; CI-LABEL: test_call_external_void_func_v16i8:
5206 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5207 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
5208 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
5209 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5210 ; CI-NEXT: s_mov_b32 s38, -1
5211 ; CI-NEXT: s_mov_b32 s39, 0xe8f000
5212 ; CI-NEXT: s_add_u32 s36, s36, s3
5213 ; CI-NEXT: s_mov_b32 s3, 0xf000
5214 ; CI-NEXT: s_mov_b32 s2, -1
5215 ; CI-NEXT: s_waitcnt lgkmcnt(0)
5216 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
5217 ; CI-NEXT: s_addc_u32 s37, s37, 0
5218 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
5219 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
5220 ; CI-NEXT: s_mov_b32 s32, 0
5221 ; CI-NEXT: s_getpc_b64 s[4:5]
5222 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
5223 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
5224 ; CI-NEXT: s_waitcnt vmcnt(0)
5225 ; CI-NEXT: v_lshrrev_b32_e32 v16, 8, v0
5226 ; CI-NEXT: v_lshrrev_b32_e32 v17, 16, v0
5227 ; CI-NEXT: v_lshrrev_b32_e32 v18, 24, v0
5228 ; CI-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5229 ; CI-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5230 ; CI-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5231 ; CI-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5232 ; CI-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5233 ; CI-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5234 ; CI-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5235 ; CI-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5236 ; CI-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5237 ; CI-NEXT: v_mov_b32_e32 v4, v1
5238 ; CI-NEXT: v_mov_b32_e32 v8, v2
5239 ; CI-NEXT: v_mov_b32_e32 v12, v3
5240 ; CI-NEXT: v_mov_b32_e32 v1, v16
5241 ; CI-NEXT: v_mov_b32_e32 v2, v17
5242 ; CI-NEXT: v_mov_b32_e32 v3, v18
5243 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5246 ; GFX9-LABEL: test_call_external_void_func_v16i8:
5248 ; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5249 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
5250 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
5251 ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5252 ; GFX9-NEXT: s_mov_b32 s38, -1
5253 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000
5254 ; GFX9-NEXT: s_add_u32 s36, s36, s3
5255 ; GFX9-NEXT: s_mov_b32 s3, 0xf000
5256 ; GFX9-NEXT: s_mov_b32 s2, -1
5257 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5258 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
5259 ; GFX9-NEXT: s_addc_u32 s37, s37, 0
5260 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
5261 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
5262 ; GFX9-NEXT: s_mov_b32 s32, 0
5263 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5264 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
5265 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
5266 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5267 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0
5268 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0
5269 ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 24, v0
5270 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5271 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5272 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5273 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5274 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5275 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5276 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5277 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5278 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5279 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
5280 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
5281 ; GFX9-NEXT: v_mov_b32_e32 v12, v3
5282 ; GFX9-NEXT: v_mov_b32_e32 v1, v16
5283 ; GFX9-NEXT: v_mov_b32_e32 v2, v17
5284 ; GFX9-NEXT: v_mov_b32_e32 v3, v18
5285 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
5286 ; GFX9-NEXT: s_endpgm
5288 ; GFX11-LABEL: test_call_external_void_func_v16i8:
5290 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
5291 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
5292 ; GFX11-NEXT: s_mov_b32 s6, -1
5293 ; GFX11-NEXT: s_mov_b32 s32, 0
5294 ; GFX11-NEXT: s_getpc_b64 s[2:3]
5295 ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v16i8@rel32@lo+4
5296 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v16i8@rel32@hi+12
5297 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5298 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
5299 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
5300 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5301 ; GFX11-NEXT: v_lshrrev_b32_e32 v16, 8, v0
5302 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0
5303 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 24, v0
5304 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5305 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5306 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5307 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5308 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5309 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5310 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5311 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5312 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5313 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v16
5314 ; GFX11-NEXT: v_mov_b32_e32 v8, v2
5315 ; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18
5316 ; GFX11-NEXT: v_mov_b32_e32 v2, v17
5317 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
5318 ; GFX11-NEXT: s_endpgm
5320 ; HSA-LABEL: test_call_external_void_func_v16i8:
5322 ; HSA-NEXT: s_add_i32 s6, s6, s9
5323 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
5324 ; HSA-NEXT: s_add_u32 s0, s0, s9
5325 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
5326 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
5327 ; HSA-NEXT: s_mov_b32 s10, -1
5328 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
5329 ; HSA-NEXT: s_addc_u32 s1, s1, 0
5330 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
5331 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
5332 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
5333 ; HSA-NEXT: s_mov_b32 s32, 0
5334 ; HSA-NEXT: s_getpc_b64 s[8:9]
5335 ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v16i8@rel32@lo+4
5336 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v16i8@rel32@hi+12
5337 ; HSA-NEXT: s_waitcnt vmcnt(0)
5338 ; HSA-NEXT: v_lshrrev_b32_e32 v16, 8, v0
5339 ; HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0
5340 ; HSA-NEXT: v_lshrrev_b32_e32 v18, 24, v0
5341 ; HSA-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5342 ; HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5343 ; HSA-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5344 ; HSA-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5345 ; HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5346 ; HSA-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5347 ; HSA-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5348 ; HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5349 ; HSA-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5350 ; HSA-NEXT: v_mov_b32_e32 v4, v1
5351 ; HSA-NEXT: v_mov_b32_e32 v8, v2
5352 ; HSA-NEXT: v_mov_b32_e32 v12, v3
5353 ; HSA-NEXT: v_mov_b32_e32 v1, v16
5354 ; HSA-NEXT: v_mov_b32_e32 v2, v17
5355 ; HSA-NEXT: v_mov_b32_e32 v3, v18
5356 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
5357 ; HSA-NEXT: s_endpgm
; The source pointer comes from an undef address, so its value is arbitrary;
; the load exists only to give the call a non-constant vector operand.
5358 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
5359 %val = load <16 x i8>, ptr addrspace(1) %ptr
5360 call void @external_void_func_v16i8(<16 x i8> %val)
; Kernel that forwards a <32 x i32> vector and a double to
; @stack_passed_f64_arg with an ordinary (non-tail) call. Under the VI, CI,
; GFX9 and HSA prefixes the expected code fills v0-v30 from the loaded kernel
; arguments and writes three dwords at s32, s32+4 and s32+8 before the
; s_swappc_b64. The last two of those stores carry the register pair produced
; by the dwordx2 kernel-argument load (presumably %tmp), so that value starts
; at offset 4. The GFX11 prefix expects the same three slots to be written
; with scratch_store_b32.
; The check lines are autogenerated (see the NOTE on the first line of this
; file), so regenerate them with the update script instead of hand-editing.
5364 define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
5365 ; VI-LABEL: stack_passed_arg_alignment_v32i32_f64:
5366 ; VI: ; %bb.0: ; %entry
5367 ; VI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
5368 ; VI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
5369 ; VI-NEXT: s_mov_b32 s54, -1
5370 ; VI-NEXT: s_mov_b32 s55, 0xe80000
5371 ; VI-NEXT: s_add_u32 s52, s52, s5
5372 ; VI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
5373 ; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4
5374 ; VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
5375 ; VI-NEXT: s_mov_b32 s32, 0
5376 ; VI-NEXT: s_addc_u32 s53, s53, 0
5377 ; VI-NEXT: s_waitcnt lgkmcnt(0)
5378 ; VI-NEXT: v_mov_b32_e32 v0, s23
5379 ; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32
5380 ; VI-NEXT: v_mov_b32_e32 v0, s4
5381 ; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
5382 ; VI-NEXT: v_mov_b32_e32 v0, s5
5383 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
5384 ; VI-NEXT: s_mov_b64 s[0:1], s[52:53]
5385 ; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
5386 ; VI-NEXT: s_mov_b64 s[2:3], s[54:55]
5387 ; VI-NEXT: v_mov_b32_e32 v0, s36
5388 ; VI-NEXT: v_mov_b32_e32 v1, s37
5389 ; VI-NEXT: v_mov_b32_e32 v2, s38
5390 ; VI-NEXT: v_mov_b32_e32 v3, s39
5391 ; VI-NEXT: v_mov_b32_e32 v4, s40
5392 ; VI-NEXT: v_mov_b32_e32 v5, s41
5393 ; VI-NEXT: v_mov_b32_e32 v6, s42
5394 ; VI-NEXT: v_mov_b32_e32 v7, s43
5395 ; VI-NEXT: v_mov_b32_e32 v8, s44
5396 ; VI-NEXT: v_mov_b32_e32 v9, s45
5397 ; VI-NEXT: v_mov_b32_e32 v10, s46
5398 ; VI-NEXT: v_mov_b32_e32 v11, s47
5399 ; VI-NEXT: v_mov_b32_e32 v12, s48
5400 ; VI-NEXT: v_mov_b32_e32 v13, s49
5401 ; VI-NEXT: v_mov_b32_e32 v14, s50
5402 ; VI-NEXT: v_mov_b32_e32 v15, s51
5403 ; VI-NEXT: v_mov_b32_e32 v16, s8
5404 ; VI-NEXT: v_mov_b32_e32 v17, s9
5405 ; VI-NEXT: v_mov_b32_e32 v18, s10
5406 ; VI-NEXT: v_mov_b32_e32 v19, s11
5407 ; VI-NEXT: v_mov_b32_e32 v20, s12
5408 ; VI-NEXT: v_mov_b32_e32 v21, s13
5409 ; VI-NEXT: v_mov_b32_e32 v22, s14
5410 ; VI-NEXT: v_mov_b32_e32 v23, s15
5411 ; VI-NEXT: v_mov_b32_e32 v24, s16
5412 ; VI-NEXT: v_mov_b32_e32 v25, s17
5413 ; VI-NEXT: v_mov_b32_e32 v26, s18
5414 ; VI-NEXT: v_mov_b32_e32 v27, s19
5415 ; VI-NEXT: v_mov_b32_e32 v28, s20
5416 ; VI-NEXT: v_mov_b32_e32 v29, s21
5417 ; VI-NEXT: v_mov_b32_e32 v30, s22
5418 ; VI-NEXT: s_getpc_b64 s[4:5]
5419 ; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5420 ; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5421 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5424 ; CI-LABEL: stack_passed_arg_alignment_v32i32_f64:
5425 ; CI: ; %bb.0: ; %entry
5426 ; CI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
5427 ; CI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
5428 ; CI-NEXT: s_mov_b32 s54, -1
5429 ; CI-NEXT: s_mov_b32 s55, 0xe8f000
5430 ; CI-NEXT: s_add_u32 s52, s52, s5
5431 ; CI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x19
5432 ; CI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x29
5433 ; CI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x9
5434 ; CI-NEXT: s_mov_b32 s32, 0
5435 ; CI-NEXT: s_addc_u32 s53, s53, 0
5436 ; CI-NEXT: s_waitcnt lgkmcnt(0)
5437 ; CI-NEXT: v_mov_b32_e32 v0, s23
5438 ; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32
5439 ; CI-NEXT: v_mov_b32_e32 v0, s4
5440 ; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
5441 ; CI-NEXT: v_mov_b32_e32 v0, s5
5442 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
5443 ; CI-NEXT: s_mov_b64 s[0:1], s[52:53]
5444 ; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
5445 ; CI-NEXT: s_mov_b64 s[2:3], s[54:55]
5446 ; CI-NEXT: v_mov_b32_e32 v0, s36
5447 ; CI-NEXT: v_mov_b32_e32 v1, s37
5448 ; CI-NEXT: v_mov_b32_e32 v2, s38
5449 ; CI-NEXT: v_mov_b32_e32 v3, s39
5450 ; CI-NEXT: v_mov_b32_e32 v4, s40
5451 ; CI-NEXT: v_mov_b32_e32 v5, s41
5452 ; CI-NEXT: v_mov_b32_e32 v6, s42
5453 ; CI-NEXT: v_mov_b32_e32 v7, s43
5454 ; CI-NEXT: v_mov_b32_e32 v8, s44
5455 ; CI-NEXT: v_mov_b32_e32 v9, s45
5456 ; CI-NEXT: v_mov_b32_e32 v10, s46
5457 ; CI-NEXT: v_mov_b32_e32 v11, s47
5458 ; CI-NEXT: v_mov_b32_e32 v12, s48
5459 ; CI-NEXT: v_mov_b32_e32 v13, s49
5460 ; CI-NEXT: v_mov_b32_e32 v14, s50
5461 ; CI-NEXT: v_mov_b32_e32 v15, s51
5462 ; CI-NEXT: v_mov_b32_e32 v16, s8
5463 ; CI-NEXT: v_mov_b32_e32 v17, s9
5464 ; CI-NEXT: v_mov_b32_e32 v18, s10
5465 ; CI-NEXT: v_mov_b32_e32 v19, s11
5466 ; CI-NEXT: v_mov_b32_e32 v20, s12
5467 ; CI-NEXT: v_mov_b32_e32 v21, s13
5468 ; CI-NEXT: v_mov_b32_e32 v22, s14
5469 ; CI-NEXT: v_mov_b32_e32 v23, s15
5470 ; CI-NEXT: v_mov_b32_e32 v24, s16
5471 ; CI-NEXT: v_mov_b32_e32 v25, s17
5472 ; CI-NEXT: v_mov_b32_e32 v26, s18
5473 ; CI-NEXT: v_mov_b32_e32 v27, s19
5474 ; CI-NEXT: v_mov_b32_e32 v28, s20
5475 ; CI-NEXT: v_mov_b32_e32 v29, s21
5476 ; CI-NEXT: v_mov_b32_e32 v30, s22
5477 ; CI-NEXT: s_getpc_b64 s[4:5]
5478 ; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5479 ; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5480 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5483 ; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
5484 ; GFX9: ; %bb.0: ; %entry
5485 ; GFX9-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
5486 ; GFX9-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
5487 ; GFX9-NEXT: s_mov_b32 s54, -1
5488 ; GFX9-NEXT: s_mov_b32 s55, 0xe00000
5489 ; GFX9-NEXT: s_add_u32 s52, s52, s5
5490 ; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
5491 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4
5492 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
5493 ; GFX9-NEXT: s_mov_b32 s32, 0
5494 ; GFX9-NEXT: s_addc_u32 s53, s53, 0
5495 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5496 ; GFX9-NEXT: v_mov_b32_e32 v0, s23
5497 ; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32
5498 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
5499 ; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
5500 ; GFX9-NEXT: v_mov_b32_e32 v0, s5
5501 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
5502 ; GFX9-NEXT: s_mov_b64 s[0:1], s[52:53]
5503 ; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
5504 ; GFX9-NEXT: s_mov_b64 s[2:3], s[54:55]
5505 ; GFX9-NEXT: v_mov_b32_e32 v0, s36
5506 ; GFX9-NEXT: v_mov_b32_e32 v1, s37
5507 ; GFX9-NEXT: v_mov_b32_e32 v2, s38
5508 ; GFX9-NEXT: v_mov_b32_e32 v3, s39
5509 ; GFX9-NEXT: v_mov_b32_e32 v4, s40
5510 ; GFX9-NEXT: v_mov_b32_e32 v5, s41
5511 ; GFX9-NEXT: v_mov_b32_e32 v6, s42
5512 ; GFX9-NEXT: v_mov_b32_e32 v7, s43
5513 ; GFX9-NEXT: v_mov_b32_e32 v8, s44
5514 ; GFX9-NEXT: v_mov_b32_e32 v9, s45
5515 ; GFX9-NEXT: v_mov_b32_e32 v10, s46
5516 ; GFX9-NEXT: v_mov_b32_e32 v11, s47
5517 ; GFX9-NEXT: v_mov_b32_e32 v12, s48
5518 ; GFX9-NEXT: v_mov_b32_e32 v13, s49
5519 ; GFX9-NEXT: v_mov_b32_e32 v14, s50
5520 ; GFX9-NEXT: v_mov_b32_e32 v15, s51
5521 ; GFX9-NEXT: v_mov_b32_e32 v16, s8
5522 ; GFX9-NEXT: v_mov_b32_e32 v17, s9
5523 ; GFX9-NEXT: v_mov_b32_e32 v18, s10
5524 ; GFX9-NEXT: v_mov_b32_e32 v19, s11
5525 ; GFX9-NEXT: v_mov_b32_e32 v20, s12
5526 ; GFX9-NEXT: v_mov_b32_e32 v21, s13
5527 ; GFX9-NEXT: v_mov_b32_e32 v22, s14
5528 ; GFX9-NEXT: v_mov_b32_e32 v23, s15
5529 ; GFX9-NEXT: v_mov_b32_e32 v24, s16
5530 ; GFX9-NEXT: v_mov_b32_e32 v25, s17
5531 ; GFX9-NEXT: v_mov_b32_e32 v26, s18
5532 ; GFX9-NEXT: v_mov_b32_e32 v27, s19
5533 ; GFX9-NEXT: v_mov_b32_e32 v28, s20
5534 ; GFX9-NEXT: v_mov_b32_e32 v29, s21
5535 ; GFX9-NEXT: v_mov_b32_e32 v30, s22
5536 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5537 ; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5538 ; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5539 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
5540 ; GFX9-NEXT: s_endpgm
5542 ; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64:
5543 ; GFX11: ; %bb.0: ; %entry
5544 ; GFX11-NEXT: s_clause 0x2
5545 ; GFX11-NEXT: s_load_b64 s[20:21], s[2:3], 0xa4
5546 ; GFX11-NEXT: s_load_b512 s[4:19], s[2:3], 0x64
5547 ; GFX11-NEXT: s_load_b512 s[36:51], s[2:3], 0x24
5548 ; GFX11-NEXT: s_mov_b32 s32, 0
5549 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5550 ; GFX11-NEXT: s_add_i32 s22, s32, 8
5551 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5552 ; GFX11-NEXT: v_dual_mov_b32 v0, s21 :: v_dual_mov_b32 v1, s20
5553 ; GFX11-NEXT: v_mov_b32_e32 v2, s19
5554 ; GFX11-NEXT: s_add_i32 s19, s32, 4
5555 ; GFX11-NEXT: v_dual_mov_b32 v4, s40 :: v_dual_mov_b32 v7, s43
5556 ; GFX11-NEXT: scratch_store_b32 off, v0, s22
5557 ; GFX11-NEXT: scratch_store_b32 off, v1, s19
5558 ; GFX11-NEXT: scratch_store_b32 off, v2, s32
5559 ; GFX11-NEXT: v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v3, s39
5560 ; GFX11-NEXT: v_dual_mov_b32 v1, s37 :: v_dual_mov_b32 v2, s38
5561 ; GFX11-NEXT: v_dual_mov_b32 v5, s41 :: v_dual_mov_b32 v6, s42
5562 ; GFX11-NEXT: v_dual_mov_b32 v9, s45 :: v_dual_mov_b32 v8, s44
5563 ; GFX11-NEXT: v_dual_mov_b32 v11, s47 :: v_dual_mov_b32 v10, s46
5564 ; GFX11-NEXT: v_dual_mov_b32 v13, s49 :: v_dual_mov_b32 v12, s48
5565 ; GFX11-NEXT: v_dual_mov_b32 v15, s51 :: v_dual_mov_b32 v14, s50
5566 ; GFX11-NEXT: v_dual_mov_b32 v17, s5 :: v_dual_mov_b32 v16, s4
5567 ; GFX11-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s6
5568 ; GFX11-NEXT: v_dual_mov_b32 v21, s9 :: v_dual_mov_b32 v20, s8
5569 ; GFX11-NEXT: v_dual_mov_b32 v23, s11 :: v_dual_mov_b32 v22, s10
5570 ; GFX11-NEXT: v_dual_mov_b32 v25, s13 :: v_dual_mov_b32 v24, s12
5571 ; GFX11-NEXT: v_dual_mov_b32 v27, s15 :: v_dual_mov_b32 v26, s14
5572 ; GFX11-NEXT: v_dual_mov_b32 v29, s17 :: v_dual_mov_b32 v28, s16
5573 ; GFX11-NEXT: v_mov_b32_e32 v30, s18
5574 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
5575 ; GFX11-NEXT: s_getpc_b64 s[2:3]
5576 ; GFX11-NEXT: s_add_u32 s2, s2, stack_passed_f64_arg@rel32@lo+4
5577 ; GFX11-NEXT: s_addc_u32 s3, s3, stack_passed_f64_arg@rel32@hi+12
5578 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5579 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
5580 ; GFX11-NEXT: s_endpgm
5582 ; HSA-LABEL: stack_passed_arg_alignment_v32i32_f64:
5583 ; HSA: ; %bb.0: ; %entry
5584 ; HSA-NEXT: s_add_i32 s8, s8, s11
5585 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
5586 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
5587 ; HSA-NEXT: s_add_u32 s0, s0, s11
5588 ; HSA-NEXT: s_load_dwordx16 s[8:23], s[6:7], 0x40
5589 ; HSA-NEXT: s_load_dwordx2 s[24:25], s[6:7], 0x80
5590 ; HSA-NEXT: s_load_dwordx16 s[36:51], s[6:7], 0x0
5591 ; HSA-NEXT: s_mov_b32 s32, 0
5592 ; HSA-NEXT: s_addc_u32 s1, s1, 0
5593 ; HSA-NEXT: s_waitcnt lgkmcnt(0)
5594 ; HSA-NEXT: v_mov_b32_e32 v0, s23
5595 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32
5596 ; HSA-NEXT: v_mov_b32_e32 v0, s24
5597 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
5598 ; HSA-NEXT: v_mov_b32_e32 v0, s25
5599 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
5600 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
5601 ; HSA-NEXT: v_mov_b32_e32 v0, s36
5602 ; HSA-NEXT: v_mov_b32_e32 v1, s37
5603 ; HSA-NEXT: v_mov_b32_e32 v2, s38
5604 ; HSA-NEXT: v_mov_b32_e32 v3, s39
5605 ; HSA-NEXT: v_mov_b32_e32 v4, s40
5606 ; HSA-NEXT: v_mov_b32_e32 v5, s41
5607 ; HSA-NEXT: v_mov_b32_e32 v6, s42
5608 ; HSA-NEXT: v_mov_b32_e32 v7, s43
5609 ; HSA-NEXT: v_mov_b32_e32 v8, s44
5610 ; HSA-NEXT: v_mov_b32_e32 v9, s45
5611 ; HSA-NEXT: v_mov_b32_e32 v10, s46
5612 ; HSA-NEXT: v_mov_b32_e32 v11, s47
5613 ; HSA-NEXT: v_mov_b32_e32 v12, s48
5614 ; HSA-NEXT: v_mov_b32_e32 v13, s49
5615 ; HSA-NEXT: v_mov_b32_e32 v14, s50
5616 ; HSA-NEXT: v_mov_b32_e32 v15, s51
5617 ; HSA-NEXT: v_mov_b32_e32 v16, s8
5618 ; HSA-NEXT: v_mov_b32_e32 v17, s9
5619 ; HSA-NEXT: v_mov_b32_e32 v18, s10
5620 ; HSA-NEXT: v_mov_b32_e32 v19, s11
5621 ; HSA-NEXT: v_mov_b32_e32 v20, s12
5622 ; HSA-NEXT: v_mov_b32_e32 v21, s13
5623 ; HSA-NEXT: v_mov_b32_e32 v22, s14
5624 ; HSA-NEXT: v_mov_b32_e32 v23, s15
5625 ; HSA-NEXT: v_mov_b32_e32 v24, s16
5626 ; HSA-NEXT: v_mov_b32_e32 v25, s17
5627 ; HSA-NEXT: v_mov_b32_e32 v26, s18
5628 ; HSA-NEXT: v_mov_b32_e32 v27, s19
5629 ; HSA-NEXT: v_mov_b32_e32 v28, s20
5630 ; HSA-NEXT: v_mov_b32_e32 v29, s21
5631 ; HSA-NEXT: v_mov_b32_e32 v30, s22
5632 ; HSA-NEXT: s_getpc_b64 s[24:25]
5633 ; HSA-NEXT: s_add_u32 s24, s24, stack_passed_f64_arg@rel32@lo+4
5634 ; HSA-NEXT: s_addc_u32 s25, s25, stack_passed_f64_arg@rel32@hi+12
5635 ; HSA-NEXT: s_swappc_b64 s[30:31], s[24:25]
5636 ; HSA-NEXT: s_endpgm
; Both kernel arguments are passed through to the callee unchanged.
5638 call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
; Tail-calls @byval_align16_f64_arg, passing %val through and a fresh
; addrspace(5) alloca of a double (declared align 8) as a
; byval(double) align 16 argument. %tmp is not used by the visible body.
; Every prefix expects the call to be emitted as a jump (s_setpc_b64 to the
; computed callee address) with no s_swappc_b64. Before the jump the expected
; code reloads the dword at s32 and stores it back to s32, and copies the
; two dwords at s32+24 and s32+28 down to s32+16 and s32+20. The GFX11 prefix
; expects that copy as a single b64 load/store pair.
; The check lines are autogenerated (see the NOTE on the first line of this
; file), so regenerate them with the update script instead of hand-editing.
5642 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
5643 ; VI-LABEL: tail_call_byval_align16:
5644 ; VI: ; %bb.0: ; %entry
5645 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5646 ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
5647 ; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32
5648 ; VI-NEXT: s_getpc_b64 s[4:5]
5649 ; VI-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5650 ; VI-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5651 ; VI-NEXT: s_waitcnt vmcnt(1)
5652 ; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
5653 ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
5654 ; VI-NEXT: s_waitcnt vmcnt(2)
5655 ; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32
5656 ; VI-NEXT: s_waitcnt vmcnt(1)
5657 ; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
5658 ; VI-NEXT: s_setpc_b64 s[4:5]
5660 ; CI-LABEL: tail_call_byval_align16:
5661 ; CI: ; %bb.0: ; %entry
5662 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5663 ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
5664 ; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32
5665 ; CI-NEXT: s_getpc_b64 s[4:5]
5666 ; CI-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5667 ; CI-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5668 ; CI-NEXT: s_waitcnt vmcnt(1)
5669 ; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
5670 ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
5671 ; CI-NEXT: s_waitcnt vmcnt(2)
5672 ; CI-NEXT: buffer_store_dword v32, off, s[0:3], s32
5673 ; CI-NEXT: s_waitcnt vmcnt(1)
5674 ; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
5675 ; CI-NEXT: s_setpc_b64 s[4:5]
5677 ; GFX9-LABEL: tail_call_byval_align16:
5678 ; GFX9: ; %bb.0: ; %entry
5679 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5680 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
5681 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32
5682 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5683 ; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5684 ; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5685 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5686 ; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
5687 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
5688 ; GFX9-NEXT: s_waitcnt vmcnt(2)
5689 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
5690 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5691 ; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
5692 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5694 ; GFX11-LABEL: tail_call_byval_align16:
5695 ; GFX11: ; %bb.0: ; %entry
5696 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5697 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
5698 ; GFX11-NEXT: s_getpc_b64 s[0:1]
5699 ; GFX11-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4
5700 ; GFX11-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12
5701 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5702 ; GFX11-NEXT: scratch_store_b32 off, v31, s32
5703 ; GFX11-NEXT: scratch_load_b64 v[31:32], off, s32 offset:24
5704 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5705 ; GFX11-NEXT: scratch_store_b64 off, v[31:32], s32 offset:16
5706 ; GFX11-NEXT: s_setpc_b64 s[0:1]
5708 ; HSA-LABEL: tail_call_byval_align16:
5709 ; HSA: ; %bb.0: ; %entry
5710 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5711 ; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
5712 ; HSA-NEXT: buffer_load_dword v32, off, s[0:3], s32
5713 ; HSA-NEXT: s_getpc_b64 s[4:5]
5714 ; HSA-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5715 ; HSA-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5716 ; HSA-NEXT: s_waitcnt vmcnt(1)
5717 ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
5718 ; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
5719 ; HSA-NEXT: s_waitcnt vmcnt(2)
5720 ; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32
5721 ; HSA-NEXT: s_waitcnt vmcnt(1)
5722 ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
5723 ; HSA-NEXT: s_setpc_b64 s[4:5]
; The alloca is only align 8, while the byval parameter attribute below
; requests align 16.
5725 %alloca = alloca double, align 8, addrspace(5)
5726 tail call void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca)
; Tail-call counterpart of stack_passed_arg_alignment_v32i32_f64: both
; incoming arguments are forwarded unchanged to @stack_passed_f64_arg.
; All prefixes expect the three dwords at s32, s32+4 and s32+8 to be loaded
; and stored back to the same offsets, followed by s_setpc_b64 to the callee
; address (no s_swappc_b64). The GFX11 prefix expects the upper two dwords to
; be handled as one b64 load/store at offset 4.
; The check lines are autogenerated (see the NOTE on the first line of this
; file), so regenerate them with the update script instead of hand-editing.
5730 define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
5731 ; VI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5732 ; VI: ; %bb.0: ; %entry
5733 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5734 ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
5735 ; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
5736 ; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
5737 ; VI-NEXT: s_getpc_b64 s[4:5]
5738 ; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5739 ; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5740 ; VI-NEXT: s_waitcnt vmcnt(2)
5741 ; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32
5742 ; VI-NEXT: s_waitcnt vmcnt(2)
5743 ; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
5744 ; VI-NEXT: s_waitcnt vmcnt(2)
5745 ; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8
5746 ; VI-NEXT: s_setpc_b64 s[4:5]
5748 ; CI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5749 ; CI: ; %bb.0: ; %entry
5750 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5751 ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
5752 ; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
5753 ; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
5754 ; CI-NEXT: s_getpc_b64 s[4:5]
5755 ; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5756 ; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5757 ; CI-NEXT: s_waitcnt vmcnt(2)
5758 ; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32
5759 ; CI-NEXT: s_waitcnt vmcnt(2)
5760 ; CI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
5761 ; CI-NEXT: s_waitcnt vmcnt(2)
5762 ; CI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8
5763 ; CI-NEXT: s_setpc_b64 s[4:5]
5765 ; GFX9-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5766 ; GFX9: ; %bb.0: ; %entry
5767 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5768 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
5769 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
5770 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
5771 ; GFX9-NEXT: s_getpc_b64 s[4:5]
5772 ; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5773 ; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5774 ; GFX9-NEXT: s_waitcnt vmcnt(2)
5775 ; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32
5776 ; GFX9-NEXT: s_waitcnt vmcnt(2)
5777 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
5778 ; GFX9-NEXT: s_waitcnt vmcnt(2)
5779 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8
5780 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5782 ; GFX11-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5783 ; GFX11: ; %bb.0: ; %entry
5784 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5785 ; GFX11-NEXT: s_clause 0x1
5786 ; GFX11-NEXT: scratch_load_b32 v33, off, s32
5787 ; GFX11-NEXT: scratch_load_b64 v[31:32], off, s32 offset:4
5788 ; GFX11-NEXT: s_getpc_b64 s[0:1]
5789 ; GFX11-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4
5790 ; GFX11-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12
5791 ; GFX11-NEXT: s_waitcnt vmcnt(1)
5792 ; GFX11-NEXT: scratch_store_b32 off, v33, s32
5793 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5794 ; GFX11-NEXT: scratch_store_b64 off, v[31:32], s32 offset:4
5795 ; GFX11-NEXT: s_setpc_b64 s[0:1]
5797 ; HSA-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5798 ; HSA: ; %bb.0: ; %entry
5799 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5800 ; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32
5801 ; HSA-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
5802 ; HSA-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
5803 ; HSA-NEXT: s_getpc_b64 s[4:5]
5804 ; HSA-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5805 ; HSA-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5806 ; HSA-NEXT: s_waitcnt vmcnt(2)
5807 ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32
5808 ; HSA-NEXT: s_waitcnt vmcnt(2)
5809 ; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
5810 ; HSA-NEXT: s_waitcnt vmcnt(2)
5811 ; HSA-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8
5812 ; HSA-NEXT: s_setpc_b64 s[4:5]
; Both incoming arguments are forwarded unchanged in tail position.
5814 tail call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
; Calls @external_void_func_12xv3i32 with twelve constant <3 x i32> vectors,
; 36 dwords in total with values 0 through 15. The expected code places the
; first 31 dwords in v0-v30 (ending with the value 10 in v30) and writes the
; remaining five values (11, 12, 13, 14, 15) to s32 at offsets 0, 4, 8, 12
; and 16. The GFX11 prefix expects one scratch_store_b128 plus one
; scratch_store_b32 for those five values.
; Around the call the expected code saves the old s33 and the return address
; s[30:31] in lanes of v40, spills v40 at s33, and adjusts s32 up before the
; call and back down afterwards.
; The check lines are autogenerated (see the NOTE on the first line of this
; file), so regenerate them with the update script instead of hand-editing.
5818 define void @stack_12xv3i32() #0 {
5819 ; VI-LABEL: stack_12xv3i32:
5820 ; VI: ; %bb.0: ; %entry
5821 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5822 ; VI-NEXT: s_mov_b32 s4, s33
5823 ; VI-NEXT: s_mov_b32 s33, s32
5824 ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
5825 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5826 ; VI-NEXT: s_mov_b64 exec, s[8:9]
5827 ; VI-NEXT: s_addk_i32 s32, 0x400
5828 ; VI-NEXT: v_mov_b32_e32 v0, 11
5829 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
5830 ; VI-NEXT: v_mov_b32_e32 v0, 12
5831 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
5832 ; VI-NEXT: v_mov_b32_e32 v0, 13
5833 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
5834 ; VI-NEXT: v_mov_b32_e32 v0, 14
5835 ; VI-NEXT: v_writelane_b32 v40, s4, 2
5836 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
5837 ; VI-NEXT: v_mov_b32_e32 v0, 15
5838 ; VI-NEXT: v_writelane_b32 v40, s30, 0
5839 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
5840 ; VI-NEXT: v_mov_b32_e32 v0, 0
5841 ; VI-NEXT: v_mov_b32_e32 v1, 0
5842 ; VI-NEXT: v_mov_b32_e32 v2, 0
5843 ; VI-NEXT: v_mov_b32_e32 v3, 1
5844 ; VI-NEXT: v_mov_b32_e32 v4, 1
5845 ; VI-NEXT: v_mov_b32_e32 v5, 1
5846 ; VI-NEXT: v_mov_b32_e32 v6, 2
5847 ; VI-NEXT: v_mov_b32_e32 v7, 2
5848 ; VI-NEXT: v_mov_b32_e32 v8, 2
5849 ; VI-NEXT: v_mov_b32_e32 v9, 3
5850 ; VI-NEXT: v_mov_b32_e32 v10, 3
5851 ; VI-NEXT: v_mov_b32_e32 v11, 3
5852 ; VI-NEXT: v_mov_b32_e32 v12, 4
5853 ; VI-NEXT: v_mov_b32_e32 v13, 4
5854 ; VI-NEXT: v_mov_b32_e32 v14, 4
5855 ; VI-NEXT: v_mov_b32_e32 v15, 5
5856 ; VI-NEXT: v_mov_b32_e32 v16, 5
5857 ; VI-NEXT: v_mov_b32_e32 v17, 5
5858 ; VI-NEXT: v_mov_b32_e32 v18, 6
5859 ; VI-NEXT: v_mov_b32_e32 v19, 6
5860 ; VI-NEXT: v_mov_b32_e32 v20, 6
5861 ; VI-NEXT: v_mov_b32_e32 v21, 7
5862 ; VI-NEXT: v_mov_b32_e32 v22, 7
5863 ; VI-NEXT: v_mov_b32_e32 v23, 7
5864 ; VI-NEXT: v_mov_b32_e32 v24, 8
5865 ; VI-NEXT: v_mov_b32_e32 v25, 8
5866 ; VI-NEXT: v_mov_b32_e32 v26, 8
5867 ; VI-NEXT: v_mov_b32_e32 v27, 9
5868 ; VI-NEXT: v_mov_b32_e32 v28, 9
5869 ; VI-NEXT: v_mov_b32_e32 v29, 9
5870 ; VI-NEXT: v_mov_b32_e32 v30, 10
5871 ; VI-NEXT: v_writelane_b32 v40, s31, 1
5872 ; VI-NEXT: s_getpc_b64 s[4:5]
5873 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
5874 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
5875 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5876 ; VI-NEXT: v_readlane_b32 s31, v40, 1
5877 ; VI-NEXT: v_readlane_b32 s30, v40, 0
5878 ; VI-NEXT: v_readlane_b32 s4, v40, 2
5879 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1
5880 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5881 ; VI-NEXT: s_mov_b64 exec, s[6:7]
5882 ; VI-NEXT: s_addk_i32 s32, 0xfc00
5883 ; VI-NEXT: s_mov_b32 s33, s4
5884 ; VI-NEXT: s_waitcnt vmcnt(0)
5885 ; VI-NEXT: s_setpc_b64 s[30:31]
5887 ; CI-LABEL: stack_12xv3i32:
5888 ; CI: ; %bb.0: ; %entry
5889 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5890 ; CI-NEXT: s_mov_b32 s4, s33
5891 ; CI-NEXT: s_mov_b32 s33, s32
5892 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1
5893 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5894 ; CI-NEXT: s_mov_b64 exec, s[8:9]
5895 ; CI-NEXT: s_addk_i32 s32, 0x400
5896 ; CI-NEXT: v_mov_b32_e32 v0, 11
5897 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32
5898 ; CI-NEXT: v_mov_b32_e32 v0, 12
5899 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
5900 ; CI-NEXT: v_mov_b32_e32 v0, 13
5901 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
5902 ; CI-NEXT: v_mov_b32_e32 v0, 14
5903 ; CI-NEXT: v_writelane_b32 v40, s4, 2
5904 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
5905 ; CI-NEXT: v_mov_b32_e32 v0, 15
5906 ; CI-NEXT: v_writelane_b32 v40, s30, 0
5907 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
5908 ; CI-NEXT: v_mov_b32_e32 v0, 0
5909 ; CI-NEXT: v_mov_b32_e32 v1, 0
5910 ; CI-NEXT: v_mov_b32_e32 v2, 0
5911 ; CI-NEXT: v_mov_b32_e32 v3, 1
5912 ; CI-NEXT: v_mov_b32_e32 v4, 1
5913 ; CI-NEXT: v_mov_b32_e32 v5, 1
5914 ; CI-NEXT: v_mov_b32_e32 v6, 2
5915 ; CI-NEXT: v_mov_b32_e32 v7, 2
5916 ; CI-NEXT: v_mov_b32_e32 v8, 2
5917 ; CI-NEXT: v_mov_b32_e32 v9, 3
5918 ; CI-NEXT: v_mov_b32_e32 v10, 3
5919 ; CI-NEXT: v_mov_b32_e32 v11, 3
5920 ; CI-NEXT: v_mov_b32_e32 v12, 4
5921 ; CI-NEXT: v_mov_b32_e32 v13, 4
5922 ; CI-NEXT: v_mov_b32_e32 v14, 4
5923 ; CI-NEXT: v_mov_b32_e32 v15, 5
5924 ; CI-NEXT: v_mov_b32_e32 v16, 5
5925 ; CI-NEXT: v_mov_b32_e32 v17, 5
5926 ; CI-NEXT: v_mov_b32_e32 v18, 6
5927 ; CI-NEXT: v_mov_b32_e32 v19, 6
5928 ; CI-NEXT: v_mov_b32_e32 v20, 6
5929 ; CI-NEXT: v_mov_b32_e32 v21, 7
5930 ; CI-NEXT: v_mov_b32_e32 v22, 7
5931 ; CI-NEXT: v_mov_b32_e32 v23, 7
5932 ; CI-NEXT: v_mov_b32_e32 v24, 8
5933 ; CI-NEXT: v_mov_b32_e32 v25, 8
5934 ; CI-NEXT: v_mov_b32_e32 v26, 8
5935 ; CI-NEXT: v_mov_b32_e32 v27, 9
5936 ; CI-NEXT: v_mov_b32_e32 v28, 9
5937 ; CI-NEXT: v_mov_b32_e32 v29, 9
5938 ; CI-NEXT: v_mov_b32_e32 v30, 10
5939 ; CI-NEXT: v_writelane_b32 v40, s31, 1
5940 ; CI-NEXT: s_getpc_b64 s[4:5]
5941 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
5942 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
5943 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
5944 ; CI-NEXT: v_readlane_b32 s31, v40, 1
5945 ; CI-NEXT: v_readlane_b32 s30, v40, 0
5946 ; CI-NEXT: v_readlane_b32 s4, v40, 2
5947 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1
5948 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5949 ; CI-NEXT: s_mov_b64 exec, s[6:7]
5950 ; CI-NEXT: s_addk_i32 s32, 0xfc00
5951 ; CI-NEXT: s_mov_b32 s33, s4
5952 ; CI-NEXT: s_waitcnt vmcnt(0)
5953 ; CI-NEXT: s_setpc_b64 s[30:31]
5955 ; GFX9-LABEL: stack_12xv3i32:
5956 ; GFX9: ; %bb.0: ; %entry
5957 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5958 ; GFX9-NEXT: s_mov_b32 s4, s33
5959 ; GFX9-NEXT: s_mov_b32 s33, s32
5960 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
5961 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5962 ; GFX9-NEXT: s_mov_b64 exec, s[8:9]
5963 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5964 ; GFX9-NEXT: v_mov_b32_e32 v0, 11
5965 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
5966 ; GFX9-NEXT: v_mov_b32_e32 v0, 12
5967 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
5968 ; GFX9-NEXT: v_mov_b32_e32 v0, 13
5969 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
5970 ; GFX9-NEXT: v_mov_b32_e32 v0, 14
5971 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2
5972 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
5973 ; GFX9-NEXT: v_mov_b32_e32 v0, 15
5974 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5975 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
5976 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
5977 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
5978 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
5979 ; GFX9-NEXT: v_mov_b32_e32 v3, 1
5980 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
5981 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
5982 ; GFX9-NEXT: v_mov_b32_e32 v6, 2
5983 ; GFX9-NEXT: v_mov_b32_e32 v7, 2
5984 ; GFX9-NEXT: v_mov_b32_e32 v8, 2
5985 ; GFX9-NEXT: v_mov_b32_e32 v9, 3
5986 ; GFX9-NEXT: v_mov_b32_e32 v10, 3
5987 ; GFX9-NEXT: v_mov_b32_e32 v11, 3
5988 ; GFX9-NEXT: v_mov_b32_e32 v12, 4
5989 ; GFX9-NEXT: v_mov_b32_e32 v13, 4
5990 ; GFX9-NEXT: v_mov_b32_e32 v14, 4
5991 ; GFX9-NEXT: v_mov_b32_e32 v15, 5
5992 ; GFX9-NEXT: v_mov_b32_e32 v16, 5
5993 ; GFX9-NEXT: v_mov_b32_e32 v17, 5
5994 ; GFX9-NEXT: v_mov_b32_e32 v18, 6
5995 ; GFX9-NEXT: v_mov_b32_e32 v19, 6
5996 ; GFX9-NEXT: v_mov_b32_e32 v20, 6
5997 ; GFX9-NEXT: v_mov_b32_e32 v21, 7
5998 ; GFX9-NEXT: v_mov_b32_e32 v22, 7
5999 ; GFX9-NEXT: v_mov_b32_e32 v23, 7
6000 ; GFX9-NEXT: v_mov_b32_e32 v24, 8
6001 ; GFX9-NEXT: v_mov_b32_e32 v25, 8
6002 ; GFX9-NEXT: v_mov_b32_e32 v26, 8
6003 ; GFX9-NEXT: v_mov_b32_e32 v27, 9
6004 ; GFX9-NEXT: v_mov_b32_e32 v28, 9
6005 ; GFX9-NEXT: v_mov_b32_e32 v29, 9
6006 ; GFX9-NEXT: v_mov_b32_e32 v30, 10
6007 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6008 ; GFX9-NEXT: s_getpc_b64 s[4:5]
6009 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
6010 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
6011 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
6012 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6013 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6014 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
6015 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6016 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6017 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6018 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6019 ; GFX9-NEXT: s_mov_b32 s33, s4
6020 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6021 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6023 ; GFX11-LABEL: stack_12xv3i32:
6024 ; GFX11: ; %bb.0: ; %entry
6025 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6026 ; GFX11-NEXT: s_mov_b32 s0, s33
6027 ; GFX11-NEXT: s_mov_b32 s33, s32
6028 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6029 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6030 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6031 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6032 ; GFX11-NEXT: v_dual_mov_b32 v0, 11 :: v_dual_mov_b32 v1, 12
6033 ; GFX11-NEXT: v_dual_mov_b32 v2, 13 :: v_dual_mov_b32 v3, 14
6034 ; GFX11-NEXT: v_mov_b32_e32 v4, 15
6035 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6036 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6037 ; GFX11-NEXT: s_add_i32 s0, s32, 16
6038 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
6039 ; GFX11-NEXT: scratch_store_b32 off, v4, s0
6040 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0
6041 ; GFX11-NEXT: v_dual_mov_b32 v3, 1 :: v_dual_mov_b32 v2, 0
6042 ; GFX11-NEXT: v_dual_mov_b32 v5, 1 :: v_dual_mov_b32 v4, 1
6043 ; GFX11-NEXT: v_dual_mov_b32 v7, 2 :: v_dual_mov_b32 v6, 2
6044 ; GFX11-NEXT: v_dual_mov_b32 v9, 3 :: v_dual_mov_b32 v8, 2
6045 ; GFX11-NEXT: v_dual_mov_b32 v11, 3 :: v_dual_mov_b32 v10, 3
6046 ; GFX11-NEXT: v_dual_mov_b32 v13, 4 :: v_dual_mov_b32 v12, 4
6047 ; GFX11-NEXT: v_dual_mov_b32 v15, 5 :: v_dual_mov_b32 v14, 4
6048 ; GFX11-NEXT: v_dual_mov_b32 v17, 5 :: v_dual_mov_b32 v16, 5
6049 ; GFX11-NEXT: v_dual_mov_b32 v19, 6 :: v_dual_mov_b32 v18, 6
6050 ; GFX11-NEXT: v_dual_mov_b32 v21, 7 :: v_dual_mov_b32 v20, 6
6051 ; GFX11-NEXT: v_dual_mov_b32 v23, 7 :: v_dual_mov_b32 v22, 7
6052 ; GFX11-NEXT: v_dual_mov_b32 v25, 8 :: v_dual_mov_b32 v24, 8
6053 ; GFX11-NEXT: v_dual_mov_b32 v27, 9 :: v_dual_mov_b32 v26, 8
6054 ; GFX11-NEXT: v_dual_mov_b32 v29, 9 :: v_dual_mov_b32 v28, 9
6055 ; GFX11-NEXT: v_mov_b32_e32 v30, 10
6056 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6057 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6058 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4
6059 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12
6060 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6061 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6062 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6063 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6064 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6065 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6066 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6067 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6068 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6069 ; GFX11-NEXT: s_mov_b32 s33, s0
6070 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6071 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6073 ; HSA-LABEL: stack_12xv3i32:
6074 ; HSA: ; %bb.0: ; %entry
6075 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6076 ; HSA-NEXT: s_mov_b32 s4, s33
6077 ; HSA-NEXT: s_mov_b32 s33, s32
6078 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1
6079 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6080 ; HSA-NEXT: s_mov_b64 exec, s[8:9]
6081 ; HSA-NEXT: s_addk_i32 s32, 0x400
6082 ; HSA-NEXT: v_mov_b32_e32 v0, 11
6083 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32
6084 ; HSA-NEXT: v_mov_b32_e32 v0, 12
6085 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6086 ; HSA-NEXT: v_mov_b32_e32 v0, 13
6087 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6088 ; HSA-NEXT: v_mov_b32_e32 v0, 14
6089 ; HSA-NEXT: v_writelane_b32 v40, s4, 2
6090 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6091 ; HSA-NEXT: v_mov_b32_e32 v0, 15
6092 ; HSA-NEXT: v_writelane_b32 v40, s30, 0
6093 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6094 ; HSA-NEXT: v_mov_b32_e32 v0, 0
6095 ; HSA-NEXT: v_mov_b32_e32 v1, 0
6096 ; HSA-NEXT: v_mov_b32_e32 v2, 0
6097 ; HSA-NEXT: v_mov_b32_e32 v3, 1
6098 ; HSA-NEXT: v_mov_b32_e32 v4, 1
6099 ; HSA-NEXT: v_mov_b32_e32 v5, 1
6100 ; HSA-NEXT: v_mov_b32_e32 v6, 2
6101 ; HSA-NEXT: v_mov_b32_e32 v7, 2
6102 ; HSA-NEXT: v_mov_b32_e32 v8, 2
6103 ; HSA-NEXT: v_mov_b32_e32 v9, 3
6104 ; HSA-NEXT: v_mov_b32_e32 v10, 3
6105 ; HSA-NEXT: v_mov_b32_e32 v11, 3
6106 ; HSA-NEXT: v_mov_b32_e32 v12, 4
6107 ; HSA-NEXT: v_mov_b32_e32 v13, 4
6108 ; HSA-NEXT: v_mov_b32_e32 v14, 4
6109 ; HSA-NEXT: v_mov_b32_e32 v15, 5
6110 ; HSA-NEXT: v_mov_b32_e32 v16, 5
6111 ; HSA-NEXT: v_mov_b32_e32 v17, 5
6112 ; HSA-NEXT: v_mov_b32_e32 v18, 6
6113 ; HSA-NEXT: v_mov_b32_e32 v19, 6
6114 ; HSA-NEXT: v_mov_b32_e32 v20, 6
6115 ; HSA-NEXT: v_mov_b32_e32 v21, 7
6116 ; HSA-NEXT: v_mov_b32_e32 v22, 7
6117 ; HSA-NEXT: v_mov_b32_e32 v23, 7
6118 ; HSA-NEXT: v_mov_b32_e32 v24, 8
6119 ; HSA-NEXT: v_mov_b32_e32 v25, 8
6120 ; HSA-NEXT: v_mov_b32_e32 v26, 8
6121 ; HSA-NEXT: v_mov_b32_e32 v27, 9
6122 ; HSA-NEXT: v_mov_b32_e32 v28, 9
6123 ; HSA-NEXT: v_mov_b32_e32 v29, 9
6124 ; HSA-NEXT: v_mov_b32_e32 v30, 10
6125 ; HSA-NEXT: v_writelane_b32 v40, s31, 1
6126 ; HSA-NEXT: s_getpc_b64 s[4:5]
6127 ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
6128 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
6129 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
6130 ; HSA-NEXT: v_readlane_b32 s31, v40, 1
6131 ; HSA-NEXT: v_readlane_b32 s30, v40, 0
6132 ; HSA-NEXT: v_readlane_b32 s4, v40, 2
6133 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
6134 ; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6135 ; HSA-NEXT: s_mov_b64 exec, s[6:7]
6136 ; HSA-NEXT: s_addk_i32 s32, 0xfc00
6137 ; HSA-NEXT: s_mov_b32 s33, s4
6138 ; HSA-NEXT: s_waitcnt vmcnt(0)
6139 ; HSA-NEXT: s_setpc_b64 s[30:31]
; The first ten vectors are splats of 0 through 9; the last two carry the
; distinct values 10 through 15, so each trailing dword can be told apart in
; the expected output.
6141 call void @external_void_func_12xv3i32(
6142 <3 x i32><i32 0, i32 0, i32 0>,
6143 <3 x i32><i32 1, i32 1, i32 1>,
6144 <3 x i32><i32 2, i32 2, i32 2>,
6145 <3 x i32><i32 3, i32 3, i32 3>,
6146 <3 x i32><i32 4, i32 4, i32 4>,
6147 <3 x i32><i32 5, i32 5, i32 5>,
6148 <3 x i32><i32 6, i32 6, i32 6>,
6149 <3 x i32><i32 7, i32 7, i32 7>,
6150 <3 x i32><i32 8, i32 8, i32 8>,
6151 <3 x i32><i32 9, i32 9, i32 9>,
6152 <3 x i32><i32 10, i32 11, i32 12>,
6153 <3 x i32><i32 13, i32 14, i32 15>)
; Test: call @external_void_func_12xv3f32 with twelve constant <3 x float>
; arguments (36 float elements in total).
; As the autogenerated checks below show, the first 31 elements are
; materialized in v0-v30 (0.0 through 10.0), and the remaining five elements
; (0x41300000..0x41700000, i.e. 11.0..15.0) are written to memory addressed
; from s32: five buffer_store_dword at offsets 0-16 on VI/CI/GFX9/HSA, and one
; scratch_store_b128 plus one scratch_store_b32 (at s32 + 16) on GFX11.
6157 define void @stack_12xv3f32() #0 {
6158 ; VI-LABEL: stack_12xv3f32:
6159 ; VI: ; %bb.0: ; %entry
6160 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6161 ; VI-NEXT: s_mov_b32 s4, s33
6162 ; VI-NEXT: s_mov_b32 s33, s32
6163 ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
6164 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6165 ; VI-NEXT: s_mov_b64 exec, s[8:9]
6166 ; VI-NEXT: s_addk_i32 s32, 0x400
6167 ; VI-NEXT: v_mov_b32_e32 v0, 0x41300000
6168 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6169 ; VI-NEXT: v_mov_b32_e32 v0, 0x41400000
6170 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6171 ; VI-NEXT: v_mov_b32_e32 v0, 0x41500000
6172 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6173 ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000
6174 ; VI-NEXT: v_writelane_b32 v40, s4, 2
6175 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6176 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000
6177 ; VI-NEXT: v_writelane_b32 v40, s30, 0
6178 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6179 ; VI-NEXT: v_mov_b32_e32 v0, 0
6180 ; VI-NEXT: v_mov_b32_e32 v1, 0
6181 ; VI-NEXT: v_mov_b32_e32 v2, 0
6182 ; VI-NEXT: v_mov_b32_e32 v3, 1.0
6183 ; VI-NEXT: v_mov_b32_e32 v4, 1.0
6184 ; VI-NEXT: v_mov_b32_e32 v5, 1.0
6185 ; VI-NEXT: v_mov_b32_e32 v6, 2.0
6186 ; VI-NEXT: v_mov_b32_e32 v7, 2.0
6187 ; VI-NEXT: v_mov_b32_e32 v8, 2.0
6188 ; VI-NEXT: v_mov_b32_e32 v9, 0x40400000
6189 ; VI-NEXT: v_mov_b32_e32 v10, 0x40400000
6190 ; VI-NEXT: v_mov_b32_e32 v11, 0x40400000
6191 ; VI-NEXT: v_mov_b32_e32 v12, 4.0
6192 ; VI-NEXT: v_mov_b32_e32 v13, 4.0
6193 ; VI-NEXT: v_mov_b32_e32 v14, 4.0
6194 ; VI-NEXT: v_mov_b32_e32 v15, 0x40a00000
6195 ; VI-NEXT: v_mov_b32_e32 v16, 0x40a00000
6196 ; VI-NEXT: v_mov_b32_e32 v17, 0x40a00000
6197 ; VI-NEXT: v_mov_b32_e32 v18, 0x40c00000
6198 ; VI-NEXT: v_mov_b32_e32 v19, 0x40c00000
6199 ; VI-NEXT: v_mov_b32_e32 v20, 0x40c00000
6200 ; VI-NEXT: v_mov_b32_e32 v21, 0x40e00000
6201 ; VI-NEXT: v_mov_b32_e32 v22, 0x40e00000
6202 ; VI-NEXT: v_mov_b32_e32 v23, 0x40e00000
6203 ; VI-NEXT: v_mov_b32_e32 v24, 0x41000000
6204 ; VI-NEXT: v_mov_b32_e32 v25, 0x41000000
6205 ; VI-NEXT: v_mov_b32_e32 v26, 0x41000000
6206 ; VI-NEXT: v_mov_b32_e32 v27, 0x41100000
6207 ; VI-NEXT: v_mov_b32_e32 v28, 0x41100000
6208 ; VI-NEXT: v_mov_b32_e32 v29, 0x41100000
6209 ; VI-NEXT: v_mov_b32_e32 v30, 0x41200000
6210 ; VI-NEXT: v_writelane_b32 v40, s31, 1
6211 ; VI-NEXT: s_getpc_b64 s[4:5]
6212 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6213 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6214 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6215 ; VI-NEXT: v_readlane_b32 s31, v40, 1
6216 ; VI-NEXT: v_readlane_b32 s30, v40, 0
6217 ; VI-NEXT: v_readlane_b32 s4, v40, 2
6218 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1
6219 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6220 ; VI-NEXT: s_mov_b64 exec, s[6:7]
6221 ; VI-NEXT: s_addk_i32 s32, 0xfc00
6222 ; VI-NEXT: s_mov_b32 s33, s4
6223 ; VI-NEXT: s_waitcnt vmcnt(0)
6224 ; VI-NEXT: s_setpc_b64 s[30:31]
6226 ; CI-LABEL: stack_12xv3f32:
6227 ; CI: ; %bb.0: ; %entry
6228 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6229 ; CI-NEXT: s_mov_b32 s4, s33
6230 ; CI-NEXT: s_mov_b32 s33, s32
6231 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1
6232 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6233 ; CI-NEXT: s_mov_b64 exec, s[8:9]
6234 ; CI-NEXT: s_addk_i32 s32, 0x400
6235 ; CI-NEXT: v_mov_b32_e32 v0, 0x41300000
6236 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6237 ; CI-NEXT: v_mov_b32_e32 v0, 0x41400000
6238 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6239 ; CI-NEXT: v_mov_b32_e32 v0, 0x41500000
6240 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6241 ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000
6242 ; CI-NEXT: v_writelane_b32 v40, s4, 2
6243 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6244 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000
6245 ; CI-NEXT: v_writelane_b32 v40, s30, 0
6246 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6247 ; CI-NEXT: v_mov_b32_e32 v0, 0
6248 ; CI-NEXT: v_mov_b32_e32 v1, 0
6249 ; CI-NEXT: v_mov_b32_e32 v2, 0
6250 ; CI-NEXT: v_mov_b32_e32 v3, 1.0
6251 ; CI-NEXT: v_mov_b32_e32 v4, 1.0
6252 ; CI-NEXT: v_mov_b32_e32 v5, 1.0
6253 ; CI-NEXT: v_mov_b32_e32 v6, 2.0
6254 ; CI-NEXT: v_mov_b32_e32 v7, 2.0
6255 ; CI-NEXT: v_mov_b32_e32 v8, 2.0
6256 ; CI-NEXT: v_mov_b32_e32 v9, 0x40400000
6257 ; CI-NEXT: v_mov_b32_e32 v10, 0x40400000
6258 ; CI-NEXT: v_mov_b32_e32 v11, 0x40400000
6259 ; CI-NEXT: v_mov_b32_e32 v12, 4.0
6260 ; CI-NEXT: v_mov_b32_e32 v13, 4.0
6261 ; CI-NEXT: v_mov_b32_e32 v14, 4.0
6262 ; CI-NEXT: v_mov_b32_e32 v15, 0x40a00000
6263 ; CI-NEXT: v_mov_b32_e32 v16, 0x40a00000
6264 ; CI-NEXT: v_mov_b32_e32 v17, 0x40a00000
6265 ; CI-NEXT: v_mov_b32_e32 v18, 0x40c00000
6266 ; CI-NEXT: v_mov_b32_e32 v19, 0x40c00000
6267 ; CI-NEXT: v_mov_b32_e32 v20, 0x40c00000
6268 ; CI-NEXT: v_mov_b32_e32 v21, 0x40e00000
6269 ; CI-NEXT: v_mov_b32_e32 v22, 0x40e00000
6270 ; CI-NEXT: v_mov_b32_e32 v23, 0x40e00000
6271 ; CI-NEXT: v_mov_b32_e32 v24, 0x41000000
6272 ; CI-NEXT: v_mov_b32_e32 v25, 0x41000000
6273 ; CI-NEXT: v_mov_b32_e32 v26, 0x41000000
6274 ; CI-NEXT: v_mov_b32_e32 v27, 0x41100000
6275 ; CI-NEXT: v_mov_b32_e32 v28, 0x41100000
6276 ; CI-NEXT: v_mov_b32_e32 v29, 0x41100000
6277 ; CI-NEXT: v_mov_b32_e32 v30, 0x41200000
6278 ; CI-NEXT: v_writelane_b32 v40, s31, 1
6279 ; CI-NEXT: s_getpc_b64 s[4:5]
6280 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6281 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6282 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6283 ; CI-NEXT: v_readlane_b32 s31, v40, 1
6284 ; CI-NEXT: v_readlane_b32 s30, v40, 0
6285 ; CI-NEXT: v_readlane_b32 s4, v40, 2
6286 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1
6287 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6288 ; CI-NEXT: s_mov_b64 exec, s[6:7]
6289 ; CI-NEXT: s_addk_i32 s32, 0xfc00
6290 ; CI-NEXT: s_mov_b32 s33, s4
6291 ; CI-NEXT: s_waitcnt vmcnt(0)
6292 ; CI-NEXT: s_setpc_b64 s[30:31]
6294 ; GFX9-LABEL: stack_12xv3f32:
6295 ; GFX9: ; %bb.0: ; %entry
6296 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6297 ; GFX9-NEXT: s_mov_b32 s4, s33
6298 ; GFX9-NEXT: s_mov_b32 s33, s32
6299 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
6300 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6301 ; GFX9-NEXT: s_mov_b64 exec, s[8:9]
6302 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6303 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000
6304 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
6305 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000
6306 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6307 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000
6308 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6309 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000
6310 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2
6311 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6312 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000
6313 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6314 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6315 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
6316 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
6317 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
6318 ; GFX9-NEXT: v_mov_b32_e32 v3, 1.0
6319 ; GFX9-NEXT: v_mov_b32_e32 v4, 1.0
6320 ; GFX9-NEXT: v_mov_b32_e32 v5, 1.0
6321 ; GFX9-NEXT: v_mov_b32_e32 v6, 2.0
6322 ; GFX9-NEXT: v_mov_b32_e32 v7, 2.0
6323 ; GFX9-NEXT: v_mov_b32_e32 v8, 2.0
6324 ; GFX9-NEXT: v_mov_b32_e32 v9, 0x40400000
6325 ; GFX9-NEXT: v_mov_b32_e32 v10, 0x40400000
6326 ; GFX9-NEXT: v_mov_b32_e32 v11, 0x40400000
6327 ; GFX9-NEXT: v_mov_b32_e32 v12, 4.0
6328 ; GFX9-NEXT: v_mov_b32_e32 v13, 4.0
6329 ; GFX9-NEXT: v_mov_b32_e32 v14, 4.0
6330 ; GFX9-NEXT: v_mov_b32_e32 v15, 0x40a00000
6331 ; GFX9-NEXT: v_mov_b32_e32 v16, 0x40a00000
6332 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x40a00000
6333 ; GFX9-NEXT: v_mov_b32_e32 v18, 0x40c00000
6334 ; GFX9-NEXT: v_mov_b32_e32 v19, 0x40c00000
6335 ; GFX9-NEXT: v_mov_b32_e32 v20, 0x40c00000
6336 ; GFX9-NEXT: v_mov_b32_e32 v21, 0x40e00000
6337 ; GFX9-NEXT: v_mov_b32_e32 v22, 0x40e00000
6338 ; GFX9-NEXT: v_mov_b32_e32 v23, 0x40e00000
6339 ; GFX9-NEXT: v_mov_b32_e32 v24, 0x41000000
6340 ; GFX9-NEXT: v_mov_b32_e32 v25, 0x41000000
6341 ; GFX9-NEXT: v_mov_b32_e32 v26, 0x41000000
6342 ; GFX9-NEXT: v_mov_b32_e32 v27, 0x41100000
6343 ; GFX9-NEXT: v_mov_b32_e32 v28, 0x41100000
6344 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x41100000
6345 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x41200000
6346 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6347 ; GFX9-NEXT: s_getpc_b64 s[4:5]
6348 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6349 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6350 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
6351 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6352 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6353 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
6354 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6355 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6356 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6357 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6358 ; GFX9-NEXT: s_mov_b32 s33, s4
6359 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6360 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6362 ; GFX11-LABEL: stack_12xv3f32:
6363 ; GFX11: ; %bb.0: ; %entry
6364 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6365 ; GFX11-NEXT: s_mov_b32 s0, s33
6366 ; GFX11-NEXT: s_mov_b32 s33, s32
6367 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6368 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6369 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6370 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6371 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x41300000
6372 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41400000
6373 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41500000
6374 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41600000
6375 ; GFX11-NEXT: v_dual_mov_b32 v4, 0x41700000 :: v_dual_mov_b32 v5, 1.0
6376 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6377 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6378 ; GFX11-NEXT: s_add_i32 s0, s32, 16
6379 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
6380 ; GFX11-NEXT: scratch_store_b32 off, v4, s0
6381 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
6382 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1.0
6383 ; GFX11-NEXT: v_dual_mov_b32 v4, 1.0 :: v_dual_mov_b32 v7, 2.0
6384 ; GFX11-NEXT: v_dual_mov_b32 v6, 2.0 :: v_dual_mov_b32 v9, 0x40400000
6385 ; GFX11-NEXT: v_dual_mov_b32 v8, 2.0 :: v_dual_mov_b32 v11, 0x40400000
6386 ; GFX11-NEXT: v_dual_mov_b32 v10, 0x40400000 :: v_dual_mov_b32 v13, 4.0
6387 ; GFX11-NEXT: v_dual_mov_b32 v12, 4.0 :: v_dual_mov_b32 v15, 0x40a00000
6388 ; GFX11-NEXT: v_dual_mov_b32 v14, 4.0 :: v_dual_mov_b32 v17, 0x40a00000
6389 ; GFX11-NEXT: v_mov_b32_e32 v16, 0x40a00000
6390 ; GFX11-NEXT: v_dual_mov_b32 v18, 0x40c00000 :: v_dual_mov_b32 v19, 0x40c00000
6391 ; GFX11-NEXT: v_mov_b32_e32 v20, 0x40c00000
6392 ; GFX11-NEXT: v_dual_mov_b32 v21, 0x40e00000 :: v_dual_mov_b32 v22, 0x40e00000
6393 ; GFX11-NEXT: v_mov_b32_e32 v23, 0x40e00000
6394 ; GFX11-NEXT: v_dual_mov_b32 v24, 0x41000000 :: v_dual_mov_b32 v25, 0x41000000
6395 ; GFX11-NEXT: v_mov_b32_e32 v26, 0x41000000
6396 ; GFX11-NEXT: v_dual_mov_b32 v27, 0x41100000 :: v_dual_mov_b32 v28, 0x41100000
6397 ; GFX11-NEXT: v_mov_b32_e32 v29, 0x41100000
6398 ; GFX11-NEXT: v_mov_b32_e32 v30, 0x41200000
6399 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6400 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6401 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3f32@rel32@lo+4
6402 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3f32@rel32@hi+12
6403 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6404 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6405 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6406 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6407 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6408 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6409 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6410 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6411 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6412 ; GFX11-NEXT: s_mov_b32 s33, s0
6413 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6414 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6416 ; HSA-LABEL: stack_12xv3f32:
6417 ; HSA: ; %bb.0: ; %entry
6418 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6419 ; HSA-NEXT: s_mov_b32 s4, s33
6420 ; HSA-NEXT: s_mov_b32 s33, s32
6421 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1
6422 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6423 ; HSA-NEXT: s_mov_b64 exec, s[8:9]
6424 ; HSA-NEXT: s_addk_i32 s32, 0x400
6425 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000
6426 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32
6427 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41400000
6428 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6429 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000
6430 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6431 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000
6432 ; HSA-NEXT: v_writelane_b32 v40, s4, 2
6433 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6434 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000
6435 ; HSA-NEXT: v_writelane_b32 v40, s30, 0
6436 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6437 ; HSA-NEXT: v_mov_b32_e32 v0, 0
6438 ; HSA-NEXT: v_mov_b32_e32 v1, 0
6439 ; HSA-NEXT: v_mov_b32_e32 v2, 0
6440 ; HSA-NEXT: v_mov_b32_e32 v3, 1.0
6441 ; HSA-NEXT: v_mov_b32_e32 v4, 1.0
6442 ; HSA-NEXT: v_mov_b32_e32 v5, 1.0
6443 ; HSA-NEXT: v_mov_b32_e32 v6, 2.0
6444 ; HSA-NEXT: v_mov_b32_e32 v7, 2.0
6445 ; HSA-NEXT: v_mov_b32_e32 v8, 2.0
6446 ; HSA-NEXT: v_mov_b32_e32 v9, 0x40400000
6447 ; HSA-NEXT: v_mov_b32_e32 v10, 0x40400000
6448 ; HSA-NEXT: v_mov_b32_e32 v11, 0x40400000
6449 ; HSA-NEXT: v_mov_b32_e32 v12, 4.0
6450 ; HSA-NEXT: v_mov_b32_e32 v13, 4.0
6451 ; HSA-NEXT: v_mov_b32_e32 v14, 4.0
6452 ; HSA-NEXT: v_mov_b32_e32 v15, 0x40a00000
6453 ; HSA-NEXT: v_mov_b32_e32 v16, 0x40a00000
6454 ; HSA-NEXT: v_mov_b32_e32 v17, 0x40a00000
6455 ; HSA-NEXT: v_mov_b32_e32 v18, 0x40c00000
6456 ; HSA-NEXT: v_mov_b32_e32 v19, 0x40c00000
6457 ; HSA-NEXT: v_mov_b32_e32 v20, 0x40c00000
6458 ; HSA-NEXT: v_mov_b32_e32 v21, 0x40e00000
6459 ; HSA-NEXT: v_mov_b32_e32 v22, 0x40e00000
6460 ; HSA-NEXT: v_mov_b32_e32 v23, 0x40e00000
6461 ; HSA-NEXT: v_mov_b32_e32 v24, 0x41000000
6462 ; HSA-NEXT: v_mov_b32_e32 v25, 0x41000000
6463 ; HSA-NEXT: v_mov_b32_e32 v26, 0x41000000
6464 ; HSA-NEXT: v_mov_b32_e32 v27, 0x41100000
6465 ; HSA-NEXT: v_mov_b32_e32 v28, 0x41100000
6466 ; HSA-NEXT: v_mov_b32_e32 v29, 0x41100000
6467 ; HSA-NEXT: v_mov_b32_e32 v30, 0x41200000
6468 ; HSA-NEXT: v_writelane_b32 v40, s31, 1
6469 ; HSA-NEXT: s_getpc_b64 s[4:5]
6470 ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6471 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6472 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
6473 ; HSA-NEXT: v_readlane_b32 s31, v40, 1
6474 ; HSA-NEXT: v_readlane_b32 s30, v40, 0
6475 ; HSA-NEXT: v_readlane_b32 s4, v40, 2
6476 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
6477 ; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6478 ; HSA-NEXT: s_mov_b64 exec, s[6:7]
6479 ; HSA-NEXT: s_addk_i32 s32, 0xfc00
6480 ; HSA-NEXT: s_mov_b32 s33, s4
6481 ; HSA-NEXT: s_waitcnt vmcnt(0)
6482 ; HSA-NEXT: s_setpc_b64 s[30:31]
6484 call void @external_void_func_12xv3f32(
6485 <3 x float><float 0.0, float 0.0, float 0.0>,
6486 <3 x float><float 1.0, float 1.0, float 1.0>,
6487 <3 x float><float 2.0, float 2.0, float 2.0>,
6488 <3 x float><float 3.0, float 3.0, float 3.0>,
6489 <3 x float><float 4.0, float 4.0, float 4.0>,
6490 <3 x float><float 5.0, float 5.0, float 5.0>,
6491 <3 x float><float 6.0, float 6.0, float 6.0>,
6492 <3 x float><float 7.0, float 7.0, float 7.0>,
6493 <3 x float><float 8.0, float 8.0, float 8.0>,
6494 <3 x float><float 9.0, float 9.0, float 9.0>,
6495 <3 x float><float 10.0, float 11.0, float 12.0>,
6496 <3 x float><float 13.0, float 14.0, float 15.0>)
; Test: call @external_void_func_8xv5i32 with eight constant <5 x i32>
; arguments (40 i32 elements in total).
; As the autogenerated checks below show, the first 31 elements are
; materialized in v0-v30 (values 0 through 6), and the remaining nine elements
; (7..15) are written to memory addressed from s32: nine buffer_store_dword at
; offsets 0-32 on VI/CI/GFX9/HSA, and two scratch_store_b128 (s32, s32 + 16)
; plus one scratch_store_b32 (s32 + 32) on GFX11.
6500 define void @stack_8xv5i32() #0 {
6501 ; VI-LABEL: stack_8xv5i32:
6502 ; VI: ; %bb.0: ; %entry
6503 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6504 ; VI-NEXT: s_mov_b32 s4, s33
6505 ; VI-NEXT: s_mov_b32 s33, s32
6506 ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
6507 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6508 ; VI-NEXT: s_mov_b64 exec, s[8:9]
6509 ; VI-NEXT: s_addk_i32 s32, 0x400
6510 ; VI-NEXT: v_mov_b32_e32 v0, 7
6511 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6512 ; VI-NEXT: v_mov_b32_e32 v0, 8
6513 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6514 ; VI-NEXT: v_mov_b32_e32 v0, 9
6515 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6516 ; VI-NEXT: v_mov_b32_e32 v0, 10
6517 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6518 ; VI-NEXT: v_mov_b32_e32 v0, 11
6519 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6520 ; VI-NEXT: v_mov_b32_e32 v0, 12
6521 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6522 ; VI-NEXT: v_mov_b32_e32 v0, 13
6523 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6524 ; VI-NEXT: v_mov_b32_e32 v0, 14
6525 ; VI-NEXT: v_writelane_b32 v40, s4, 2
6526 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6527 ; VI-NEXT: v_mov_b32_e32 v0, 15
6528 ; VI-NEXT: v_writelane_b32 v40, s30, 0
6529 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6530 ; VI-NEXT: v_mov_b32_e32 v0, 0
6531 ; VI-NEXT: v_mov_b32_e32 v1, 0
6532 ; VI-NEXT: v_mov_b32_e32 v2, 0
6533 ; VI-NEXT: v_mov_b32_e32 v3, 0
6534 ; VI-NEXT: v_mov_b32_e32 v4, 0
6535 ; VI-NEXT: v_mov_b32_e32 v5, 1
6536 ; VI-NEXT: v_mov_b32_e32 v6, 1
6537 ; VI-NEXT: v_mov_b32_e32 v7, 1
6538 ; VI-NEXT: v_mov_b32_e32 v8, 1
6539 ; VI-NEXT: v_mov_b32_e32 v9, 1
6540 ; VI-NEXT: v_mov_b32_e32 v10, 2
6541 ; VI-NEXT: v_mov_b32_e32 v11, 2
6542 ; VI-NEXT: v_mov_b32_e32 v12, 2
6543 ; VI-NEXT: v_mov_b32_e32 v13, 2
6544 ; VI-NEXT: v_mov_b32_e32 v14, 2
6545 ; VI-NEXT: v_mov_b32_e32 v15, 3
6546 ; VI-NEXT: v_mov_b32_e32 v16, 3
6547 ; VI-NEXT: v_mov_b32_e32 v17, 3
6548 ; VI-NEXT: v_mov_b32_e32 v18, 3
6549 ; VI-NEXT: v_mov_b32_e32 v19, 3
6550 ; VI-NEXT: v_mov_b32_e32 v20, 4
6551 ; VI-NEXT: v_mov_b32_e32 v21, 4
6552 ; VI-NEXT: v_mov_b32_e32 v22, 4
6553 ; VI-NEXT: v_mov_b32_e32 v23, 4
6554 ; VI-NEXT: v_mov_b32_e32 v24, 4
6555 ; VI-NEXT: v_mov_b32_e32 v25, 5
6556 ; VI-NEXT: v_mov_b32_e32 v26, 5
6557 ; VI-NEXT: v_mov_b32_e32 v27, 5
6558 ; VI-NEXT: v_mov_b32_e32 v28, 5
6559 ; VI-NEXT: v_mov_b32_e32 v29, 5
6560 ; VI-NEXT: v_mov_b32_e32 v30, 6
6561 ; VI-NEXT: v_writelane_b32 v40, s31, 1
6562 ; VI-NEXT: s_getpc_b64 s[4:5]
6563 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6564 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6565 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6566 ; VI-NEXT: v_readlane_b32 s31, v40, 1
6567 ; VI-NEXT: v_readlane_b32 s30, v40, 0
6568 ; VI-NEXT: v_readlane_b32 s4, v40, 2
6569 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1
6570 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6571 ; VI-NEXT: s_mov_b64 exec, s[6:7]
6572 ; VI-NEXT: s_addk_i32 s32, 0xfc00
6573 ; VI-NEXT: s_mov_b32 s33, s4
6574 ; VI-NEXT: s_waitcnt vmcnt(0)
6575 ; VI-NEXT: s_setpc_b64 s[30:31]
6577 ; CI-LABEL: stack_8xv5i32:
6578 ; CI: ; %bb.0: ; %entry
6579 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6580 ; CI-NEXT: s_mov_b32 s4, s33
6581 ; CI-NEXT: s_mov_b32 s33, s32
6582 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1
6583 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6584 ; CI-NEXT: s_mov_b64 exec, s[8:9]
6585 ; CI-NEXT: s_addk_i32 s32, 0x400
6586 ; CI-NEXT: v_mov_b32_e32 v0, 7
6587 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6588 ; CI-NEXT: v_mov_b32_e32 v0, 8
6589 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6590 ; CI-NEXT: v_mov_b32_e32 v0, 9
6591 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6592 ; CI-NEXT: v_mov_b32_e32 v0, 10
6593 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6594 ; CI-NEXT: v_mov_b32_e32 v0, 11
6595 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6596 ; CI-NEXT: v_mov_b32_e32 v0, 12
6597 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6598 ; CI-NEXT: v_mov_b32_e32 v0, 13
6599 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6600 ; CI-NEXT: v_mov_b32_e32 v0, 14
6601 ; CI-NEXT: v_writelane_b32 v40, s4, 2
6602 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6603 ; CI-NEXT: v_mov_b32_e32 v0, 15
6604 ; CI-NEXT: v_writelane_b32 v40, s30, 0
6605 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6606 ; CI-NEXT: v_mov_b32_e32 v0, 0
6607 ; CI-NEXT: v_mov_b32_e32 v1, 0
6608 ; CI-NEXT: v_mov_b32_e32 v2, 0
6609 ; CI-NEXT: v_mov_b32_e32 v3, 0
6610 ; CI-NEXT: v_mov_b32_e32 v4, 0
6611 ; CI-NEXT: v_mov_b32_e32 v5, 1
6612 ; CI-NEXT: v_mov_b32_e32 v6, 1
6613 ; CI-NEXT: v_mov_b32_e32 v7, 1
6614 ; CI-NEXT: v_mov_b32_e32 v8, 1
6615 ; CI-NEXT: v_mov_b32_e32 v9, 1
6616 ; CI-NEXT: v_mov_b32_e32 v10, 2
6617 ; CI-NEXT: v_mov_b32_e32 v11, 2
6618 ; CI-NEXT: v_mov_b32_e32 v12, 2
6619 ; CI-NEXT: v_mov_b32_e32 v13, 2
6620 ; CI-NEXT: v_mov_b32_e32 v14, 2
6621 ; CI-NEXT: v_mov_b32_e32 v15, 3
6622 ; CI-NEXT: v_mov_b32_e32 v16, 3
6623 ; CI-NEXT: v_mov_b32_e32 v17, 3
6624 ; CI-NEXT: v_mov_b32_e32 v18, 3
6625 ; CI-NEXT: v_mov_b32_e32 v19, 3
6626 ; CI-NEXT: v_mov_b32_e32 v20, 4
6627 ; CI-NEXT: v_mov_b32_e32 v21, 4
6628 ; CI-NEXT: v_mov_b32_e32 v22, 4
6629 ; CI-NEXT: v_mov_b32_e32 v23, 4
6630 ; CI-NEXT: v_mov_b32_e32 v24, 4
6631 ; CI-NEXT: v_mov_b32_e32 v25, 5
6632 ; CI-NEXT: v_mov_b32_e32 v26, 5
6633 ; CI-NEXT: v_mov_b32_e32 v27, 5
6634 ; CI-NEXT: v_mov_b32_e32 v28, 5
6635 ; CI-NEXT: v_mov_b32_e32 v29, 5
6636 ; CI-NEXT: v_mov_b32_e32 v30, 6
6637 ; CI-NEXT: v_writelane_b32 v40, s31, 1
6638 ; CI-NEXT: s_getpc_b64 s[4:5]
6639 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6640 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6641 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6642 ; CI-NEXT: v_readlane_b32 s31, v40, 1
6643 ; CI-NEXT: v_readlane_b32 s30, v40, 0
6644 ; CI-NEXT: v_readlane_b32 s4, v40, 2
6645 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1
6646 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6647 ; CI-NEXT: s_mov_b64 exec, s[6:7]
6648 ; CI-NEXT: s_addk_i32 s32, 0xfc00
6649 ; CI-NEXT: s_mov_b32 s33, s4
6650 ; CI-NEXT: s_waitcnt vmcnt(0)
6651 ; CI-NEXT: s_setpc_b64 s[30:31]
6653 ; GFX9-LABEL: stack_8xv5i32:
6654 ; GFX9: ; %bb.0: ; %entry
6655 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6656 ; GFX9-NEXT: s_mov_b32 s4, s33
6657 ; GFX9-NEXT: s_mov_b32 s33, s32
6658 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
6659 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6660 ; GFX9-NEXT: s_mov_b64 exec, s[8:9]
6661 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6662 ; GFX9-NEXT: v_mov_b32_e32 v0, 7
6663 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
6664 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
6665 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6666 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
6667 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6668 ; GFX9-NEXT: v_mov_b32_e32 v0, 10
6669 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6670 ; GFX9-NEXT: v_mov_b32_e32 v0, 11
6671 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6672 ; GFX9-NEXT: v_mov_b32_e32 v0, 12
6673 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6674 ; GFX9-NEXT: v_mov_b32_e32 v0, 13
6675 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6676 ; GFX9-NEXT: v_mov_b32_e32 v0, 14
6677 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2
6678 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6679 ; GFX9-NEXT: v_mov_b32_e32 v0, 15
6680 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6681 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6682 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
6683 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
6684 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
6685 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
6686 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
6687 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
6688 ; GFX9-NEXT: v_mov_b32_e32 v6, 1
6689 ; GFX9-NEXT: v_mov_b32_e32 v7, 1
6690 ; GFX9-NEXT: v_mov_b32_e32 v8, 1
6691 ; GFX9-NEXT: v_mov_b32_e32 v9, 1
6692 ; GFX9-NEXT: v_mov_b32_e32 v10, 2
6693 ; GFX9-NEXT: v_mov_b32_e32 v11, 2
6694 ; GFX9-NEXT: v_mov_b32_e32 v12, 2
6695 ; GFX9-NEXT: v_mov_b32_e32 v13, 2
6696 ; GFX9-NEXT: v_mov_b32_e32 v14, 2
6697 ; GFX9-NEXT: v_mov_b32_e32 v15, 3
6698 ; GFX9-NEXT: v_mov_b32_e32 v16, 3
6699 ; GFX9-NEXT: v_mov_b32_e32 v17, 3
6700 ; GFX9-NEXT: v_mov_b32_e32 v18, 3
6701 ; GFX9-NEXT: v_mov_b32_e32 v19, 3
6702 ; GFX9-NEXT: v_mov_b32_e32 v20, 4
6703 ; GFX9-NEXT: v_mov_b32_e32 v21, 4
6704 ; GFX9-NEXT: v_mov_b32_e32 v22, 4
6705 ; GFX9-NEXT: v_mov_b32_e32 v23, 4
6706 ; GFX9-NEXT: v_mov_b32_e32 v24, 4
6707 ; GFX9-NEXT: v_mov_b32_e32 v25, 5
6708 ; GFX9-NEXT: v_mov_b32_e32 v26, 5
6709 ; GFX9-NEXT: v_mov_b32_e32 v27, 5
6710 ; GFX9-NEXT: v_mov_b32_e32 v28, 5
6711 ; GFX9-NEXT: v_mov_b32_e32 v29, 5
6712 ; GFX9-NEXT: v_mov_b32_e32 v30, 6
6713 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6714 ; GFX9-NEXT: s_getpc_b64 s[4:5]
6715 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6716 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6717 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
6718 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6719 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6720 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
6721 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6722 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6723 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6724 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6725 ; GFX9-NEXT: s_mov_b32 s33, s4
6726 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6727 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6729 ; GFX11-LABEL: stack_8xv5i32:
6730 ; GFX11: ; %bb.0: ; %entry
6731 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6732 ; GFX11-NEXT: s_mov_b32 s0, s33
6733 ; GFX11-NEXT: s_mov_b32 s33, s32
6734 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6735 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6736 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6737 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6738 ; GFX11-NEXT: v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, 8
6739 ; GFX11-NEXT: v_dual_mov_b32 v2, 9 :: v_dual_mov_b32 v3, 10
6740 ; GFX11-NEXT: v_dual_mov_b32 v8, 15 :: v_dual_mov_b32 v5, 12
6741 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6742 ; GFX11-NEXT: v_dual_mov_b32 v4, 11 :: v_dual_mov_b32 v7, 14
6743 ; GFX11-NEXT: v_mov_b32_e32 v6, 13
6744 ; GFX11-NEXT: s_add_i32 s0, s32, 32
6745 ; GFX11-NEXT: s_add_i32 s1, s32, 16
6746 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6747 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
6748 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
6749 ; GFX11-NEXT: scratch_store_b32 off, v8, s0
6750 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s1
6751 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, 0
6752 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 1
6753 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v7, 1
6754 ; GFX11-NEXT: v_dual_mov_b32 v6, 1 :: v_dual_mov_b32 v9, 1
6755 ; GFX11-NEXT: v_dual_mov_b32 v8, 1 :: v_dual_mov_b32 v11, 2
6756 ; GFX11-NEXT: v_dual_mov_b32 v10, 2 :: v_dual_mov_b32 v13, 2
6757 ; GFX11-NEXT: v_dual_mov_b32 v12, 2 :: v_dual_mov_b32 v15, 3
6758 ; GFX11-NEXT: v_dual_mov_b32 v14, 2 :: v_dual_mov_b32 v17, 3
6759 ; GFX11-NEXT: v_dual_mov_b32 v16, 3 :: v_dual_mov_b32 v19, 3
6760 ; GFX11-NEXT: v_dual_mov_b32 v18, 3 :: v_dual_mov_b32 v21, 4
6761 ; GFX11-NEXT: v_dual_mov_b32 v20, 4 :: v_dual_mov_b32 v23, 4
6762 ; GFX11-NEXT: v_dual_mov_b32 v22, 4 :: v_dual_mov_b32 v25, 5
6763 ; GFX11-NEXT: v_dual_mov_b32 v24, 4 :: v_dual_mov_b32 v27, 5
6764 ; GFX11-NEXT: v_dual_mov_b32 v26, 5 :: v_dual_mov_b32 v29, 5
6765 ; GFX11-NEXT: v_mov_b32_e32 v28, 5
6766 ; GFX11-NEXT: v_mov_b32_e32 v30, 6
6767 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6768 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6769 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4
6770 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12
6771 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6772 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6773 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6774 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6775 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6776 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6777 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6778 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6779 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6780 ; GFX11-NEXT: s_mov_b32 s33, s0
6781 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6782 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6784 ; HSA-LABEL: stack_8xv5i32:
6785 ; HSA: ; %bb.0: ; %entry
6786 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6787 ; HSA-NEXT: s_mov_b32 s4, s33
6788 ; HSA-NEXT: s_mov_b32 s33, s32
6789 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1
6790 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6791 ; HSA-NEXT: s_mov_b64 exec, s[8:9]
6792 ; HSA-NEXT: s_addk_i32 s32, 0x400
6793 ; HSA-NEXT: v_mov_b32_e32 v0, 7
6794 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32
6795 ; HSA-NEXT: v_mov_b32_e32 v0, 8
6796 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6797 ; HSA-NEXT: v_mov_b32_e32 v0, 9
6798 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6799 ; HSA-NEXT: v_mov_b32_e32 v0, 10
6800 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6801 ; HSA-NEXT: v_mov_b32_e32 v0, 11
6802 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6803 ; HSA-NEXT: v_mov_b32_e32 v0, 12
6804 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6805 ; HSA-NEXT: v_mov_b32_e32 v0, 13
6806 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6807 ; HSA-NEXT: v_mov_b32_e32 v0, 14
6808 ; HSA-NEXT: v_writelane_b32 v40, s4, 2
6809 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6810 ; HSA-NEXT: v_mov_b32_e32 v0, 15
6811 ; HSA-NEXT: v_writelane_b32 v40, s30, 0
6812 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6813 ; HSA-NEXT: v_mov_b32_e32 v0, 0
6814 ; HSA-NEXT: v_mov_b32_e32 v1, 0
6815 ; HSA-NEXT: v_mov_b32_e32 v2, 0
6816 ; HSA-NEXT: v_mov_b32_e32 v3, 0
6817 ; HSA-NEXT: v_mov_b32_e32 v4, 0
6818 ; HSA-NEXT: v_mov_b32_e32 v5, 1
6819 ; HSA-NEXT: v_mov_b32_e32 v6, 1
6820 ; HSA-NEXT: v_mov_b32_e32 v7, 1
6821 ; HSA-NEXT: v_mov_b32_e32 v8, 1
6822 ; HSA-NEXT: v_mov_b32_e32 v9, 1
6823 ; HSA-NEXT: v_mov_b32_e32 v10, 2
6824 ; HSA-NEXT: v_mov_b32_e32 v11, 2
6825 ; HSA-NEXT: v_mov_b32_e32 v12, 2
6826 ; HSA-NEXT: v_mov_b32_e32 v13, 2
6827 ; HSA-NEXT: v_mov_b32_e32 v14, 2
6828 ; HSA-NEXT: v_mov_b32_e32 v15, 3
6829 ; HSA-NEXT: v_mov_b32_e32 v16, 3
6830 ; HSA-NEXT: v_mov_b32_e32 v17, 3
6831 ; HSA-NEXT: v_mov_b32_e32 v18, 3
6832 ; HSA-NEXT: v_mov_b32_e32 v19, 3
6833 ; HSA-NEXT: v_mov_b32_e32 v20, 4
6834 ; HSA-NEXT: v_mov_b32_e32 v21, 4
6835 ; HSA-NEXT: v_mov_b32_e32 v22, 4
6836 ; HSA-NEXT: v_mov_b32_e32 v23, 4
6837 ; HSA-NEXT: v_mov_b32_e32 v24, 4
6838 ; HSA-NEXT: v_mov_b32_e32 v25, 5
6839 ; HSA-NEXT: v_mov_b32_e32 v26, 5
6840 ; HSA-NEXT: v_mov_b32_e32 v27, 5
6841 ; HSA-NEXT: v_mov_b32_e32 v28, 5
6842 ; HSA-NEXT: v_mov_b32_e32 v29, 5
6843 ; HSA-NEXT: v_mov_b32_e32 v30, 6
6844 ; HSA-NEXT: v_writelane_b32 v40, s31, 1
6845 ; HSA-NEXT: s_getpc_b64 s[4:5]
6846 ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6847 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6848 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
6849 ; HSA-NEXT: v_readlane_b32 s31, v40, 1
6850 ; HSA-NEXT: v_readlane_b32 s30, v40, 0
6851 ; HSA-NEXT: v_readlane_b32 s4, v40, 2
6852 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
6853 ; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6854 ; HSA-NEXT: s_mov_b64 exec, s[6:7]
6855 ; HSA-NEXT: s_addk_i32 s32, 0xfc00
6856 ; HSA-NEXT: s_mov_b32 s33, s4
6857 ; HSA-NEXT: s_waitcnt vmcnt(0)
6858 ; HSA-NEXT: s_setpc_b64 s[30:31]
6860 call void @external_void_func_8xv5i32(
6861 <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
6862 <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
6863 <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
6864 <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
6865 <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
6866 <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
6867 <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
6868 <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
; Calls external_void_func_8xv5f32 with eight constant <5 x float> vectors
; (40 scalar elements in total). As the checks below show, the first 31
; elements (five each of 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, then 6.0) are
; materialized in v0-v30, and the remaining nine elements (7.0 through 15.0,
; i.e. 0x40e00000 through 0x41700000) are stored to memory addressed from s32
; before the s_swappc_b64. The fiji, hawaii, gfx900 and amdhsa runs emit nine
; buffer_store_dword at offsets 0 through 32; the gfx1100 run emits two
; scratch_store_b128 plus one scratch_store_b32 instead.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
6872 define void @stack_8xv5f32() #0 {
6873 ; VI-LABEL: stack_8xv5f32:
6874 ; VI: ; %bb.0: ; %entry
6875 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6876 ; VI-NEXT: s_mov_b32 s4, s33
6877 ; VI-NEXT: s_mov_b32 s33, s32
6878 ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
6879 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6880 ; VI-NEXT: s_mov_b64 exec, s[8:9]
6881 ; VI-NEXT: s_addk_i32 s32, 0x400
6882 ; VI-NEXT: v_mov_b32_e32 v0, 0x40e00000
6883 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6884 ; VI-NEXT: v_mov_b32_e32 v0, 0x41000000
6885 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6886 ; VI-NEXT: v_mov_b32_e32 v0, 0x41100000
6887 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6888 ; VI-NEXT: v_mov_b32_e32 v0, 0x41200000
6889 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6890 ; VI-NEXT: v_mov_b32_e32 v0, 0x41300000
6891 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6892 ; VI-NEXT: v_mov_b32_e32 v0, 0x41400000
6893 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6894 ; VI-NEXT: v_mov_b32_e32 v0, 0x41500000
6895 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6896 ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000
6897 ; VI-NEXT: v_writelane_b32 v40, s4, 2
6898 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6899 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000
6900 ; VI-NEXT: v_writelane_b32 v40, s30, 0
6901 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6902 ; VI-NEXT: v_mov_b32_e32 v0, 0
6903 ; VI-NEXT: v_mov_b32_e32 v1, 0
6904 ; VI-NEXT: v_mov_b32_e32 v2, 0
6905 ; VI-NEXT: v_mov_b32_e32 v3, 0
6906 ; VI-NEXT: v_mov_b32_e32 v4, 0
6907 ; VI-NEXT: v_mov_b32_e32 v5, 1.0
6908 ; VI-NEXT: v_mov_b32_e32 v6, 1.0
6909 ; VI-NEXT: v_mov_b32_e32 v7, 1.0
6910 ; VI-NEXT: v_mov_b32_e32 v8, 1.0
6911 ; VI-NEXT: v_mov_b32_e32 v9, 1.0
6912 ; VI-NEXT: v_mov_b32_e32 v10, 2.0
6913 ; VI-NEXT: v_mov_b32_e32 v11, 2.0
6914 ; VI-NEXT: v_mov_b32_e32 v12, 2.0
6915 ; VI-NEXT: v_mov_b32_e32 v13, 2.0
6916 ; VI-NEXT: v_mov_b32_e32 v14, 2.0
6917 ; VI-NEXT: v_mov_b32_e32 v15, 0x40400000
6918 ; VI-NEXT: v_mov_b32_e32 v16, 0x40400000
6919 ; VI-NEXT: v_mov_b32_e32 v17, 0x40400000
6920 ; VI-NEXT: v_mov_b32_e32 v18, 0x40400000
6921 ; VI-NEXT: v_mov_b32_e32 v19, 0x40400000
6922 ; VI-NEXT: v_mov_b32_e32 v20, 4.0
6923 ; VI-NEXT: v_mov_b32_e32 v21, 4.0
6924 ; VI-NEXT: v_mov_b32_e32 v22, 4.0
6925 ; VI-NEXT: v_mov_b32_e32 v23, 4.0
6926 ; VI-NEXT: v_mov_b32_e32 v24, 4.0
6927 ; VI-NEXT: v_mov_b32_e32 v25, 0x40a00000
6928 ; VI-NEXT: v_mov_b32_e32 v26, 0x40a00000
6929 ; VI-NEXT: v_mov_b32_e32 v27, 0x40a00000
6930 ; VI-NEXT: v_mov_b32_e32 v28, 0x40a00000
6931 ; VI-NEXT: v_mov_b32_e32 v29, 0x40a00000
6932 ; VI-NEXT: v_mov_b32_e32 v30, 0x40c00000
6933 ; VI-NEXT: v_writelane_b32 v40, s31, 1
6934 ; VI-NEXT: s_getpc_b64 s[4:5]
6935 ; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
6936 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
6937 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
6938 ; VI-NEXT: v_readlane_b32 s31, v40, 1
6939 ; VI-NEXT: v_readlane_b32 s30, v40, 0
6940 ; VI-NEXT: v_readlane_b32 s4, v40, 2
6941 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1
6942 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6943 ; VI-NEXT: s_mov_b64 exec, s[6:7]
6944 ; VI-NEXT: s_addk_i32 s32, 0xfc00
6945 ; VI-NEXT: s_mov_b32 s33, s4
6946 ; VI-NEXT: s_waitcnt vmcnt(0)
6947 ; VI-NEXT: s_setpc_b64 s[30:31]
6949 ; CI-LABEL: stack_8xv5f32:
6950 ; CI: ; %bb.0: ; %entry
6951 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6952 ; CI-NEXT: s_mov_b32 s4, s33
6953 ; CI-NEXT: s_mov_b32 s33, s32
6954 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1
6955 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6956 ; CI-NEXT: s_mov_b64 exec, s[8:9]
6957 ; CI-NEXT: s_addk_i32 s32, 0x400
6958 ; CI-NEXT: v_mov_b32_e32 v0, 0x40e00000
6959 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32
6960 ; CI-NEXT: v_mov_b32_e32 v0, 0x41000000
6961 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
6962 ; CI-NEXT: v_mov_b32_e32 v0, 0x41100000
6963 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
6964 ; CI-NEXT: v_mov_b32_e32 v0, 0x41200000
6965 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
6966 ; CI-NEXT: v_mov_b32_e32 v0, 0x41300000
6967 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
6968 ; CI-NEXT: v_mov_b32_e32 v0, 0x41400000
6969 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
6970 ; CI-NEXT: v_mov_b32_e32 v0, 0x41500000
6971 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
6972 ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000
6973 ; CI-NEXT: v_writelane_b32 v40, s4, 2
6974 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
6975 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000
6976 ; CI-NEXT: v_writelane_b32 v40, s30, 0
6977 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
6978 ; CI-NEXT: v_mov_b32_e32 v0, 0
6979 ; CI-NEXT: v_mov_b32_e32 v1, 0
6980 ; CI-NEXT: v_mov_b32_e32 v2, 0
6981 ; CI-NEXT: v_mov_b32_e32 v3, 0
6982 ; CI-NEXT: v_mov_b32_e32 v4, 0
6983 ; CI-NEXT: v_mov_b32_e32 v5, 1.0
6984 ; CI-NEXT: v_mov_b32_e32 v6, 1.0
6985 ; CI-NEXT: v_mov_b32_e32 v7, 1.0
6986 ; CI-NEXT: v_mov_b32_e32 v8, 1.0
6987 ; CI-NEXT: v_mov_b32_e32 v9, 1.0
6988 ; CI-NEXT: v_mov_b32_e32 v10, 2.0
6989 ; CI-NEXT: v_mov_b32_e32 v11, 2.0
6990 ; CI-NEXT: v_mov_b32_e32 v12, 2.0
6991 ; CI-NEXT: v_mov_b32_e32 v13, 2.0
6992 ; CI-NEXT: v_mov_b32_e32 v14, 2.0
6993 ; CI-NEXT: v_mov_b32_e32 v15, 0x40400000
6994 ; CI-NEXT: v_mov_b32_e32 v16, 0x40400000
6995 ; CI-NEXT: v_mov_b32_e32 v17, 0x40400000
6996 ; CI-NEXT: v_mov_b32_e32 v18, 0x40400000
6997 ; CI-NEXT: v_mov_b32_e32 v19, 0x40400000
6998 ; CI-NEXT: v_mov_b32_e32 v20, 4.0
6999 ; CI-NEXT: v_mov_b32_e32 v21, 4.0
7000 ; CI-NEXT: v_mov_b32_e32 v22, 4.0
7001 ; CI-NEXT: v_mov_b32_e32 v23, 4.0
7002 ; CI-NEXT: v_mov_b32_e32 v24, 4.0
7003 ; CI-NEXT: v_mov_b32_e32 v25, 0x40a00000
7004 ; CI-NEXT: v_mov_b32_e32 v26, 0x40a00000
7005 ; CI-NEXT: v_mov_b32_e32 v27, 0x40a00000
7006 ; CI-NEXT: v_mov_b32_e32 v28, 0x40a00000
7007 ; CI-NEXT: v_mov_b32_e32 v29, 0x40a00000
7008 ; CI-NEXT: v_mov_b32_e32 v30, 0x40c00000
7009 ; CI-NEXT: v_writelane_b32 v40, s31, 1
7010 ; CI-NEXT: s_getpc_b64 s[4:5]
7011 ; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
7012 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
7013 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
7014 ; CI-NEXT: v_readlane_b32 s31, v40, 1
7015 ; CI-NEXT: v_readlane_b32 s30, v40, 0
7016 ; CI-NEXT: v_readlane_b32 s4, v40, 2
7017 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1
7018 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7019 ; CI-NEXT: s_mov_b64 exec, s[6:7]
7020 ; CI-NEXT: s_addk_i32 s32, 0xfc00
7021 ; CI-NEXT: s_mov_b32 s33, s4
7022 ; CI-NEXT: s_waitcnt vmcnt(0)
7023 ; CI-NEXT: s_setpc_b64 s[30:31]
7025 ; GFX9-LABEL: stack_8xv5f32:
7026 ; GFX9: ; %bb.0: ; %entry
7027 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7028 ; GFX9-NEXT: s_mov_b32 s4, s33
7029 ; GFX9-NEXT: s_mov_b32 s33, s32
7030 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
7031 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7032 ; GFX9-NEXT: s_mov_b64 exec, s[8:9]
7033 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7034 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40e00000
7035 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
7036 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000
7037 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
7038 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41100000
7039 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
7040 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41200000
7041 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
7042 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000
7043 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
7044 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000
7045 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
7046 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000
7047 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
7048 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000
7049 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2
7050 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
7051 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000
7052 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7053 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
7054 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
7055 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
7056 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
7057 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
7058 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
7059 ; GFX9-NEXT: v_mov_b32_e32 v5, 1.0
7060 ; GFX9-NEXT: v_mov_b32_e32 v6, 1.0
7061 ; GFX9-NEXT: v_mov_b32_e32 v7, 1.0
7062 ; GFX9-NEXT: v_mov_b32_e32 v8, 1.0
7063 ; GFX9-NEXT: v_mov_b32_e32 v9, 1.0
7064 ; GFX9-NEXT: v_mov_b32_e32 v10, 2.0
7065 ; GFX9-NEXT: v_mov_b32_e32 v11, 2.0
7066 ; GFX9-NEXT: v_mov_b32_e32 v12, 2.0
7067 ; GFX9-NEXT: v_mov_b32_e32 v13, 2.0
7068 ; GFX9-NEXT: v_mov_b32_e32 v14, 2.0
7069 ; GFX9-NEXT: v_mov_b32_e32 v15, 0x40400000
7070 ; GFX9-NEXT: v_mov_b32_e32 v16, 0x40400000
7071 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x40400000
7072 ; GFX9-NEXT: v_mov_b32_e32 v18, 0x40400000
7073 ; GFX9-NEXT: v_mov_b32_e32 v19, 0x40400000
7074 ; GFX9-NEXT: v_mov_b32_e32 v20, 4.0
7075 ; GFX9-NEXT: v_mov_b32_e32 v21, 4.0
7076 ; GFX9-NEXT: v_mov_b32_e32 v22, 4.0
7077 ; GFX9-NEXT: v_mov_b32_e32 v23, 4.0
7078 ; GFX9-NEXT: v_mov_b32_e32 v24, 4.0
7079 ; GFX9-NEXT: v_mov_b32_e32 v25, 0x40a00000
7080 ; GFX9-NEXT: v_mov_b32_e32 v26, 0x40a00000
7081 ; GFX9-NEXT: v_mov_b32_e32 v27, 0x40a00000
7082 ; GFX9-NEXT: v_mov_b32_e32 v28, 0x40a00000
7083 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000
7084 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000
7085 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7086 ; GFX9-NEXT: s_getpc_b64 s[4:5]
7087 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
7088 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
7089 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
7090 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7091 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7092 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
7093 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7094 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7095 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7096 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7097 ; GFX9-NEXT: s_mov_b32 s33, s4
7098 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7099 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7101 ; GFX11-LABEL: stack_8xv5f32:
7102 ; GFX11: ; %bb.0: ; %entry
7103 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7104 ; GFX11-NEXT: s_mov_b32 s0, s33
7105 ; GFX11-NEXT: s_mov_b32 s33, s32
7106 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7107 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7108 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7109 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7110 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40e00000
7111 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41000000
7112 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41100000
7113 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41200000
7114 ; GFX11-NEXT: v_mov_b32_e32 v8, 0x41700000
7115 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7116 ; GFX11-NEXT: v_mov_b32_e32 v4, 0x41300000
7117 ; GFX11-NEXT: v_mov_b32_e32 v5, 0x41400000
7118 ; GFX11-NEXT: v_dual_mov_b32 v6, 0x41500000 :: v_dual_mov_b32 v9, 1.0
7119 ; GFX11-NEXT: v_mov_b32_e32 v7, 0x41600000
7120 ; GFX11-NEXT: s_add_i32 s0, s32, 32
7121 ; GFX11-NEXT: s_add_i32 s1, s32, 16
7122 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7123 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
7124 ; GFX11-NEXT: scratch_store_b32 off, v8, s0
7125 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s1
7126 ; GFX11-NEXT: v_mov_b32_e32 v6, 1.0
7127 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
7128 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
7129 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1.0
7130 ; GFX11-NEXT: v_dual_mov_b32 v7, 1.0 :: v_dual_mov_b32 v8, 1.0
7131 ; GFX11-NEXT: v_dual_mov_b32 v11, 2.0 :: v_dual_mov_b32 v10, 2.0
7132 ; GFX11-NEXT: v_dual_mov_b32 v13, 2.0 :: v_dual_mov_b32 v12, 2.0
7133 ; GFX11-NEXT: v_dual_mov_b32 v15, 0x40400000 :: v_dual_mov_b32 v14, 2.0
7134 ; GFX11-NEXT: v_dual_mov_b32 v17, 0x40400000 :: v_dual_mov_b32 v16, 0x40400000
7135 ; GFX11-NEXT: v_dual_mov_b32 v19, 0x40400000 :: v_dual_mov_b32 v18, 0x40400000
7136 ; GFX11-NEXT: v_dual_mov_b32 v21, 4.0 :: v_dual_mov_b32 v20, 4.0
7137 ; GFX11-NEXT: v_dual_mov_b32 v23, 4.0 :: v_dual_mov_b32 v22, 4.0
7138 ; GFX11-NEXT: v_dual_mov_b32 v25, 0x40a00000 :: v_dual_mov_b32 v24, 4.0
7139 ; GFX11-NEXT: v_dual_mov_b32 v27, 0x40a00000 :: v_dual_mov_b32 v26, 0x40a00000
7140 ; GFX11-NEXT: v_dual_mov_b32 v29, 0x40a00000 :: v_dual_mov_b32 v28, 0x40a00000
7141 ; GFX11-NEXT: v_mov_b32_e32 v30, 0x40c00000
7142 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7143 ; GFX11-NEXT: s_getpc_b64 s[0:1]
7144 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4
7145 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12
7146 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
7147 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7148 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7149 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7150 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7151 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7152 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7153 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7154 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7155 ; GFX11-NEXT: s_mov_b32 s33, s0
7156 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7157 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7159 ; HSA-LABEL: stack_8xv5f32:
7160 ; HSA: ; %bb.0: ; %entry
7161 ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7162 ; HSA-NEXT: s_mov_b32 s4, s33
7163 ; HSA-NEXT: s_mov_b32 s33, s32
7164 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1
7165 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7166 ; HSA-NEXT: s_mov_b64 exec, s[8:9]
7167 ; HSA-NEXT: s_addk_i32 s32, 0x400
7168 ; HSA-NEXT: v_mov_b32_e32 v0, 0x40e00000
7169 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32
7170 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41000000
7171 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
7172 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41100000
7173 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
7174 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41200000
7175 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
7176 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000
7177 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
7178 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41400000
7179 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
7180 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000
7181 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
7182 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000
7183 ; HSA-NEXT: v_writelane_b32 v40, s4, 2
7184 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
7185 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000
7186 ; HSA-NEXT: v_writelane_b32 v40, s30, 0
7187 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32
7188 ; HSA-NEXT: v_mov_b32_e32 v0, 0
7189 ; HSA-NEXT: v_mov_b32_e32 v1, 0
7190 ; HSA-NEXT: v_mov_b32_e32 v2, 0
7191 ; HSA-NEXT: v_mov_b32_e32 v3, 0
7192 ; HSA-NEXT: v_mov_b32_e32 v4, 0
7193 ; HSA-NEXT: v_mov_b32_e32 v5, 1.0
7194 ; HSA-NEXT: v_mov_b32_e32 v6, 1.0
7195 ; HSA-NEXT: v_mov_b32_e32 v7, 1.0
7196 ; HSA-NEXT: v_mov_b32_e32 v8, 1.0
7197 ; HSA-NEXT: v_mov_b32_e32 v9, 1.0
7198 ; HSA-NEXT: v_mov_b32_e32 v10, 2.0
7199 ; HSA-NEXT: v_mov_b32_e32 v11, 2.0
7200 ; HSA-NEXT: v_mov_b32_e32 v12, 2.0
7201 ; HSA-NEXT: v_mov_b32_e32 v13, 2.0
7202 ; HSA-NEXT: v_mov_b32_e32 v14, 2.0
7203 ; HSA-NEXT: v_mov_b32_e32 v15, 0x40400000
7204 ; HSA-NEXT: v_mov_b32_e32 v16, 0x40400000
7205 ; HSA-NEXT: v_mov_b32_e32 v17, 0x40400000
7206 ; HSA-NEXT: v_mov_b32_e32 v18, 0x40400000
7207 ; HSA-NEXT: v_mov_b32_e32 v19, 0x40400000
7208 ; HSA-NEXT: v_mov_b32_e32 v20, 4.0
7209 ; HSA-NEXT: v_mov_b32_e32 v21, 4.0
7210 ; HSA-NEXT: v_mov_b32_e32 v22, 4.0
7211 ; HSA-NEXT: v_mov_b32_e32 v23, 4.0
7212 ; HSA-NEXT: v_mov_b32_e32 v24, 4.0
7213 ; HSA-NEXT: v_mov_b32_e32 v25, 0x40a00000
7214 ; HSA-NEXT: v_mov_b32_e32 v26, 0x40a00000
7215 ; HSA-NEXT: v_mov_b32_e32 v27, 0x40a00000
7216 ; HSA-NEXT: v_mov_b32_e32 v28, 0x40a00000
7217 ; HSA-NEXT: v_mov_b32_e32 v29, 0x40a00000
7218 ; HSA-NEXT: v_mov_b32_e32 v30, 0x40c00000
7219 ; HSA-NEXT: v_writelane_b32 v40, s31, 1
7220 ; HSA-NEXT: s_getpc_b64 s[4:5]
7221 ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
7222 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
7223 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
7224 ; HSA-NEXT: v_readlane_b32 s31, v40, 1
7225 ; HSA-NEXT: v_readlane_b32 s30, v40, 0
7226 ; HSA-NEXT: v_readlane_b32 s4, v40, 2
7227 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
7228 ; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7229 ; HSA-NEXT: s_mov_b64 exec, s[6:7]
7230 ; HSA-NEXT: s_addk_i32 s32, 0xfc00
7231 ; HSA-NEXT: s_mov_b32 s33, s4
7232 ; HSA-NEXT: s_waitcnt vmcnt(0)
7233 ; HSA-NEXT: s_setpc_b64 s[30:31]
7235 call void @external_void_func_8xv5f32(
7236 <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,
7237 <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
7238 <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
7239 <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
7240 <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
7241 <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
7242 <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
7243 <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
; External callee taking a <32 x i32> followed by a byval(double) pointer in
; addrspace(5) with 16-byte alignment.
; Reviewer note (unconfirmed): the leading <32 x i32> presumably fills the
; register-passed argument slots so that the byval argument is exercised on the
; stack - confirm against the caller, which is outside this section.
7247 declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0
; External callee taking a <32 x i32> followed by a plain double.
; Reviewer note (unconfirmed): going by the name, the double is expected to be
; passed on the stack after the <32 x i32> - confirm against the caller, which
; is outside this section.
7248 declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0
; External callee taking twelve <3 x i32> arguments (36 i32 elements in total).
; Its caller is outside this section of the file.
7249 declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
7250 <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
; External callee taking eight <5 x i32> arguments (40 i32 elements in total).
; Called from stack_8xv5i32 with constant vectors.
7251 declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
7252 <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
; External callee taking twelve <3 x float> arguments (36 float elements in
; total). Its caller is outside this section of the file.
7253 declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
7254 <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
; External callee taking eight <5 x float> arguments (40 float elements in
; total). Called from stack_8xv5f32 with constant vectors.
7255 declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
7256 <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
; Attribute groups.
; #0 is the group attached to the declarations and to stack_8xv5f32 above:
; nounwind plus the "amdgpu-no-*" string attributes for dispatch id/ptr,
; implicitarg ptr, workgroup id x/y/z and workitem id x/y/z.
; #1 (nounwind readnone) and #2 (nounwind noinline): their users are not
; visible in this section of the file.
7258 attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
7259 attributes #1 = { nounwind readnone }
7260 attributes #2 = { nounwind noinline }