1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
3 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
4 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s
5 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10-SCRATCH %s
; External callees taking a single i1 argument: one with no extension
; attribute, one marked signext, and one marked zeroext.
7 declare hidden amdgpu_gfx void @external_void_func_i1(i1) #0
8 declare hidden amdgpu_gfx void @external_void_func_i1_signext(i1 signext) #0
9 declare hidden amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext) #0
; External callees taking an i8 scalar (plain, signext, zeroext) or an i8
; vector. For the vector callees the name encodes the parameter type:
; @external_void_func_vNi8 takes <N x i8>.
11 declare hidden amdgpu_gfx void @external_void_func_i8(i8) #0
12 declare hidden amdgpu_gfx void @external_void_func_i8_signext(i8 signext) #0
13 declare hidden amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext) #0
14 declare hidden amdgpu_gfx void @external_void_func_v2i8(<2 x i8>) #0
15 declare hidden amdgpu_gfx void @external_void_func_v3i8(<3 x i8>) #0
16 declare hidden amdgpu_gfx void @external_void_func_v4i8(<4 x i8>) #0
17 declare hidden amdgpu_gfx void @external_void_func_v5i8(<5 x i8>) #0
18 declare hidden amdgpu_gfx void @external_void_func_v8i8(<8 x i8>) #0
; The parameter is <16 x i8> so the declared type agrees with the callee's name.
; NOTE(review): any call site of this function must pass <16 x i8> - confirm
; against the callers, which are not visible in this part of the file.
19 declare hidden amdgpu_gfx void @external_void_func_v16i8(<16 x i8>) #0
20 declare hidden amdgpu_gfx void @external_void_func_v32i8(<32 x i8>) #0
; External callees that take an i8 scalar or i8 vector and return a value of
; the same type. Despite the "external_void_func_" prefix in their names,
; none of these returns void.
22 declare hidden amdgpu_gfx i8 @external_void_func_i8_ret(i8) #0
23 declare hidden amdgpu_gfx <2 x i8> @external_void_func_v2i8_ret(<2 x i8>) #0
24 declare hidden amdgpu_gfx <3 x i8> @external_void_func_v3i8_ret(<3 x i8>) #0
25 declare hidden amdgpu_gfx <4 x i8> @external_void_func_v4i8_ret(<4 x i8>) #0
26 declare hidden amdgpu_gfx <5 x i8> @external_void_func_v5i8_ret(<5 x i8>) #0
27 declare hidden amdgpu_gfx <8 x i8> @external_void_func_v8i8_ret(<8 x i8>) #0
28 declare hidden amdgpu_gfx <32 x i8> @external_void_func_v32i8_ret(<32 x i8>) #0
; External void callees whose parameters carry no inreg attribute.
; i16 scalar: plain, signext and zeroext.
30 declare hidden amdgpu_gfx void @external_void_func_i16(i16) #0
31 declare hidden amdgpu_gfx void @external_void_func_i16_signext(i16 signext) #0
32 declare hidden amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext) #0
; i32 / i64 scalars and i64 vectors of 2, 3 and 4 elements.
34 declare hidden amdgpu_gfx void @external_void_func_i32(i32) #0
35 declare hidden amdgpu_gfx void @external_void_func_i64(i64) #0
36 declare hidden amdgpu_gfx void @external_void_func_v2i64(<2 x i64>) #0
37 declare hidden amdgpu_gfx void @external_void_func_v3i64(<3 x i64>) #0
38 declare hidden amdgpu_gfx void @external_void_func_v4i64(<4 x i64>) #0
; Floating-point scalars (half, float, double) and float/double vectors.
40 declare hidden amdgpu_gfx void @external_void_func_f16(half) #0
41 declare hidden amdgpu_gfx void @external_void_func_f32(float) #0
42 declare hidden amdgpu_gfx void @external_void_func_f64(double) #0
43 declare hidden amdgpu_gfx void @external_void_func_v2f32(<2 x float>) #0
44 declare hidden amdgpu_gfx void @external_void_func_v2f64(<2 x double>) #0
45 declare hidden amdgpu_gfx void @external_void_func_v3f32(<3 x float>) #0
46 declare hidden amdgpu_gfx void @external_void_func_v3f64(<3 x double>) #0
47 declare hidden amdgpu_gfx void @external_void_func_v5f32(<5 x float>) #0
; Vectors of 16-bit elements (i16 and half) with 2, 3 and 4 elements.
49 declare hidden amdgpu_gfx void @external_void_func_v2i16(<2 x i16>) #0
50 declare hidden amdgpu_gfx void @external_void_func_v2f16(<2 x half>) #0
51 declare hidden amdgpu_gfx void @external_void_func_v3i16(<3 x i16>) #0
52 declare hidden amdgpu_gfx void @external_void_func_v3f16(<3 x half>) #0
53 declare hidden amdgpu_gfx void @external_void_func_v4i16(<4 x i16>) #0
54 declare hidden amdgpu_gfx void @external_void_func_v4f16(<4 x half>) #0
; i32 vectors of 2 to 32 elements. The "_i32" suffixed variants take an
; additional trailing i32 scalar after the vector.
56 declare hidden amdgpu_gfx void @external_void_func_v2i32(<2 x i32>) #0
57 declare hidden amdgpu_gfx void @external_void_func_v3i32(<3 x i32>) #0
58 declare hidden amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
59 declare hidden amdgpu_gfx void @external_void_func_v4i32(<4 x i32>) #0
60 declare hidden amdgpu_gfx void @external_void_func_v5i32(<5 x i32>) #0
61 declare hidden amdgpu_gfx void @external_void_func_v8i32(<8 x i32>) #0
62 declare hidden amdgpu_gfx void @external_void_func_v16i32(<16 x i32>) #0
63 declare hidden amdgpu_gfx void @external_void_func_v32i32(<32 x i32>) #0
64 declare hidden amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
; External void callees whose every parameter carries the inreg attribute.
; Integer scalars (i1 through i64) and i64 vectors.
66 declare hidden amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg) #0
67 declare hidden amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg) #0
68 declare hidden amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg) #0
69 declare hidden amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg) #0
70 declare hidden amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg) #0
71 declare hidden amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg) #0
72 declare hidden amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg) #0
73 declare hidden amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg) #0
; Floating-point scalars and float/double vectors, inreg.
75 declare hidden amdgpu_gfx void @external_void_func_f16_inreg(half inreg) #0
76 declare hidden amdgpu_gfx void @external_void_func_f32_inreg(float inreg) #0
77 declare hidden amdgpu_gfx void @external_void_func_f64_inreg(double inreg) #0
78 declare hidden amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg) #0
79 declare hidden amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg) #0
80 declare hidden amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg) #0
81 declare hidden amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg) #0
82 declare hidden amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg) #0
; Vectors of 16-bit elements (i16 and half), inreg.
84 declare hidden amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg) #0
85 declare hidden amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
86 declare hidden amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg) #0
87 declare hidden amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
88 declare hidden amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg) #0
89 declare hidden amdgpu_gfx void @external_void_func_v4f16_inreg(<4 x half> inreg) #0
; i32 vectors of 2 to 32 elements, inreg. The "_i32" suffixed variants take an
; additional trailing inreg i32 scalar after the vector.
91 declare hidden amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg) #0
92 declare hidden amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg) #0
93 declare hidden amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg, i32 inreg) #0
94 declare hidden amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
95 declare hidden amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg) #0
96 declare hidden amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0
97 declare hidden amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0
98 declare hidden amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg) #0
99 declare hidden amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg, i32 inreg) #0
101 ; return value and argument
; Callee that takes an i32 and also returns an i32.
102 declare hidden amdgpu_gfx i32 @external_i32_func_i32(i32) #0
; Callees taking a { i8, i32 } aggregate: directly by value, through a byval
; pointer in addrspace(5), and through an sret pointer followed by a byval
; pointer (both in addrspace(5)).
105 declare hidden amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 }) #0
106 declare hidden amdgpu_gfx void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0
107 declare hidden amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
; Calls @external_void_func_i1 with the constant `i1 true`.
; The autogenerated expectations below (one group per RUN line prefix) show
; the constant 1 moved into v0 and stored as a single byte at the address in
; s32 before s_swappc_b64 jumps to the callee. The flat-scratch and gfx1100
; variants use scratch_store_* where the others use buffer_store_*.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
109 define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 {
110 ; GFX9-LABEL: test_call_external_void_func_i1_imm:
112 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113 ; GFX9-NEXT: s_mov_b32 s34, s33
114 ; GFX9-NEXT: s_mov_b32 s33, s32
115 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
116 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
117 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
118 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
119 ; GFX9-NEXT: s_addk_i32 s32, 0x400
120 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
121 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
122 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1@abs32@hi
123 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1@abs32@lo
124 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
125 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
126 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
127 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
128 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
129 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
130 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
131 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
132 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
133 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
134 ; GFX9-NEXT: s_mov_b32 s33, s34
135 ; GFX9-NEXT: s_waitcnt vmcnt(0)
136 ; GFX9-NEXT: s_setpc_b64 s[30:31]
138 ; GFX10-LABEL: test_call_external_void_func_i1_imm:
140 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX10-NEXT: s_mov_b32 s34, s33
142 ; GFX10-NEXT: s_mov_b32 s33, s32
143 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
144 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
145 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
146 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
147 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
148 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
149 ; GFX10-NEXT: s_addk_i32 s32, 0x200
150 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1@abs32@hi
151 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1@abs32@lo
152 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
153 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
154 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
155 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
156 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
157 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
158 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
159 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
160 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
161 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
162 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
163 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
164 ; GFX10-NEXT: s_mov_b32 s33, s34
165 ; GFX10-NEXT: s_waitcnt vmcnt(0)
166 ; GFX10-NEXT: s_setpc_b64 s[30:31]
168 ; GFX11-LABEL: test_call_external_void_func_i1_imm:
170 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; GFX11-NEXT: s_mov_b32 s0, s33
172 ; GFX11-NEXT: s_mov_b32 s33, s32
173 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
174 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
175 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
176 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
177 ; GFX11-NEXT: v_mov_b32_e32 v0, 1
178 ; GFX11-NEXT: s_add_i32 s32, s32, 16
179 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1@abs32@hi
180 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1@abs32@lo
181 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
182 ; GFX11-NEXT: scratch_store_b8 off, v0, s32
183 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
184 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
185 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
186 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
187 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
188 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
189 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
190 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
191 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
192 ; GFX11-NEXT: s_add_i32 s32, s32, -16
193 ; GFX11-NEXT: s_mov_b32 s33, s0
194 ; GFX11-NEXT: s_waitcnt vmcnt(0)
195 ; GFX11-NEXT: s_setpc_b64 s[30:31]
197 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm:
198 ; GFX10-SCRATCH: ; %bb.0:
199 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
201 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
202 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
203 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
204 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
205 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
206 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
207 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
208 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
209 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1@abs32@hi
210 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1@abs32@lo
211 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
212 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
213 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
214 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
215 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
216 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
217 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
218 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
219 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
220 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
221 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
222 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
223 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
224 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
225 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
226 call amdgpu_gfx void @external_void_func_i1(i1 true)
; Loads an i1 with a volatile load through an undef addrspace(1) pointer and
; passes it to @external_void_func_i1_signext with the signext attribute.
; The function's own unnamed i32 parameter is not used by the body.
; The autogenerated expectations below show an unsigned byte load, the loaded
; value masked with 1 (v_and_b32), and the result stored as a single byte at
; the address in s32 before the call.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
230 define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 {
231 ; GFX9-LABEL: test_call_external_void_func_i1_signext:
233 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234 ; GFX9-NEXT: s_mov_b32 s34, s33
235 ; GFX9-NEXT: s_mov_b32 s33, s32
236 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
237 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
238 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
239 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
240 ; GFX9-NEXT: s_waitcnt vmcnt(0)
241 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
242 ; GFX9-NEXT: s_addk_i32 s32, 0x400
243 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
244 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_signext@abs32@hi
245 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_signext@abs32@lo
246 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
247 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
248 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
249 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
250 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
251 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
252 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
253 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
254 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
255 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
256 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
257 ; GFX9-NEXT: s_mov_b32 s33, s34
258 ; GFX9-NEXT: s_waitcnt vmcnt(0)
259 ; GFX9-NEXT: s_setpc_b64 s[30:31]
261 ; GFX10-LABEL: test_call_external_void_func_i1_signext:
263 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX10-NEXT: s_mov_b32 s34, s33
265 ; GFX10-NEXT: s_mov_b32 s33, s32
266 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
267 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
268 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
269 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
270 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
271 ; GFX10-NEXT: s_waitcnt vmcnt(0)
272 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
273 ; GFX10-NEXT: s_addk_i32 s32, 0x200
274 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_signext@abs32@hi
275 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_signext@abs32@lo
276 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
277 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
278 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
279 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
280 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
281 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
282 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
283 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
284 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
285 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
286 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
287 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
288 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
289 ; GFX10-NEXT: s_mov_b32 s33, s34
290 ; GFX10-NEXT: s_waitcnt vmcnt(0)
291 ; GFX10-NEXT: s_setpc_b64 s[30:31]
293 ; GFX11-LABEL: test_call_external_void_func_i1_signext:
295 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GFX11-NEXT: s_mov_b32 s0, s33
297 ; GFX11-NEXT: s_mov_b32 s33, s32
298 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
299 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
300 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
301 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc
302 ; GFX11-NEXT: s_waitcnt vmcnt(0)
303 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
304 ; GFX11-NEXT: s_add_i32 s32, s32, 16
305 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_signext@abs32@hi
306 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_signext@abs32@lo
307 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
308 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
309 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
310 ; GFX11-NEXT: scratch_store_b8 off, v0, s32
311 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
312 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
313 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
314 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
315 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
316 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
317 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
318 ; GFX11-NEXT: s_add_i32 s32, s32, -16
319 ; GFX11-NEXT: s_mov_b32 s33, s0
320 ; GFX11-NEXT: s_waitcnt vmcnt(0)
321 ; GFX11-NEXT: s_setpc_b64 s[30:31]
323 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_signext:
324 ; GFX10-SCRATCH: ; %bb.0:
325 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
327 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
328 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
329 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
330 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
331 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
332 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
333 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
334 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
335 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
336 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_signext@abs32@hi
337 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_signext@abs32@lo
338 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
339 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
340 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0
341 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
342 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
343 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
344 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
345 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
346 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
347 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
348 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
349 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
350 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
351 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
352 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
353 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
354 %var = load volatile i1, ptr addrspace(1) undef
355 call amdgpu_gfx void @external_void_func_i1_signext(i1 signext %var)
; Loads an i1 with a volatile load through an undef addrspace(1) pointer and
; passes it to @external_void_func_i1_zeroext with the zeroext attribute.
; The function's own unnamed i32 parameter is not used by the body.
; The autogenerated expectations below show an unsigned byte load, the loaded
; value masked with 1 (v_and_b32), and the result stored as a single byte at
; the address in s32 before the call.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
359 define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 {
360 ; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
362 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363 ; GFX9-NEXT: s_mov_b32 s34, s33
364 ; GFX9-NEXT: s_mov_b32 s33, s32
365 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
366 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
367 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
368 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
369 ; GFX9-NEXT: s_waitcnt vmcnt(0)
370 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
371 ; GFX9-NEXT: s_addk_i32 s32, 0x400
372 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
373 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_zeroext@abs32@hi
374 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_zeroext@abs32@lo
375 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
376 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
377 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
378 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
379 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
380 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
381 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
382 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
383 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
384 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
385 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
386 ; GFX9-NEXT: s_mov_b32 s33, s34
387 ; GFX9-NEXT: s_waitcnt vmcnt(0)
388 ; GFX9-NEXT: s_setpc_b64 s[30:31]
390 ; GFX10-LABEL: test_call_external_void_func_i1_zeroext:
392 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393 ; GFX10-NEXT: s_mov_b32 s34, s33
394 ; GFX10-NEXT: s_mov_b32 s33, s32
395 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
396 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
397 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
398 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
399 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
400 ; GFX10-NEXT: s_waitcnt vmcnt(0)
401 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
402 ; GFX10-NEXT: s_addk_i32 s32, 0x200
403 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_zeroext@abs32@hi
404 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_zeroext@abs32@lo
405 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
406 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
407 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
408 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
409 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
410 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
411 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
412 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
413 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
414 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
415 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
416 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
417 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
418 ; GFX10-NEXT: s_mov_b32 s33, s34
419 ; GFX10-NEXT: s_waitcnt vmcnt(0)
420 ; GFX10-NEXT: s_setpc_b64 s[30:31]
422 ; GFX11-LABEL: test_call_external_void_func_i1_zeroext:
424 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425 ; GFX11-NEXT: s_mov_b32 s0, s33
426 ; GFX11-NEXT: s_mov_b32 s33, s32
427 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
428 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
429 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
430 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc
431 ; GFX11-NEXT: s_waitcnt vmcnt(0)
432 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
433 ; GFX11-NEXT: s_add_i32 s32, s32, 16
434 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_zeroext@abs32@hi
435 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_zeroext@abs32@lo
436 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
437 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
438 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
439 ; GFX11-NEXT: scratch_store_b8 off, v0, s32
440 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
441 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
442 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
443 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
444 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
445 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
446 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
447 ; GFX11-NEXT: s_add_i32 s32, s32, -16
448 ; GFX11-NEXT: s_mov_b32 s33, s0
449 ; GFX11-NEXT: s_waitcnt vmcnt(0)
450 ; GFX11-NEXT: s_setpc_b64 s[30:31]
452 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_zeroext:
453 ; GFX10-SCRATCH: ; %bb.0:
454 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
455 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
456 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
457 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
458 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
459 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
460 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
461 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
462 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
463 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
464 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
465 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_zeroext@abs32@hi
466 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_zeroext@abs32@lo
467 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
468 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
469 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0
470 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
471 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
472 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
473 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
474 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
475 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
476 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
477 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
478 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
479 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
480 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
481 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
482 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
483 %var = load volatile i1, ptr addrspace(1) undef
484 call amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext %var)
; Calls @external_void_func_i8 with the constant `i8 123`.
; The function's own unnamed i32 parameter is not used by the body.
; The autogenerated expectations below show the constant moved into v0 as 0x7b
; with no byte store to the stack before s_swappc_b64, unlike the i1 tests in
; this file.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
488 define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 {
489 ; GFX9-LABEL: test_call_external_void_func_i8_imm:
491 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492 ; GFX9-NEXT: s_mov_b32 s34, s33
493 ; GFX9-NEXT: s_mov_b32 s33, s32
494 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
495 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
496 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
497 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
498 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
499 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8@abs32@hi
500 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8@abs32@lo
501 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
502 ; GFX9-NEXT: s_addk_i32 s32, 0x400
503 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
504 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
505 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
506 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
507 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
508 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
509 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
510 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
511 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
512 ; GFX9-NEXT: s_mov_b32 s33, s34
513 ; GFX9-NEXT: s_waitcnt vmcnt(0)
514 ; GFX9-NEXT: s_setpc_b64 s[30:31]
516 ; GFX10-LABEL: test_call_external_void_func_i8_imm:
518 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519 ; GFX10-NEXT: s_mov_b32 s34, s33
520 ; GFX10-NEXT: s_mov_b32 s33, s32
521 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
522 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
523 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
524 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
525 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
526 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
527 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8@abs32@hi
528 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8@abs32@lo
529 ; GFX10-NEXT: s_addk_i32 s32, 0x200
530 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
531 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
532 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
533 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
534 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
535 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
536 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
537 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
538 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
539 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
540 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
541 ; GFX10-NEXT: s_mov_b32 s33, s34
542 ; GFX10-NEXT: s_waitcnt vmcnt(0)
543 ; GFX10-NEXT: s_setpc_b64 s[30:31]
545 ; GFX11-LABEL: test_call_external_void_func_i8_imm:
547 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
548 ; GFX11-NEXT: s_mov_b32 s0, s33
549 ; GFX11-NEXT: s_mov_b32 s33, s32
550 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
551 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
552 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
553 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
554 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
555 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi
556 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo
557 ; GFX11-NEXT: s_add_i32 s32, s32, 16
558 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
559 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
560 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
561 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
562 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
563 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
564 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
565 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
566 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
567 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
568 ; GFX11-NEXT: s_add_i32 s32, s32, -16
569 ; GFX11-NEXT: s_mov_b32 s33, s0
570 ; GFX11-NEXT: s_waitcnt vmcnt(0)
571 ; GFX11-NEXT: s_setpc_b64 s[30:31]
573 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm:
574 ; GFX10-SCRATCH: ; %bb.0:
575 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
577 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
578 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
579 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
580 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
581 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
582 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
583 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b
584 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi
585 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo
586 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
587 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
588 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
589 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
590 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
591 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
592 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
593 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
594 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
595 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
596 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
597 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
598 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
599 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
600 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
601 call amdgpu_gfx void @external_void_func_i8(i8 123)
; Calls @external_void_func_i8_signext with an i8 obtained from a volatile load
; through an undef addrspace(1) pointer; the argument carries the signext
; attribute. The function's own unnamed i32 parameter is not used by the body.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file): regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
605 define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 {
606 ; GFX9-LABEL: test_call_external_void_func_i8_signext:
608 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609 ; GFX9-NEXT: s_mov_b32 s34, s33
610 ; GFX9-NEXT: s_mov_b32 s33, s32
611 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
612 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
613 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
614 ; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc
615 ; GFX9-NEXT: s_waitcnt vmcnt(0)
616 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
617 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
618 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi
619 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo
620 ; GFX9-NEXT: s_addk_i32 s32, 0x400
621 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
622 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
623 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
624 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
625 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
626 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
627 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
628 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
629 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
630 ; GFX9-NEXT: s_mov_b32 s33, s34
631 ; GFX9-NEXT: s_waitcnt vmcnt(0)
632 ; GFX9-NEXT: s_setpc_b64 s[30:31]
634 ; GFX10-LABEL: test_call_external_void_func_i8_signext:
636 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637 ; GFX10-NEXT: s_mov_b32 s34, s33
638 ; GFX10-NEXT: s_mov_b32 s33, s32
639 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
640 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
641 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
642 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
643 ; GFX10-NEXT: global_load_sbyte v0, v[0:1], off glc dlc
644 ; GFX10-NEXT: s_waitcnt vmcnt(0)
645 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
646 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi
647 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo
648 ; GFX10-NEXT: s_addk_i32 s32, 0x200
649 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
650 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
651 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
652 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
653 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
654 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
655 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
656 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
657 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
658 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
659 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
660 ; GFX10-NEXT: s_mov_b32 s33, s34
661 ; GFX10-NEXT: s_waitcnt vmcnt(0)
662 ; GFX10-NEXT: s_setpc_b64 s[30:31]
664 ; GFX11-LABEL: test_call_external_void_func_i8_signext:
666 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
667 ; GFX11-NEXT: s_mov_b32 s0, s33
668 ; GFX11-NEXT: s_mov_b32 s33, s32
669 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
670 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
671 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
672 ; GFX11-NEXT: global_load_i8 v0, v[0:1], off glc dlc
673 ; GFX11-NEXT: s_waitcnt vmcnt(0)
674 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
675 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi
676 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo
677 ; GFX11-NEXT: s_add_i32 s32, s32, 16
678 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
679 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
680 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
681 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
682 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
683 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
684 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
685 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
686 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
687 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
688 ; GFX11-NEXT: s_add_i32 s32, s32, -16
689 ; GFX11-NEXT: s_mov_b32 s33, s0
690 ; GFX11-NEXT: s_waitcnt vmcnt(0)
691 ; GFX11-NEXT: s_setpc_b64 s[30:31]
693 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_signext:
694 ; GFX10-SCRATCH: ; %bb.0:
695 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
697 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
698 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
699 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
700 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
701 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
702 ; GFX10-SCRATCH-NEXT: global_load_sbyte v0, v[0:1], off glc dlc
703 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
704 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
705 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi
706 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo
707 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
708 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
709 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
710 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
711 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
712 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
713 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
714 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
715 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
716 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
717 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
718 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
719 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
720 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
721 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
722 %var = load volatile i8, ptr addrspace(1) undef
723 call amdgpu_gfx void @external_void_func_i8_signext(i8 signext %var)
; Calls @external_void_func_i8_zeroext with an i8 obtained from a volatile load
; through an undef addrspace(1) pointer; the argument carries the zeroext
; attribute. The function's own unnamed i32 parameter is not used by the body.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file): regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
727 define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 {
728 ; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
730 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
731 ; GFX9-NEXT: s_mov_b32 s34, s33
732 ; GFX9-NEXT: s_mov_b32 s33, s32
733 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
734 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
735 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
736 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
737 ; GFX9-NEXT: s_waitcnt vmcnt(0)
738 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
739 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
740 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi
741 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo
742 ; GFX9-NEXT: s_addk_i32 s32, 0x400
743 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
744 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
745 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
746 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
747 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
748 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
749 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
750 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
751 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
752 ; GFX9-NEXT: s_mov_b32 s33, s34
753 ; GFX9-NEXT: s_waitcnt vmcnt(0)
754 ; GFX9-NEXT: s_setpc_b64 s[30:31]
756 ; GFX10-LABEL: test_call_external_void_func_i8_zeroext:
758 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759 ; GFX10-NEXT: s_mov_b32 s34, s33
760 ; GFX10-NEXT: s_mov_b32 s33, s32
761 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
762 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
763 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
764 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
765 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
766 ; GFX10-NEXT: s_waitcnt vmcnt(0)
767 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
768 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi
769 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo
770 ; GFX10-NEXT: s_addk_i32 s32, 0x200
771 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
772 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
773 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
774 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
775 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
776 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
777 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
778 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
779 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
780 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
781 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
782 ; GFX10-NEXT: s_mov_b32 s33, s34
783 ; GFX10-NEXT: s_waitcnt vmcnt(0)
784 ; GFX10-NEXT: s_setpc_b64 s[30:31]
786 ; GFX11-LABEL: test_call_external_void_func_i8_zeroext:
788 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
789 ; GFX11-NEXT: s_mov_b32 s0, s33
790 ; GFX11-NEXT: s_mov_b32 s33, s32
791 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
792 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
793 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
794 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc
795 ; GFX11-NEXT: s_waitcnt vmcnt(0)
796 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
797 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi
798 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo
799 ; GFX11-NEXT: s_add_i32 s32, s32, 16
800 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
801 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
802 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
803 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
804 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
805 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
806 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
807 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
808 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
809 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
810 ; GFX11-NEXT: s_add_i32 s32, s32, -16
811 ; GFX11-NEXT: s_mov_b32 s33, s0
812 ; GFX11-NEXT: s_waitcnt vmcnt(0)
813 ; GFX11-NEXT: s_setpc_b64 s[30:31]
815 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_zeroext:
816 ; GFX10-SCRATCH: ; %bb.0:
817 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
818 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
819 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
820 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
821 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
822 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
823 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
824 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
825 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
826 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
827 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi
828 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo
829 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
830 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
831 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
832 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
833 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
834 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
835 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
836 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
837 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
838 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
839 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
840 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
841 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
842 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
843 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
844 %var = load volatile i8, ptr addrspace(1) undef
845 call amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext %var)
; Calls @external_void_func_i16 with the constant i16 123; the expected output
; for every RUN configuration materializes it as 0x7b in v0 before the call.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file): regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
849 define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 {
850 ; GFX9-LABEL: test_call_external_void_func_i16_imm:
852 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
853 ; GFX9-NEXT: s_mov_b32 s34, s33
854 ; GFX9-NEXT: s_mov_b32 s33, s32
855 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
856 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
857 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
858 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
859 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
860 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16@abs32@hi
861 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16@abs32@lo
862 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
863 ; GFX9-NEXT: s_addk_i32 s32, 0x400
864 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
865 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
866 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
867 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
868 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
869 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
870 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
871 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
872 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
873 ; GFX9-NEXT: s_mov_b32 s33, s34
874 ; GFX9-NEXT: s_waitcnt vmcnt(0)
875 ; GFX9-NEXT: s_setpc_b64 s[30:31]
877 ; GFX10-LABEL: test_call_external_void_func_i16_imm:
879 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
880 ; GFX10-NEXT: s_mov_b32 s34, s33
881 ; GFX10-NEXT: s_mov_b32 s33, s32
882 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
883 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
884 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
885 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
886 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
887 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
888 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16@abs32@hi
889 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16@abs32@lo
890 ; GFX10-NEXT: s_addk_i32 s32, 0x200
891 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
892 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
893 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
894 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
895 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
896 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
897 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
898 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
899 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
900 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
901 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
902 ; GFX10-NEXT: s_mov_b32 s33, s34
903 ; GFX10-NEXT: s_waitcnt vmcnt(0)
904 ; GFX10-NEXT: s_setpc_b64 s[30:31]
906 ; GFX11-LABEL: test_call_external_void_func_i16_imm:
908 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
909 ; GFX11-NEXT: s_mov_b32 s0, s33
910 ; GFX11-NEXT: s_mov_b32 s33, s32
911 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
912 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
913 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
914 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
915 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
916 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi
917 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo
918 ; GFX11-NEXT: s_add_i32 s32, s32, 16
919 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
920 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
921 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
922 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
923 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
924 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
925 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
926 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
927 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
928 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
929 ; GFX11-NEXT: s_add_i32 s32, s32, -16
930 ; GFX11-NEXT: s_mov_b32 s33, s0
931 ; GFX11-NEXT: s_waitcnt vmcnt(0)
932 ; GFX11-NEXT: s_setpc_b64 s[30:31]
934 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm:
935 ; GFX10-SCRATCH: ; %bb.0:
936 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
938 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
939 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
940 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
941 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
942 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
943 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
944 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b
945 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi
946 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo
947 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
948 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
949 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
950 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
951 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
952 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
953 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
954 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
955 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
956 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
957 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
958 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
959 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
960 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
961 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
962 call amdgpu_gfx void @external_void_func_i16(i16 123)
; Calls @external_void_func_i16_signext with an i16 obtained from a volatile
; load through an undef addrspace(1) pointer; the argument carries the signext
; attribute. The function's own unnamed i32 parameter is not used by the body.
; NOTE(review): the expected output loads the value with an unsigned 16-bit
; load (global_load_ushort / global_load_u16) and shows no separate
; sign-extension instruction, whereas the i8 signext test in this file expects
; global_load_sbyte -- confirm this is the intended lowering.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file): regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
966 define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 {
967 ; GFX9-LABEL: test_call_external_void_func_i16_signext:
969 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
970 ; GFX9-NEXT: s_mov_b32 s34, s33
971 ; GFX9-NEXT: s_mov_b32 s33, s32
972 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
973 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
974 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
975 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
976 ; GFX9-NEXT: s_waitcnt vmcnt(0)
977 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
978 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
979 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi
980 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_signext@abs32@lo
981 ; GFX9-NEXT: s_addk_i32 s32, 0x400
982 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
983 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
984 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
985 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
986 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
987 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
988 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
989 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
990 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
991 ; GFX9-NEXT: s_mov_b32 s33, s34
992 ; GFX9-NEXT: s_waitcnt vmcnt(0)
993 ; GFX9-NEXT: s_setpc_b64 s[30:31]
995 ; GFX10-LABEL: test_call_external_void_func_i16_signext:
997 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
998 ; GFX10-NEXT: s_mov_b32 s34, s33
999 ; GFX10-NEXT: s_mov_b32 s33, s32
1000 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1001 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1002 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1003 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1004 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc
1005 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1006 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1007 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi
1008 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_signext@abs32@lo
1009 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1010 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1011 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1012 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1013 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1014 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1015 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1016 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1017 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1018 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1019 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1020 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1021 ; GFX10-NEXT: s_mov_b32 s33, s34
1022 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1023 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1025 ; GFX11-LABEL: test_call_external_void_func_i16_signext:
1027 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1028 ; GFX11-NEXT: s_mov_b32 s0, s33
1029 ; GFX11-NEXT: s_mov_b32 s33, s32
1030 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1031 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1032 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1033 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off glc dlc
1034 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1035 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1036 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi
1037 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo
1038 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1039 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1040 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1041 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1042 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1043 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1044 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1045 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1046 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1047 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1048 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1049 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1050 ; GFX11-NEXT: s_mov_b32 s33, s0
1051 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1052 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1054 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_signext:
1055 ; GFX10-SCRATCH: ; %bb.0:
1056 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1058 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1059 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1060 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1061 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1062 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1063 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc
1064 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1065 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1066 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi
1067 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo
1068 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1069 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1070 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1071 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1072 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1073 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1074 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1075 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1076 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1077 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1078 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1079 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1080 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1081 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1082 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1083 %var = load volatile i16, ptr addrspace(1) undef
1084 call amdgpu_gfx void @external_void_func_i16_signext(i16 signext %var)
; Calls @external_void_func_i16_zeroext with an i16 obtained from a volatile
; load through an undef addrspace(1) pointer; the argument carries the zeroext
; attribute. The function's own unnamed i32 parameter is not used by the body.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file): regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1088 define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 {
1089 ; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
1091 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1092 ; GFX9-NEXT: s_mov_b32 s34, s33
1093 ; GFX9-NEXT: s_mov_b32 s33, s32
1094 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1095 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1096 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1097 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
1098 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1099 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1100 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1101 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi
1102 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo
1103 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1104 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1105 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1106 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1107 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1108 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1109 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1110 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1111 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1112 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1113 ; GFX9-NEXT: s_mov_b32 s33, s34
1114 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1115 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1117 ; GFX10-LABEL: test_call_external_void_func_i16_zeroext:
1119 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1120 ; GFX10-NEXT: s_mov_b32 s34, s33
1121 ; GFX10-NEXT: s_mov_b32 s33, s32
1122 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1123 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1124 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1125 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1126 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc
1127 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1128 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1129 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi
1130 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo
1131 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1132 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1133 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1134 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1135 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1136 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1137 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1138 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1139 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1140 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1141 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1142 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1143 ; GFX10-NEXT: s_mov_b32 s33, s34
1144 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1145 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1147 ; GFX11-LABEL: test_call_external_void_func_i16_zeroext:
1149 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1150 ; GFX11-NEXT: s_mov_b32 s0, s33
1151 ; GFX11-NEXT: s_mov_b32 s33, s32
1152 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1153 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1154 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1155 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off glc dlc
1156 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1157 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1158 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi
1159 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo
1160 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1161 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1162 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1163 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1164 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1165 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1166 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1167 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1168 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1169 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1170 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1171 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1172 ; GFX11-NEXT: s_mov_b32 s33, s0
1173 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1174 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1176 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_zeroext:
1177 ; GFX10-SCRATCH: ; %bb.0:
1178 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1179 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1180 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1181 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1182 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1183 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1184 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1185 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc
1186 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1187 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1188 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi
1189 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo
1190 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1191 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1192 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1193 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1194 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1195 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1196 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1197 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1198 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1199 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1200 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1201 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1202 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1203 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1204 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1205 %var = load volatile i16, ptr addrspace(1) undef
1206 call amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext %var)
; Calls @external_void_func_i32 with the constant i32 42; the expected output
; for every RUN configuration materializes it in v0 before the call. The
; function's own unnamed i32 parameter is not used by the body.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file): regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1210 define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 {
1211 ; GFX9-LABEL: test_call_external_void_func_i32_imm:
1213 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1214 ; GFX9-NEXT: s_mov_b32 s34, s33
1215 ; GFX9-NEXT: s_mov_b32 s33, s32
1216 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1217 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1218 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1219 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1220 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1221 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i32@abs32@hi
1222 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i32@abs32@lo
1223 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
1224 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1225 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1226 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1227 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1228 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1229 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1230 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1231 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1232 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1233 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1234 ; GFX9-NEXT: s_mov_b32 s33, s34
1235 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1236 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1238 ; GFX10-LABEL: test_call_external_void_func_i32_imm:
1240 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241 ; GFX10-NEXT: s_mov_b32 s34, s33
1242 ; GFX10-NEXT: s_mov_b32 s33, s32
1243 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1244 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1245 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1246 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1247 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1248 ; GFX10-NEXT: v_mov_b32_e32 v0, 42
1249 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32@abs32@hi
1250 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32@abs32@lo
1251 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1252 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1253 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1254 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1255 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1256 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1257 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1258 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1259 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1260 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1261 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1262 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1263 ; GFX10-NEXT: s_mov_b32 s33, s34
1264 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1265 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1267 ; GFX11-LABEL: test_call_external_void_func_i32_imm:
1269 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1270 ; GFX11-NEXT: s_mov_b32 s0, s33
1271 ; GFX11-NEXT: s_mov_b32 s33, s32
1272 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1273 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1274 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1275 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1276 ; GFX11-NEXT: v_mov_b32_e32 v0, 42
1277 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32@abs32@hi
1278 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32@abs32@lo
1279 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1280 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1281 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1282 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1283 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1284 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1285 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1286 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1287 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1288 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1289 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1290 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1291 ; GFX11-NEXT: s_mov_b32 s33, s0
1292 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1293 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1295 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm:
1296 ; GFX10-SCRATCH: ; %bb.0:
1297 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1299 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1300 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1301 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1302 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1303 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1304 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1305 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42
1306 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32@abs32@hi
1307 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32@abs32@lo
1308 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1309 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1310 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1311 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1312 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1313 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1314 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1315 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1316 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1317 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1318 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1319 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1320 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1321 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1322 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1323 call amdgpu_gfx void @external_void_func_i32(i32 42)
; Calls @external_void_func_i64 with the constant i64 123. The expected output
; for every run line places the argument in a VGPR pair: v0 = 0x7b (low half)
; and v1 = 0 (high half), then performs the call through s_swappc_b64.
1327 define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 {
1328 ; GFX9-LABEL: test_call_external_void_func_i64_imm:
1330 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1331 ; GFX9-NEXT: s_mov_b32 s34, s33
1332 ; GFX9-NEXT: s_mov_b32 s33, s32
1333 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1334 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1335 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1336 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1337 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1338 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i64@abs32@hi
1339 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i64@abs32@lo
1340 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
1341 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1342 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1343 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1344 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1345 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1346 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1347 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1348 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1349 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1350 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1351 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1352 ; GFX9-NEXT: s_mov_b32 s33, s34
1353 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1354 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1356 ; GFX10-LABEL: test_call_external_void_func_i64_imm:
1358 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1359 ; GFX10-NEXT: s_mov_b32 s34, s33
1360 ; GFX10-NEXT: s_mov_b32 s33, s32
1361 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1362 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1363 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1364 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1365 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1366 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
1367 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1368 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64@abs32@hi
1369 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64@abs32@lo
1370 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1371 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1372 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1373 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1374 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1375 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1376 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1377 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1378 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1379 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1380 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1381 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1382 ; GFX10-NEXT: s_mov_b32 s33, s34
1383 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1384 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1386 ; GFX11-LABEL: test_call_external_void_func_i64_imm:
1388 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1389 ; GFX11-NEXT: s_mov_b32 s0, s33
1390 ; GFX11-NEXT: s_mov_b32 s33, s32
1391 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1392 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1393 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1394 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1395 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
1396 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi
1397 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo
1398 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1399 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1400 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1401 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1402 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1403 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1404 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1405 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1406 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1407 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1408 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1409 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1410 ; GFX11-NEXT: s_mov_b32 s33, s0
1411 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1412 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1414 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm:
1415 ; GFX10-SCRATCH: ; %bb.0:
1416 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1417 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1418 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1419 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1420 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1421 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1422 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1423 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1424 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b
1425 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1426 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi
1427 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo
1428 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1429 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1430 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1431 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1432 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1433 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1434 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1435 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1436 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1437 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1438 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1439 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1440 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1441 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1442 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1443 call amdgpu_gfx void @external_void_func_i64(i64 123)
; Loads a <2 x i64> from the null pointer in addrspace(1) and passes it to
; @external_void_func_v2i64. The expected output loads the whole value into
; v[0:3] with one 128-bit global load issued before the call.
1447 define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 {
1448 ; GFX9-LABEL: test_call_external_void_func_v2i64:
1450 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1451 ; GFX9-NEXT: s_mov_b32 s34, s33
1452 ; GFX9-NEXT: s_mov_b32 s33, s32
1453 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1454 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1455 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1456 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1457 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1458 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1459 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1460 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1461 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi
1462 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo
1463 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1464 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1465 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1466 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1467 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1468 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1469 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1470 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1471 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1472 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1473 ; GFX9-NEXT: s_mov_b32 s33, s34
1474 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1475 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1477 ; GFX10-LABEL: test_call_external_void_func_v2i64:
1479 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1480 ; GFX10-NEXT: s_mov_b32 s34, s33
1481 ; GFX10-NEXT: s_mov_b32 s33, s32
1482 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1483 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1484 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1485 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1486 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1487 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1488 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1489 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi
1490 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo
1491 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1492 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1493 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1494 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1495 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1496 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1497 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1498 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1499 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1500 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1501 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1502 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1503 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1504 ; GFX10-NEXT: s_mov_b32 s33, s34
1505 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1506 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1508 ; GFX11-LABEL: test_call_external_void_func_v2i64:
1510 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1511 ; GFX11-NEXT: s_mov_b32 s0, s33
1512 ; GFX11-NEXT: s_mov_b32 s33, s32
1513 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1514 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1515 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1516 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1517 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
1518 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1519 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi
1520 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo
1521 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1522 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1523 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1524 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1525 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1526 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1527 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1528 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1529 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1530 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1531 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1532 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1533 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1534 ; GFX11-NEXT: s_mov_b32 s33, s0
1535 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1536 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1538 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64:
1539 ; GFX10-SCRATCH: ; %bb.0:
1540 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1541 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1542 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1543 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1544 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1545 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1546 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1547 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
1548 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1549 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1550 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi
1551 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo
1552 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1553 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1554 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1555 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1556 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1557 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1558 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1559 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1560 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1561 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1562 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1563 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1564 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1565 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1566 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1567 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1568 %val = load <2 x i64>, ptr addrspace(1) null
1569 call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> %val)
; Calls @external_void_func_v2i64 with a constant vector. 8589934593 is
; 0x0000000200000001 and 17179869187 is 0x0000000400000003, so the expected
; output materializes the argument as v0..v3 = 1, 2, 3, 4 before the call.
1573 define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 {
1574 ; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
1576 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1577 ; GFX9-NEXT: s_mov_b32 s34, s33
1578 ; GFX9-NEXT: s_mov_b32 s33, s32
1579 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1580 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1581 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1582 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1583 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1584 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi
1585 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo
1586 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
1587 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
1588 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
1589 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
1590 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1591 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1592 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1593 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1594 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1595 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1596 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1597 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1598 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1599 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1600 ; GFX9-NEXT: s_mov_b32 s33, s34
1601 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1602 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1604 ; GFX10-LABEL: test_call_external_void_func_v2i64_imm:
1606 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1607 ; GFX10-NEXT: s_mov_b32 s34, s33
1608 ; GFX10-NEXT: s_mov_b32 s33, s32
1609 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1610 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1611 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1612 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1613 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1614 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
1615 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
1616 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
1617 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
1618 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1619 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi
1620 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo
1621 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1622 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1623 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1624 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1625 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1626 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1627 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1628 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1629 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1630 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1631 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1632 ; GFX10-NEXT: s_mov_b32 s33, s34
1633 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1634 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1636 ; GFX11-LABEL: test_call_external_void_func_v2i64_imm:
1638 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1639 ; GFX11-NEXT: s_mov_b32 s0, s33
1640 ; GFX11-NEXT: s_mov_b32 s33, s32
1641 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1642 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1643 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1644 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1645 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
1646 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
1647 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1648 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi
1649 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo
1650 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1651 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1652 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1653 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1654 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1655 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1656 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1657 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1658 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1659 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1660 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1661 ; GFX11-NEXT: s_mov_b32 s33, s0
1662 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1663 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1665 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm:
1666 ; GFX10-SCRATCH: ; %bb.0:
1667 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1668 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1669 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1670 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1671 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1672 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1673 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1674 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1675 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
1676 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
1677 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
1678 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
1679 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1680 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi
1681 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo
1682 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1683 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1684 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1685 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1686 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1687 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1688 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1689 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1690 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1691 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1692 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1693 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1694 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1695 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1696 call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
; Builds a <3 x i64> from a loaded <2 x i64> plus the constant 8589934593
; (0x0000000200000001) and passes it to @external_void_func_v3i64. The expected
; output has the loaded elements in v[0:3] and the constant in v4 = 1, v5 = 2.
; The shuffle mask <0, 1, 2> never selects the second element of the constant
; operand, so that placeholder lane is written as poison rather than undef.
1700 define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
1701 ; GFX9-LABEL: test_call_external_void_func_v3i64:
1703 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1704 ; GFX9-NEXT: s_mov_b32 s34, s33
1705 ; GFX9-NEXT: s_mov_b32 s33, s32
1706 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1707 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1708 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1709 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1710 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1711 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1712 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1713 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1714 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i64@abs32@hi
1715 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i64@abs32@lo
1716 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
1717 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
1718 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1719 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1720 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1721 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1722 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1723 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1724 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1725 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1726 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1727 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1728 ; GFX9-NEXT: s_mov_b32 s33, s34
1729 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1730 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1732 ; GFX10-LABEL: test_call_external_void_func_v3i64:
1734 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1735 ; GFX10-NEXT: s_mov_b32 s34, s33
1736 ; GFX10-NEXT: s_mov_b32 s33, s32
1737 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1738 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1739 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1740 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1741 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1742 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1743 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1744 ; GFX10-NEXT: v_mov_b32_e32 v4, 1
1745 ; GFX10-NEXT: v_mov_b32_e32 v5, 2
1746 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64@abs32@hi
1747 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1748 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1749 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i64@abs32@lo
1750 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1751 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1752 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1753 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1754 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1755 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1756 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1757 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1758 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1759 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1760 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1761 ; GFX10-NEXT: s_mov_b32 s33, s34
1762 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1763 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1765 ; GFX11-LABEL: test_call_external_void_func_v3i64:
1767 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1768 ; GFX11-NEXT: s_mov_b32 s0, s33
1769 ; GFX11-NEXT: s_mov_b32 s33, s32
1770 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1771 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1772 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1773 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2
1774 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1
1775 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1776 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i64@abs32@hi
1777 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64@abs32@lo
1778 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1779 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1780 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1781 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1782 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1783 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1784 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1785 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1786 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1787 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1788 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1789 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1790 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1791 ; GFX11-NEXT: s_mov_b32 s33, s0
1792 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1793 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1795 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64:
1796 ; GFX10-SCRATCH: ; %bb.0:
1797 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1798 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1799 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1800 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1801 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1802 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1803 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1804 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
1805 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1806 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1807 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1
1808 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2
1809 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64@abs32@hi
1810 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1811 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1812 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64@abs32@lo
1813 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1814 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1815 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1816 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1817 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1818 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1819 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1820 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1821 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1822 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1823 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1824 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1825 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1826 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1827 %load = load <2 x i64>, ptr addrspace(1) null
1828 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
1830 call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val)
; Builds a <4 x i64> by concatenating a loaded <2 x i64> with the constants
; 8589934593 (0x0000000200000001) and 17179869187 (0x0000000400000003), then
; passes it to @external_void_func_v4i64. The expected output has the loaded
; elements in v[0:3] and the constants in v4..v7 = 1, 2, 3, 4.
1834 define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 {
1835 ; GFX9-LABEL: test_call_external_void_func_v4i64:
1837 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1838 ; GFX9-NEXT: s_mov_b32 s34, s33
1839 ; GFX9-NEXT: s_mov_b32 s33, s32
1840 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1841 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1842 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1843 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1844 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1845 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1846 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1847 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1848 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i64@abs32@hi
1849 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i64@abs32@lo
1850 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
1851 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
1852 ; GFX9-NEXT: v_mov_b32_e32 v6, 3
1853 ; GFX9-NEXT: v_mov_b32_e32 v7, 4
1854 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1855 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1856 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1857 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1858 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1859 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1860 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1861 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1862 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1863 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1864 ; GFX9-NEXT: s_mov_b32 s33, s34
1865 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1866 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1868 ; GFX10-LABEL: test_call_external_void_func_v4i64:
1870 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1871 ; GFX10-NEXT: s_mov_b32 s34, s33
1872 ; GFX10-NEXT: s_mov_b32 s33, s32
1873 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1874 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1875 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1876 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1877 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1878 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1879 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1880 ; GFX10-NEXT: v_mov_b32_e32 v4, 1
1881 ; GFX10-NEXT: v_mov_b32_e32 v5, 2
1882 ; GFX10-NEXT: v_mov_b32_e32 v6, 3
1883 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1884 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1885 ; GFX10-NEXT: v_mov_b32_e32 v7, 4
1886 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i64@abs32@hi
1887 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64@abs32@lo
1888 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1889 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1890 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1891 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1892 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1893 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1894 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1895 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1896 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1897 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1898 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1899 ; GFX10-NEXT: s_mov_b32 s33, s34
1900 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1901 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1903 ; GFX11-LABEL: test_call_external_void_func_v4i64:
1905 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1906 ; GFX11-NEXT: s_mov_b32 s0, s33
1907 ; GFX11-NEXT: s_mov_b32 s33, s32
1908 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1909 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1910 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1911 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2
1912 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1
1913 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1914 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4
1915 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1916 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64@abs32@hi
1917 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1918 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64@abs32@lo
1919 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1920 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1921 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1922 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1923 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1924 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1925 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1926 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1927 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1928 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1929 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1930 ; GFX11-NEXT: s_mov_b32 s33, s0
1931 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1932 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1934 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64:
1935 ; GFX10-SCRATCH: ; %bb.0:
1936 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1937 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1938 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1939 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1940 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1941 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1942 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1943 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
1944 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1945 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1946 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1
1947 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2
1948 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 3
1949 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1950 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1951 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 4
1952 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i64@abs32@hi
1953 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64@abs32@lo
1954 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1955 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1956 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1957 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1958 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1959 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1960 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1961 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1962 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1963 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1964 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1965 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1966 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1967 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1968 %load = load <2 x i64>, ptr addrspace(1) null
1969 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1970 call amdgpu_gfx void @external_void_func_v4i64(<4 x i64> %val)
; Test: call the external amdgpu_gfx function @external_void_func_f16 with the
; constant half 4.0. Under all four check prefixes the argument is expected to
; be placed in v0 as the literal 0x4400 (the IEEE half encoding of 4.0) before
; the s_swappc_b64 call, with the return address (s30/s31) and the saved s33
; value kept in lanes of v40 across the call.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
1974 define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 {
1975 ; GFX9-LABEL: test_call_external_void_func_f16_imm:
1977 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1978 ; GFX9-NEXT: s_mov_b32 s34, s33
1979 ; GFX9-NEXT: s_mov_b32 s33, s32
1980 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1981 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1982 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1983 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1984 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1985 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f16@abs32@hi
1986 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f16@abs32@lo
1987 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
1988 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1989 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1990 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1991 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1992 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1993 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1994 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1995 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1996 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1997 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1998 ; GFX9-NEXT: s_mov_b32 s33, s34
1999 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2000 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2002 ; GFX10-LABEL: test_call_external_void_func_f16_imm:
2004 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2005 ; GFX10-NEXT: s_mov_b32 s34, s33
2006 ; GFX10-NEXT: s_mov_b32 s33, s32
2007 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2008 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2009 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2010 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2011 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2012 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400
2013 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16@abs32@hi
2014 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16@abs32@lo
2015 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2016 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2017 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2018 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2019 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2020 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2021 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2022 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2023 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2024 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2025 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2026 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2027 ; GFX10-NEXT: s_mov_b32 s33, s34
2028 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2029 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2031 ; GFX11-LABEL: test_call_external_void_func_f16_imm:
2033 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2034 ; GFX11-NEXT: s_mov_b32 s0, s33
2035 ; GFX11-NEXT: s_mov_b32 s33, s32
2036 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2037 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2038 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2039 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2040 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400
2041 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi
2042 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo
2043 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2044 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2045 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2046 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2047 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2048 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2049 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2050 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2051 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2052 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2053 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2054 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2055 ; GFX11-NEXT: s_mov_b32 s33, s0
2056 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2057 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2059 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm:
2060 ; GFX10-SCRATCH: ; %bb.0:
2061 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2062 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2063 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2064 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2065 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2066 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2067 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2068 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2069 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x4400
2070 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi
2071 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo
2072 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2073 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2074 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2075 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2076 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2077 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2078 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2079 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2080 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2081 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2082 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2083 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2084 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2085 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2086 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2087 call amdgpu_gfx void @external_void_func_f16(half 4.0)
; Test: call the external amdgpu_gfx function @external_void_func_f32 with the
; constant float 4.0. Under all four check prefixes the argument is expected to
; be moved into v0 as the operand 4.0 (no hex literal) before the s_swappc_b64
; call, with the return address (s30/s31) and the saved s33 value kept in lanes
; of v40 across the call.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2091 define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 {
2092 ; GFX9-LABEL: test_call_external_void_func_f32_imm:
2094 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2095 ; GFX9-NEXT: s_mov_b32 s34, s33
2096 ; GFX9-NEXT: s_mov_b32 s33, s32
2097 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2098 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2099 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2100 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2101 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2102 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f32@abs32@hi
2103 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f32@abs32@lo
2104 ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
2105 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2106 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2107 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2108 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2109 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2110 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2111 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2112 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2113 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2114 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2115 ; GFX9-NEXT: s_mov_b32 s33, s34
2116 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2117 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2119 ; GFX10-LABEL: test_call_external_void_func_f32_imm:
2121 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2122 ; GFX10-NEXT: s_mov_b32 s34, s33
2123 ; GFX10-NEXT: s_mov_b32 s33, s32
2124 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2125 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2126 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2127 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2128 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2129 ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0
2130 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f32@abs32@hi
2131 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32@abs32@lo
2132 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2133 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2134 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2135 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2136 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2137 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2138 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2139 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2140 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2141 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2142 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2143 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2144 ; GFX10-NEXT: s_mov_b32 s33, s34
2145 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2146 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2148 ; GFX11-LABEL: test_call_external_void_func_f32_imm:
2150 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2151 ; GFX11-NEXT: s_mov_b32 s0, s33
2152 ; GFX11-NEXT: s_mov_b32 s33, s32
2153 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2154 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2155 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2156 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2157 ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0
2158 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32@abs32@hi
2159 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f32@abs32@lo
2160 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2161 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2162 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2163 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2164 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2165 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2166 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2167 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2168 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2169 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2170 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2171 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2172 ; GFX11-NEXT: s_mov_b32 s33, s0
2173 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2174 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2176 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm:
2177 ; GFX10-SCRATCH: ; %bb.0:
2178 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2179 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2180 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2181 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2182 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2183 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2184 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2185 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2186 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4.0
2187 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32@abs32@hi
2188 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32@abs32@lo
2189 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2190 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2191 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2192 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2193 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2194 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2195 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2196 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2197 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2198 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2199 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2200 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2201 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2202 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2203 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2204 call amdgpu_gfx void @external_void_func_f32(float 4.0)
; Test: call the external amdgpu_gfx function @external_void_func_v2f32 with
; the constant vector <1.0, 2.0>. The two elements are expected in v0 and v1
; before the s_swappc_b64 call. The GFX9, GFX10 and GFX10-SCRATCH checks expect
; two separate v_mov_b32 instructions; the GFX11 checks expect a single
; v_dual_mov_b32 that writes both registers.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2208 define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 {
2209 ; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
2211 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2212 ; GFX9-NEXT: s_mov_b32 s34, s33
2213 ; GFX9-NEXT: s_mov_b32 s33, s32
2214 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2215 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2216 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2217 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2218 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2219 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f32@abs32@hi
2220 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f32@abs32@lo
2221 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
2222 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2223 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2224 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2225 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2226 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2227 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2228 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2229 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2230 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2231 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2232 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2233 ; GFX9-NEXT: s_mov_b32 s33, s34
2234 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2235 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2237 ; GFX10-LABEL: test_call_external_void_func_v2f32_imm:
2239 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2240 ; GFX10-NEXT: s_mov_b32 s34, s33
2241 ; GFX10-NEXT: s_mov_b32 s33, s32
2242 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2243 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2244 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2245 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2246 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2247 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
2248 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2249 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32@abs32@hi
2250 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f32@abs32@lo
2251 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2252 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2253 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2254 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2255 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2256 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2257 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2258 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2259 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2260 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2261 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2262 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2263 ; GFX10-NEXT: s_mov_b32 s33, s34
2264 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2265 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2267 ; GFX11-LABEL: test_call_external_void_func_v2f32_imm:
2269 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2270 ; GFX11-NEXT: s_mov_b32 s0, s33
2271 ; GFX11-NEXT: s_mov_b32 s33, s32
2272 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2273 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2274 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2275 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2276 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2277 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi
2278 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo
2279 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2280 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2281 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2282 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2283 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2284 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2285 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2286 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2287 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2288 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2289 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2290 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2291 ; GFX11-NEXT: s_mov_b32 s33, s0
2292 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2293 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2295 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm:
2296 ; GFX10-SCRATCH: ; %bb.0:
2297 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2298 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2299 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2300 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2301 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2302 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2303 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2304 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2305 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0
2306 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2307 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi
2308 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo
2309 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2310 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2311 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2312 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2313 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2314 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2315 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2316 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2317 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2318 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2319 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2320 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2321 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2322 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2323 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2324 call amdgpu_gfx void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
; Test: call the external amdgpu_gfx function @external_void_func_v3f32 with
; the constant vector <1.0, 2.0, 4.0>. The three elements are expected in
; v0, v1 and v2 before the s_swappc_b64 call. The GFX11 checks expect v0/v1 to
; be written by one v_dual_mov_b32 and v2 by a plain v_mov_b32; the other
; prefixes expect three separate v_mov_b32 instructions.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2328 define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 {
2329 ; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
2331 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2332 ; GFX9-NEXT: s_mov_b32 s34, s33
2333 ; GFX9-NEXT: s_mov_b32 s33, s32
2334 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2335 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2336 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2337 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2338 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2339 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f32@abs32@hi
2340 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f32@abs32@lo
2341 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
2342 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2343 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
2344 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2345 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2346 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2347 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2348 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2349 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2350 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2351 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2352 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2353 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2354 ; GFX9-NEXT: s_mov_b32 s33, s34
2355 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2356 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2358 ; GFX10-LABEL: test_call_external_void_func_v3f32_imm:
2360 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2361 ; GFX10-NEXT: s_mov_b32 s34, s33
2362 ; GFX10-NEXT: s_mov_b32 s33, s32
2363 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2364 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2365 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2366 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2367 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2368 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
2369 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2370 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0
2371 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32@abs32@hi
2372 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2373 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f32@abs32@lo
2374 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2375 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2376 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2377 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2378 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2379 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2380 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2381 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2382 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2383 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2384 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2385 ; GFX10-NEXT: s_mov_b32 s33, s34
2386 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2387 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2389 ; GFX11-LABEL: test_call_external_void_func_v3f32_imm:
2391 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392 ; GFX11-NEXT: s_mov_b32 s0, s33
2393 ; GFX11-NEXT: s_mov_b32 s33, s32
2394 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2395 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2396 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2397 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2398 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2399 ; GFX11-NEXT: v_mov_b32_e32 v2, 4.0
2400 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32@abs32@hi
2401 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2402 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32@abs32@lo
2403 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2404 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2405 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2406 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2407 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2408 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2409 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2410 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2411 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2412 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2413 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2414 ; GFX11-NEXT: s_mov_b32 s33, s0
2415 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2416 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2418 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm:
2419 ; GFX10-SCRATCH: ; %bb.0:
2420 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2421 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2422 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2423 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2424 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2425 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2426 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2427 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2428 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0
2429 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2430 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0
2431 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32@abs32@hi
2432 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2433 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32@abs32@lo
2434 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2435 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2436 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2437 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2438 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2439 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2440 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2441 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2442 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2443 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2444 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2445 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2446 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2447 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2448 call amdgpu_gfx void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
; Test: call the external amdgpu_gfx function @external_void_func_v5f32 with
; the constant vector <1.0, 2.0, 4.0, -1.0, 0.5>. The five elements are
; expected in v0 through v4 before the s_swappc_b64 call. The GFX11 checks
; expect two v_dual_mov_b32 instructions (v0/v1 and v2/v3) plus a plain
; v_mov_b32 for v4; the other prefixes expect five separate v_mov_b32
; instructions.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2452 define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 {
2453 ; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
2455 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2456 ; GFX9-NEXT: s_mov_b32 s34, s33
2457 ; GFX9-NEXT: s_mov_b32 s33, s32
2458 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2459 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2460 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2461 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2462 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2463 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5f32@abs32@hi
2464 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5f32@abs32@lo
2465 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
2466 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2467 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
2468 ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0
2469 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5
2470 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2471 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2472 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2473 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2474 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2475 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2476 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2477 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2478 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2479 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2480 ; GFX9-NEXT: s_mov_b32 s33, s34
2481 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2482 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2484 ; GFX10-LABEL: test_call_external_void_func_v5f32_imm:
2486 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2487 ; GFX10-NEXT: s_mov_b32 s34, s33
2488 ; GFX10-NEXT: s_mov_b32 s33, s32
2489 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2490 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2491 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2492 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2493 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2494 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
2495 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2496 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0
2497 ; GFX10-NEXT: v_mov_b32_e32 v3, -1.0
2498 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2499 ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5
2500 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5f32@abs32@hi
2501 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32@abs32@lo
2502 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2503 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2504 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2505 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2506 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2507 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2508 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2509 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2510 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2511 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2512 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2513 ; GFX10-NEXT: s_mov_b32 s33, s34
2514 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2515 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2517 ; GFX11-LABEL: test_call_external_void_func_v5f32_imm:
2519 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520 ; GFX11-NEXT: s_mov_b32 s0, s33
2521 ; GFX11-NEXT: s_mov_b32 s33, s32
2522 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2523 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2524 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2525 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2526 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2527 ; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0
2528 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2529 ; GFX11-NEXT: v_mov_b32_e32 v4, 0.5
2530 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32@abs32@hi
2531 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32@abs32@lo
2532 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2533 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2534 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2535 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2536 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2537 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2538 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2539 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2540 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2541 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2542 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2543 ; GFX11-NEXT: s_mov_b32 s33, s0
2544 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2545 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2547 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm:
2548 ; GFX10-SCRATCH: ; %bb.0:
2549 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2550 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2551 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2552 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2553 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2554 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2555 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2556 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2557 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0
2558 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2559 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0
2560 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, -1.0
2561 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2562 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0.5
2563 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32@abs32@hi
2564 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32@abs32@lo
2565 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2566 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2567 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2568 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2569 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2570 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2571 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2572 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2573 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2574 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2575 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2576 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2577 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2578 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2579 call amdgpu_gfx void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
; Test: call the external amdgpu_gfx function @external_void_func_f64 with the
; constant double 4.0 (bit pattern 0x4010000000000000). The value is expected
; split across two 32-bit registers before the s_swappc_b64 call: v0 holds the
; low dword (0) and v1 holds the high dword (0x40100000). The GFX11 checks
; expect both writes in a single v_dual_mov_b32; the other prefixes expect two
; separate v_mov_b32 instructions.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2583 define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 {
2584 ; GFX9-LABEL: test_call_external_void_func_f64_imm:
2586 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2587 ; GFX9-NEXT: s_mov_b32 s34, s33
2588 ; GFX9-NEXT: s_mov_b32 s33, s32
2589 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2590 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2591 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2592 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2593 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2594 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f64@abs32@hi
2595 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f64@abs32@lo
2596 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2597 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
2598 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2599 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2600 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2601 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2602 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2603 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2604 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2605 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2606 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2607 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2608 ; GFX9-NEXT: s_mov_b32 s33, s34
2609 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2610 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2612 ; GFX10-LABEL: test_call_external_void_func_f64_imm:
2614 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2615 ; GFX10-NEXT: s_mov_b32 s34, s33
2616 ; GFX10-NEXT: s_mov_b32 s33, s32
2617 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2618 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2619 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2620 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2621 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2622 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2623 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000
2624 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64@abs32@hi
2625 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64@abs32@lo
2626 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2627 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2628 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2629 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2630 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2631 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2632 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2633 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2634 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2635 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2636 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2637 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2638 ; GFX10-NEXT: s_mov_b32 s33, s34
2639 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2640 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2642 ; GFX11-LABEL: test_call_external_void_func_f64_imm:
2644 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2645 ; GFX11-NEXT: s_mov_b32 s0, s33
2646 ; GFX11-NEXT: s_mov_b32 s33, s32
2647 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2648 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2649 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2650 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2651 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000
2652 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi
2653 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo
2654 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2655 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2656 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2657 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2658 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2659 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2660 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2661 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2662 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2663 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2664 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2665 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2666 ; GFX11-NEXT: s_mov_b32 s33, s0
2667 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2668 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2670 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm:
2671 ; GFX10-SCRATCH: ; %bb.0:
2672 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2673 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2674 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2675 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2676 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2677 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2678 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2679 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2680 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
2681 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40100000
2682 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi
2683 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo
2684 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2685 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2686 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2687 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2688 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2689 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2690 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2691 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2692 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2693 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2694 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2695 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2696 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2697 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2698 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2699 call amdgpu_gfx void @external_void_func_f64(double 4.0)
; Passes the constant <2 x double> <2.0, 4.0> to @external_void_func_v2f64.
; For every target configuration the expected code materializes the argument
; as four 32-bit moves into v0-v3: both low halves are 0, and the high halves
; are 2.0 (an inline constant, bit pattern 0x40000000) and 0x40100000, i.e.
; the upper 32 bits of double 2.0 and double 4.0.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
2703 define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 {
2704 ; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
2706 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2707 ; GFX9-NEXT: s_mov_b32 s34, s33
2708 ; GFX9-NEXT: s_mov_b32 s33, s32
2709 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2710 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2711 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2712 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2713 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2714 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f64@abs32@hi
2715 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f64@abs32@lo
2716 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2717 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2718 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
2719 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
2720 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2721 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2722 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2723 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2724 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2725 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2726 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2727 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2728 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2729 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2730 ; GFX9-NEXT: s_mov_b32 s33, s34
2731 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2732 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2734 ; GFX10-LABEL: test_call_external_void_func_v2f64_imm:
2736 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2737 ; GFX10-NEXT: s_mov_b32 s34, s33
2738 ; GFX10-NEXT: s_mov_b32 s33, s32
2739 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2740 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2741 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2742 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2743 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2744 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2745 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2746 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
2747 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000
2748 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2749 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64@abs32@hi
2750 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f64@abs32@lo
2751 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2752 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2753 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2754 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2755 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2756 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2757 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2758 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2759 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2760 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2761 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2762 ; GFX10-NEXT: s_mov_b32 s33, s34
2763 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2764 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2766 ; GFX11-LABEL: test_call_external_void_func_v2f64_imm:
2768 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2769 ; GFX11-NEXT: s_mov_b32 s0, s33
2770 ; GFX11-NEXT: s_mov_b32 s33, s32
2771 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2772 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2773 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2774 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2775 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2776 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2777 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2778 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64@abs32@hi
2779 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64@abs32@lo
2780 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2781 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2782 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2783 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2784 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2785 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2786 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2787 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2788 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2789 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2790 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2791 ; GFX11-NEXT: s_mov_b32 s33, s0
2792 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2793 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2795 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm:
2796 ; GFX10-SCRATCH: ; %bb.0:
2797 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2798 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2799 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2800 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2801 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2802 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2803 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2804 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2805 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
2806 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2807 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
2808 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000
2809 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2810 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f64@abs32@hi
2811 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f64@abs32@lo
2812 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2813 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2814 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2815 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2816 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2817 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2818 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2819 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2820 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2821 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2822 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2823 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2824 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2825 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2826 call amdgpu_gfx void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
; Passes the constant <3 x double> <2.0, 4.0, 8.0> to
; @external_void_func_v3f64. For every target configuration the expected code
; materializes the argument as six 32-bit moves into v0-v5: the low halves are
; 0, and the high halves are 2.0 (an inline constant, bit pattern 0x40000000),
; 0x40100000 and 0x40200000, i.e. the upper 32 bits of double 2.0, 4.0 and 8.0.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
2830 define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 {
2831 ; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
2833 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2834 ; GFX9-NEXT: s_mov_b32 s34, s33
2835 ; GFX9-NEXT: s_mov_b32 s33, s32
2836 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2837 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2838 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2839 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2840 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2841 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f64@abs32@hi
2842 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f64@abs32@lo
2843 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2844 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2845 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
2846 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
2847 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
2848 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000
2849 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2850 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2851 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2852 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2853 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2854 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2855 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2856 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2857 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2858 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2859 ; GFX9-NEXT: s_mov_b32 s33, s34
2860 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2861 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2863 ; GFX10-LABEL: test_call_external_void_func_v3f64_imm:
2865 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2866 ; GFX10-NEXT: s_mov_b32 s34, s33
2867 ; GFX10-NEXT: s_mov_b32 s33, s32
2868 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2869 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2870 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2871 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2872 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2873 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2874 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2875 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
2876 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000
2877 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2878 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
2879 ; GFX10-NEXT: v_mov_b32_e32 v5, 0x40200000
2880 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64@abs32@hi
2881 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f64@abs32@lo
2882 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2883 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2884 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2885 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2886 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2887 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2888 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2889 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2890 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2891 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2892 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2893 ; GFX10-NEXT: s_mov_b32 s33, s34
2894 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2895 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2897 ; GFX11-LABEL: test_call_external_void_func_v3f64_imm:
2899 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2900 ; GFX11-NEXT: s_mov_b32 s0, s33
2901 ; GFX11-NEXT: s_mov_b32 s33, s32
2902 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2903 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2904 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2905 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2906 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2907 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2908 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2909 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000
2910 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64@abs32@hi
2911 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64@abs32@lo
2912 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2913 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2914 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2915 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2916 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2917 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2918 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2919 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2920 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2921 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2922 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2923 ; GFX11-NEXT: s_mov_b32 s33, s0
2924 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2925 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2927 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm:
2928 ; GFX10-SCRATCH: ; %bb.0:
2929 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2930 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2931 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2932 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2933 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2934 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2935 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2936 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2937 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
2938 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2939 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
2940 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000
2941 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2942 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
2943 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x40200000
2944 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64@abs32@hi
2945 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f64@abs32@lo
2946 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2947 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2948 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2949 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2950 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2951 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2952 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2953 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2954 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2955 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2956 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2957 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2958 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2959 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2960 call amdgpu_gfx void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
; Loads a <2 x i8> from the null pointer in addrspace(1) and passes it to
; @external_void_func_v2i8. The expected code issues a single 16-bit global
; load into v0, waits for it, and then shifts v0 right by 8 into v1 just
; before the call, so v1 holds the second byte of the loaded value.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
2964 define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 {
2965 ; GFX9-LABEL: test_call_external_void_func_v2i8:
2967 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2968 ; GFX9-NEXT: s_mov_b32 s34, s33
2969 ; GFX9-NEXT: s_mov_b32 s33, s32
2970 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2971 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2972 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2973 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2974 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
2975 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off
2976 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2977 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2978 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i8@abs32@hi
2979 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i8@abs32@lo
2980 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2981 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2982 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2983 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 8, v0
2984 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2985 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2986 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2987 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2988 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2989 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2990 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2991 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2992 ; GFX9-NEXT: s_mov_b32 s33, s34
2993 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2994 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2996 ; GFX10-LABEL: test_call_external_void_func_v2i8:
2998 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2999 ; GFX10-NEXT: s_mov_b32 s34, s33
3000 ; GFX10-NEXT: s_mov_b32 s33, s32
3001 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3002 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3003 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3004 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3005 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3006 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3007 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3008 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i8@abs32@hi
3009 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i8@abs32@lo
3010 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3011 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off
3012 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3013 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3014 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3015 ; GFX10-NEXT: v_lshrrev_b16 v1, 8, v0
3016 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3017 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3018 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3019 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3020 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3021 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3022 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3023 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3024 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3025 ; GFX10-NEXT: s_mov_b32 s33, s34
3026 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3027 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3029 ; GFX11-LABEL: test_call_external_void_func_v2i8:
3031 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3032 ; GFX11-NEXT: s_mov_b32 s0, s33
3033 ; GFX11-NEXT: s_mov_b32 s33, s32
3034 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3035 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3036 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3037 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3038 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
3039 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3040 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi
3041 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo
3042 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3043 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off
3044 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3045 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3046 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3047 ; GFX11-NEXT: v_lshrrev_b16 v1, 8, v0
3048 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3049 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
3050 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3051 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3052 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3053 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3054 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3055 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3056 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3057 ; GFX11-NEXT: s_mov_b32 s33, s0
3058 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3059 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3061 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i8:
3062 ; GFX10-SCRATCH: ; %bb.0:
3063 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3064 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3065 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3066 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3067 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3068 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3069 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3070 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3071 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3072 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3073 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi
3074 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo
3075 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3076 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off
3077 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3078 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3079 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3080 ; GFX10-SCRATCH-NEXT: v_lshrrev_b16 v1, 8, v0
3081 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3082 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3083 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3084 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3085 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3086 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3087 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3088 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3089 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3090 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3091 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3092 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3093 %val = load <2 x i8>, ptr addrspace(1) null
3094 call amdgpu_gfx void @external_void_func_v2i8(<2 x i8> %val)
; Loads a <3 x i8> from the null pointer in addrspace(1) and passes it to
; @external_void_func_v3i8. The expected code issues a single 32-bit global
; load into v0, waits for it, and then shifts v0 right by 8 into v1 and by 16
; into v2 just before the call.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
3098 define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 {
3099 ; GFX9-LABEL: test_call_external_void_func_v3i8:
3101 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3102 ; GFX9-NEXT: s_mov_b32 s34, s33
3103 ; GFX9-NEXT: s_mov_b32 s33, s32
3104 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3105 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3106 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3107 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3108 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3109 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
3110 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3111 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3112 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8@abs32@hi
3113 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8@abs32@lo
3114 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3115 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3116 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3117 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3118 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3119 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3120 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3121 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3122 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3123 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3124 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3125 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3126 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3127 ; GFX9-NEXT: s_mov_b32 s33, s34
3128 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3129 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3131 ; GFX10-LABEL: test_call_external_void_func_v3i8:
3133 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3134 ; GFX10-NEXT: s_mov_b32 s34, s33
3135 ; GFX10-NEXT: s_mov_b32 s33, s32
3136 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3137 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3138 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3139 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3140 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3141 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3142 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3143 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8@abs32@hi
3144 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8@abs32@lo
3145 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3146 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
3147 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3148 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3149 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3150 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3151 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3152 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3153 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3154 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3155 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3156 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3157 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3158 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3159 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3160 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3161 ; GFX10-NEXT: s_mov_b32 s33, s34
3162 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3163 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3165 ; GFX11-LABEL: test_call_external_void_func_v3i8:
3167 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3168 ; GFX11-NEXT: s_mov_b32 s0, s33
3169 ; GFX11-NEXT: s_mov_b32 s33, s32
3170 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3171 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3172 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3173 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3174 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
3175 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3176 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i8@abs32@hi
3177 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i8@abs32@lo
3178 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3179 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
3180 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3181 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3182 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3183 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3184 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3185 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3186 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3187 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3188 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3189 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3190 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3191 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3192 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3193 ; GFX11-NEXT: s_mov_b32 s33, s0
3194 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3195 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3197 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i8:
3198 ; GFX10-SCRATCH: ; %bb.0:
3199 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3200 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3201 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3202 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3203 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3204 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3205 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3206 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3207 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3208 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3209 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8@abs32@hi
3210 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8@abs32@lo
3211 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3212 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off
3213 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3214 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3215 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3216 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3217 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3218 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3219 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3220 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3221 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3222 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3223 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3224 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3225 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3226 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3227 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3228 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3229 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3230 %val = load <3 x i8>, ptr addrspace(1) null
3231 call amdgpu_gfx void @external_void_func_v3i8(<3 x i8> %val)
; Loads a <4 x i8> from the null pointer in addrspace(1) and passes it to
; @external_void_func_v4i8. The expected code issues a single 32-bit global
; load into v0, waits for it, and then shifts v0 right by 8, 16 and 24 into
; v1, v2 and v3 just before the call. The gfx900 output emits the shift by 16
; before the shift by 8, while the other configurations emit them in
; ascending order; the checks pin that exact instruction order.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
3235 define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 {
3236 ; GFX9-LABEL: test_call_external_void_func_v4i8:
3238 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3239 ; GFX9-NEXT: s_mov_b32 s34, s33
3240 ; GFX9-NEXT: s_mov_b32 s33, s32
3241 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3242 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3243 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3244 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3245 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3246 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
3247 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3248 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3249 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i8@abs32@hi
3250 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i8@abs32@lo
3251 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3252 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3253 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3254 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3255 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3256 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3257 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3258 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3259 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3260 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3261 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3262 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3263 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3264 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3265 ; GFX9-NEXT: s_mov_b32 s33, s34
3266 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3267 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3269 ; GFX10-LABEL: test_call_external_void_func_v4i8:
3271 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3272 ; GFX10-NEXT: s_mov_b32 s34, s33
3273 ; GFX10-NEXT: s_mov_b32 s33, s32
3274 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3275 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3276 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3277 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3278 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3279 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3280 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3281 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i8@abs32@hi
3282 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i8@abs32@lo
3283 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3284 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
3285 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3286 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3287 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3288 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3289 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3290 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3291 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3292 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3293 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3294 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3295 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3296 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3297 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3298 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3299 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3300 ; GFX10-NEXT: s_mov_b32 s33, s34
3301 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3302 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3304 ; GFX11-LABEL: test_call_external_void_func_v4i8:
3306 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3307 ; GFX11-NEXT: s_mov_b32 s0, s33
3308 ; GFX11-NEXT: s_mov_b32 s33, s32
3309 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3310 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3311 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3312 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3313 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
3314 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3315 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i8@abs32@hi
3316 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i8@abs32@lo
3317 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3318 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
3319 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3320 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3321 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3322 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3323 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3324 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3325 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3326 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3327 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3328 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3329 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3330 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3331 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3332 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3333 ; GFX11-NEXT: s_mov_b32 s33, s0
3334 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3335 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3337 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i8:
3338 ; GFX10-SCRATCH: ; %bb.0:
3339 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3340 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3341 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3342 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3343 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3344 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3345 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3346 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3347 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3348 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3349 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i8@abs32@hi
3350 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i8@abs32@lo
3351 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3352 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off
3353 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3354 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3355 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3356 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3357 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3358 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3359 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3360 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3361 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3362 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3363 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3364 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3365 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3366 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3367 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3368 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3369 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3370 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3371 %val = load <4 x i8>, ptr addrspace(1) null
3372 call amdgpu_gfx void @external_void_func_v4i8(<4 x i8> %val)
; Calls external_void_func_v5i8 with a <5 x i8> value loaded from the null
; pointer in addrspace(1). For each checked target the expected code does one
; 64-bit global load and then uses shifts and moves so that the five elements
; are in v0-v4 when s_swappc_b64 transfers control to the callee.
; The check lines in this function are autogenerated (see the first line of
; this file); regenerate them with the update script rather than editing them.
3376 define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 {
3377 ; GFX9-LABEL: test_call_external_void_func_v5i8:
3379 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3380 ; GFX9-NEXT: s_mov_b32 s34, s33
3381 ; GFX9-NEXT: s_mov_b32 s33, s32
3382 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3383 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3384 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3385 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3386 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3387 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
3388 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3389 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3390 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i8@abs32@hi
3391 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i8@abs32@lo
3392 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3393 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3394 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3395 ; GFX9-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
3396 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v5
3397 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v5
3398 ; GFX9-NEXT: v_mov_b32_e32 v0, v5
3399 ; GFX9-NEXT: v_mov_b32_e32 v4, v6
3400 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3401 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3402 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3403 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3404 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3405 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3406 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3407 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3408 ; GFX9-NEXT: s_mov_b32 s33, s34
3409 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3410 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3412 ; GFX10-LABEL: test_call_external_void_func_v5i8:
3414 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3415 ; GFX10-NEXT: s_mov_b32 s34, s33
3416 ; GFX10-NEXT: s_mov_b32 s33, s32
3417 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3418 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3419 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3420 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3421 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3422 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3423 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3424 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i8@abs32@hi
3425 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i8@abs32@lo
3426 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3427 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
3428 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3429 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3430 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3431 ; GFX10-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
3432 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v5
3433 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v5
3434 ; GFX10-NEXT: v_mov_b32_e32 v0, v5
3435 ; GFX10-NEXT: v_mov_b32_e32 v4, v6
3436 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3437 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3438 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3439 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3440 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3441 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3442 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3443 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3444 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3445 ; GFX10-NEXT: s_mov_b32 s33, s34
3446 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3447 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3449 ; GFX11-LABEL: test_call_external_void_func_v5i8:
3451 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3452 ; GFX11-NEXT: s_mov_b32 s0, s33
3453 ; GFX11-NEXT: s_mov_b32 s33, s32
3454 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3455 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3456 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3457 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3458 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
3459 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3460 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i8@abs32@hi
3461 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i8@abs32@lo
3462 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3463 ; GFX11-NEXT: global_load_b64 v[5:6], v[0:1], off
3464 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3465 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3466 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3467 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
3468 ; GFX11-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
3469 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v5
3470 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v5
3471 ; GFX11-NEXT: v_mov_b32_e32 v4, v6
3472 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3473 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3474 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3475 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3476 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3477 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3478 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3479 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3480 ; GFX11-NEXT: s_mov_b32 s33, s0
3481 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3482 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3484 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i8:
3485 ; GFX10-SCRATCH: ; %bb.0:
3486 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3487 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3488 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3489 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3490 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3491 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3492 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3493 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3494 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3495 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3496 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i8@abs32@hi
3497 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i8@abs32@lo
3498 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3499 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
3500 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3501 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3502 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3503 ; GFX10-SCRATCH-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
3504 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v5
3505 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v5
3506 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, v5
3507 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v6
3508 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3509 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3510 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3511 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3512 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3513 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3514 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3515 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3516 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3517 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3518 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3519 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3520 %val = load <5 x i8>, ptr addrspace(1) null
3521 call amdgpu_gfx void @external_void_func_v5i8(<5 x i8> %val)
; Calls external_void_func_v8i8 with a <8 x i8> value loaded from the null
; pointer in addrspace(1). For each checked target the expected code does one
; 64-bit global load into v[0:1] and then uses shifts and moves (with v8 as a
; temporary) so that the eight elements are in v0-v7 at the s_swappc_b64 call.
; The check lines in this function are autogenerated (see the first line of
; this file); regenerate them with the update script rather than editing them.
3525 define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 {
3526 ; GFX9-LABEL: test_call_external_void_func_v8i8:
3528 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3529 ; GFX9-NEXT: s_mov_b32 s34, s33
3530 ; GFX9-NEXT: s_mov_b32 s33, s32
3531 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3532 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3533 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3534 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3535 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3536 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
3537 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3538 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3539 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i8@abs32@hi
3540 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i8@abs32@lo
3541 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3542 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3543 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3544 ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v0
3545 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3546 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3547 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3548 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3549 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3550 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
3551 ; GFX9-NEXT: v_mov_b32_e32 v1, v8
3552 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3553 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3554 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3555 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3556 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3557 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3558 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3559 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3560 ; GFX9-NEXT: s_mov_b32 s33, s34
3561 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3562 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3564 ; GFX10-LABEL: test_call_external_void_func_v8i8:
3566 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3567 ; GFX10-NEXT: s_mov_b32 s34, s33
3568 ; GFX10-NEXT: s_mov_b32 s33, s32
3569 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3570 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3571 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3572 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3573 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3574 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3575 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3576 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i8@abs32@hi
3577 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i8@abs32@lo
3578 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3579 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
3580 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3581 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3582 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3583 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v0
3584 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3585 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3586 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3587 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3588 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3589 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
3590 ; GFX10-NEXT: v_mov_b32_e32 v1, v8
3591 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3592 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3593 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3594 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3595 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3596 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3597 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3598 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3599 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3600 ; GFX10-NEXT: s_mov_b32 s33, s34
3601 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3602 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3604 ; GFX11-LABEL: test_call_external_void_func_v8i8:
3606 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3607 ; GFX11-NEXT: s_mov_b32 s0, s33
3608 ; GFX11-NEXT: s_mov_b32 s33, s32
3609 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3610 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3611 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3612 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3613 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
3614 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3615 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i8@abs32@hi
3616 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i8@abs32@lo
3617 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3618 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
3619 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3620 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3621 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3622 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 8, v0
3623 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3624 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3625 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3626 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3627 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3628 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v8
3629 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3630 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3631 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3632 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3633 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3634 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3635 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3636 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3637 ; GFX11-NEXT: s_mov_b32 s33, s0
3638 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3639 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3641 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i8:
3642 ; GFX10-SCRATCH: ; %bb.0:
3643 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3644 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3645 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3646 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3647 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3648 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3649 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3650 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3651 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3652 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3653 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i8@abs32@hi
3654 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i8@abs32@lo
3655 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3656 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
3657 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3658 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3659 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3660 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v8, 8, v0
3661 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3662 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3663 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3664 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3665 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3666 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
3667 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v8
3668 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3669 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3670 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3671 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3672 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3673 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3674 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3675 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3676 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3677 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3678 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3679 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3680 %val = load <8 x i8>, ptr addrspace(1) null
3681 call amdgpu_gfx void @external_void_func_v8i8(<8 x i8> %val)
; Calls external_void_func_v32i8 with a <32 x i8> value loaded from the null
; pointer in addrspace(1). For each checked target the expected code issues two
; 128-bit global loads (addresses 0 and 16) and then uses shifts and moves,
; with v32-v37 as temporaries, so that the 32 elements are in v0-v31 at the
; s_swappc_b64 call.
; The check lines in this function are autogenerated (see the first line of
; this file); regenerate them with the update script rather than editing them.
3685 define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 {
3686 ; GFX9-LABEL: test_call_external_void_func_v32i8:
3688 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3689 ; GFX9-NEXT: s_mov_b32 s34, s33
3690 ; GFX9-NEXT: s_mov_b32 s33, s32
3691 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3692 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3693 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3694 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3695 ; GFX9-NEXT: v_mov_b32_e32 v4, 16
3696 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3697 ; GFX9-NEXT: v_mov_b32_e32 v5, 0
3698 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
3699 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3700 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v[4:5], off
3701 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3702 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i8@abs32@hi
3703 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i8@abs32@lo
3704 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3705 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3706 ; GFX9-NEXT: s_waitcnt vmcnt(1)
3707 ; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v0
3708 ; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v0
3709 ; GFX9-NEXT: v_lshrrev_b32_e32 v37, 24, v0
3710 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3711 ; GFX9-NEXT: v_lshrrev_b32_e32 v32, 8, v16
3712 ; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v16
3713 ; GFX9-NEXT: v_lshrrev_b32_e32 v34, 24, v16
3714 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3715 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3716 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3717 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
3718 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
3719 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
3720 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3
3721 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3
3722 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3
3723 ; GFX9-NEXT: v_lshrrev_b32_e32 v21, 8, v17
3724 ; GFX9-NEXT: v_lshrrev_b32_e32 v22, 16, v17
3725 ; GFX9-NEXT: v_lshrrev_b32_e32 v23, 24, v17
3726 ; GFX9-NEXT: v_lshrrev_b32_e32 v25, 8, v18
3727 ; GFX9-NEXT: v_lshrrev_b32_e32 v26, 16, v18
3728 ; GFX9-NEXT: v_lshrrev_b32_e32 v27, 24, v18
3729 ; GFX9-NEXT: v_lshrrev_b32_e32 v29, 8, v19
3730 ; GFX9-NEXT: v_lshrrev_b32_e32 v30, 16, v19
3731 ; GFX9-NEXT: v_lshrrev_b32_e32 v31, 24, v19
3732 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
3733 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
3734 ; GFX9-NEXT: v_mov_b32_e32 v12, v3
3735 ; GFX9-NEXT: v_mov_b32_e32 v20, v17
3736 ; GFX9-NEXT: v_mov_b32_e32 v24, v18
3737 ; GFX9-NEXT: v_mov_b32_e32 v28, v19
3738 ; GFX9-NEXT: v_mov_b32_e32 v1, v35
3739 ; GFX9-NEXT: v_mov_b32_e32 v2, v36
3740 ; GFX9-NEXT: v_mov_b32_e32 v3, v37
3741 ; GFX9-NEXT: v_mov_b32_e32 v17, v32
3742 ; GFX9-NEXT: v_mov_b32_e32 v18, v33
3743 ; GFX9-NEXT: v_mov_b32_e32 v19, v34
3744 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3745 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3746 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3747 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3748 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3749 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3750 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3751 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3752 ; GFX9-NEXT: s_mov_b32 s33, s34
3753 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3754 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3756 ; GFX10-LABEL: test_call_external_void_func_v32i8:
3758 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3759 ; GFX10-NEXT: s_mov_b32 s34, s33
3760 ; GFX10-NEXT: s_mov_b32 s33, s32
3761 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3762 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3763 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3764 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3765 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3766 ; GFX10-NEXT: v_mov_b32_e32 v4, 16
3767 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3768 ; GFX10-NEXT: v_mov_b32_e32 v5, 0
3769 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3770 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i8@abs32@hi
3771 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i8@abs32@lo
3772 ; GFX10-NEXT: s_clause 0x1
3773 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
3774 ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[4:5], off
3775 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3776 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3777 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3778 ; GFX10-NEXT: s_waitcnt vmcnt(1)
3779 ; GFX10-NEXT: v_lshrrev_b32_e32 v35, 8, v0
3780 ; GFX10-NEXT: v_lshrrev_b32_e32 v36, 16, v0
3781 ; GFX10-NEXT: v_lshrrev_b32_e32 v37, 24, v0
3782 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3783 ; GFX10-NEXT: v_lshrrev_b32_e32 v32, 8, v16
3784 ; GFX10-NEXT: v_lshrrev_b32_e32 v33, 16, v16
3785 ; GFX10-NEXT: v_lshrrev_b32_e32 v34, 24, v16
3786 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3787 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3788 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3789 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v2
3790 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
3791 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2
3792 ; GFX10-NEXT: v_lshrrev_b32_e32 v13, 8, v3
3793 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v3
3794 ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 24, v3
3795 ; GFX10-NEXT: v_lshrrev_b32_e32 v21, 8, v17
3796 ; GFX10-NEXT: v_lshrrev_b32_e32 v22, 16, v17
3797 ; GFX10-NEXT: v_lshrrev_b32_e32 v23, 24, v17
3798 ; GFX10-NEXT: v_lshrrev_b32_e32 v25, 8, v18
3799 ; GFX10-NEXT: v_lshrrev_b32_e32 v26, 16, v18
3800 ; GFX10-NEXT: v_lshrrev_b32_e32 v27, 24, v18
3801 ; GFX10-NEXT: v_lshrrev_b32_e32 v29, 8, v19
3802 ; GFX10-NEXT: v_lshrrev_b32_e32 v30, 16, v19
3803 ; GFX10-NEXT: v_lshrrev_b32_e32 v31, 24, v19
3804 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
3805 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
3806 ; GFX10-NEXT: v_mov_b32_e32 v12, v3
3807 ; GFX10-NEXT: v_mov_b32_e32 v20, v17
3808 ; GFX10-NEXT: v_mov_b32_e32 v24, v18
3809 ; GFX10-NEXT: v_mov_b32_e32 v28, v19
3810 ; GFX10-NEXT: v_mov_b32_e32 v1, v35
3811 ; GFX10-NEXT: v_mov_b32_e32 v2, v36
3812 ; GFX10-NEXT: v_mov_b32_e32 v3, v37
3813 ; GFX10-NEXT: v_mov_b32_e32 v17, v32
3814 ; GFX10-NEXT: v_mov_b32_e32 v18, v33
3815 ; GFX10-NEXT: v_mov_b32_e32 v19, v34
3816 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3817 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3818 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3819 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3820 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3821 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3822 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3823 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3824 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3825 ; GFX10-NEXT: s_mov_b32 s33, s34
3826 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3827 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3829 ; GFX11-LABEL: test_call_external_void_func_v32i8:
3831 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3832 ; GFX11-NEXT: s_mov_b32 s0, s33
3833 ; GFX11-NEXT: s_mov_b32 s33, s32
3834 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3835 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3836 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3837 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3838 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 16
3839 ; GFX11-NEXT: v_mov_b32_e32 v5, 0
3840 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3841 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i8@abs32@hi
3842 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
3843 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i8@abs32@lo
3844 ; GFX11-NEXT: global_load_b128 v[16:19], v[4:5], off
3845 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3846 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3847 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3848 ; GFX11-NEXT: s_waitcnt vmcnt(1)
3849 ; GFX11-NEXT: v_lshrrev_b32_e32 v35, 8, v0
3850 ; GFX11-NEXT: v_lshrrev_b32_e32 v36, 16, v0
3851 ; GFX11-NEXT: v_lshrrev_b32_e32 v37, 24, v0
3852 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3853 ; GFX11-NEXT: v_lshrrev_b32_e32 v32, 8, v16
3854 ; GFX11-NEXT: v_lshrrev_b32_e32 v33, 16, v16
3855 ; GFX11-NEXT: v_lshrrev_b32_e32 v34, 24, v16
3856 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3857 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3858 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3859 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
3860 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
3861 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
3862 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3
3863 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3
3864 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3
3865 ; GFX11-NEXT: v_lshrrev_b32_e32 v21, 8, v17
3866 ; GFX11-NEXT: v_lshrrev_b32_e32 v22, 16, v17
3867 ; GFX11-NEXT: v_lshrrev_b32_e32 v23, 24, v17
3868 ; GFX11-NEXT: v_lshrrev_b32_e32 v25, 8, v18
3869 ; GFX11-NEXT: v_lshrrev_b32_e32 v26, 16, v18
3870 ; GFX11-NEXT: v_lshrrev_b32_e32 v27, 24, v18
3871 ; GFX11-NEXT: v_lshrrev_b32_e32 v29, 8, v19
3872 ; GFX11-NEXT: v_lshrrev_b32_e32 v30, 16, v19
3873 ; GFX11-NEXT: v_lshrrev_b32_e32 v31, 24, v19
3874 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v35
3875 ; GFX11-NEXT: v_mov_b32_e32 v8, v2
3876 ; GFX11-NEXT: v_mov_b32_e32 v12, v3
3877 ; GFX11-NEXT: v_mov_b32_e32 v20, v17
3878 ; GFX11-NEXT: v_mov_b32_e32 v24, v18
3879 ; GFX11-NEXT: v_dual_mov_b32 v28, v19 :: v_dual_mov_b32 v19, v34
3880 ; GFX11-NEXT: v_dual_mov_b32 v2, v36 :: v_dual_mov_b32 v3, v37
3881 ; GFX11-NEXT: v_dual_mov_b32 v17, v32 :: v_dual_mov_b32 v18, v33
3882 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3883 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3884 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3885 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3886 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3887 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3888 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3889 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3890 ; GFX11-NEXT: s_mov_b32 s33, s0
3891 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3892 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3894 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i8:
3895 ; GFX10-SCRATCH: ; %bb.0:
3896 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3897 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3898 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3899 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3900 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3901 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3902 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3903 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3904 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 16
3905 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3906 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0
3907 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3908 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i8@abs32@hi
3909 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i8@abs32@lo
3910 ; GFX10-SCRATCH-NEXT: s_clause 0x1
3911 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
3912 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[4:5], off
3913 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3914 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3915 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3916 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1)
3917 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v35, 8, v0
3918 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v36, 16, v0
3919 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v37, 24, v0
3920 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3921 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v32, 8, v16
3922 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v33, 16, v16
3923 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v34, 24, v16
3924 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3925 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3926 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3927 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v9, 8, v2
3928 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v10, 16, v2
3929 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v11, 24, v2
3930 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v13, 8, v3
3931 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v14, 16, v3
3932 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v15, 24, v3
3933 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v21, 8, v17
3934 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v22, 16, v17
3935 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v23, 24, v17
3936 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v25, 8, v18
3937 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v26, 16, v18
3938 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v27, 24, v18
3939 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v29, 8, v19
3940 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v30, 16, v19
3941 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v31, 24, v19
3942 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
3943 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, v2
3944 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, v3
3945 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, v17
3946 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, v18
3947 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, v19
3948 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v35
3949 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v36
3950 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v37
3951 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, v32
3952 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, v33
3953 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, v34
3954 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3955 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3956 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3957 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3958 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3959 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3960 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3961 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3962 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3963 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3964 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3965 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3966 %val = load <32 x i8>, ptr addrspace(1) null
3967 call amdgpu_gfx void @external_void_func_v32i8(<32 x i8> %val)
3972 define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 {
3973 ; GFX9-LABEL: test_call_external_void_func_i8_ret:
3975 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3976 ; GFX9-NEXT: s_mov_b32 s34, s33
3977 ; GFX9-NEXT: s_mov_b32 s33, s32
3978 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3979 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
3980 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3981 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
3982 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
3983 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
3984 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
3985 ; GFX9-NEXT: global_load_ubyte v0, v[41:42], off
3986 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3987 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3988 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_ret@abs32@hi
3989 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_ret@abs32@lo
3990 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3991 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3992 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3993 ; GFX9-NEXT: global_store_byte v[41:42], v0, off
3994 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
3995 ; GFX9-NEXT: s_nop 0
3996 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
3997 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3998 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3999 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4000 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4001 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4002 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4003 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4004 ; GFX9-NEXT: s_mov_b32 s33, s34
4005 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4006 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4008 ; GFX10-LABEL: test_call_external_void_func_i8_ret:
4010 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4011 ; GFX10-NEXT: s_mov_b32 s34, s33
4012 ; GFX10-NEXT: s_mov_b32 s33, s32
4013 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4014 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4015 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4016 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4017 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4018 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4019 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4020 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4021 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4022 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_ret@abs32@hi
4023 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_ret@abs32@lo
4024 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4025 ; GFX10-NEXT: global_load_ubyte v0, v[41:42], off
4026 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4027 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4028 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4029 ; GFX10-NEXT: global_store_byte v[41:42], v0, off
4030 ; GFX10-NEXT: s_clause 0x1
4031 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4032 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4033 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4034 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4035 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4036 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4037 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4038 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4039 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4040 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4041 ; GFX10-NEXT: s_mov_b32 s33, s34
4042 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4043 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4045 ; GFX11-LABEL: test_call_external_void_func_i8_ret:
4047 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4048 ; GFX11-NEXT: s_mov_b32 s0, s33
4049 ; GFX11-NEXT: s_mov_b32 s33, s32
4050 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4051 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
4052 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4053 ; GFX11-NEXT: s_clause 0x1
4054 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
4055 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
4056 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
4057 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
4058 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
4059 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi
4060 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo
4061 ; GFX11-NEXT: s_add_i32 s32, s32, 16
4062 ; GFX11-NEXT: global_load_u8 v0, v[41:42], off
4063 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
4064 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
4065 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
4066 ; GFX11-NEXT: global_store_b8 v[41:42], v0, off
4067 ; GFX11-NEXT: s_clause 0x1
4068 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
4069 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
4070 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
4071 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4072 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4073 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4074 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
4075 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4076 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4077 ; GFX11-NEXT: s_mov_b32 s33, s0
4078 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4079 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4081 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_ret:
4082 ; GFX10-SCRATCH: ; %bb.0:
4083 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4084 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4085 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4086 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4087 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
4088 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4089 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4090 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4091 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4092 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
4093 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
4094 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4095 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi
4096 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo
4097 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4098 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[41:42], off
4099 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4100 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4101 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4102 ; GFX10-SCRATCH-NEXT: global_store_byte v[41:42], v0, off
4103 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4104 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
4105 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
4106 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
4107 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
4108 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
4109 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4110 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
4111 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4112 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4113 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4114 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
4115 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4116 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
4117 %val = load i8, ptr addrspace(1) null
4118 %tmp = call amdgpu_gfx i8 @external_void_func_i8_ret(i8 %val)
4119 store i8 %tmp, ptr addrspace(1) null
; Exercises passing a <2 x i8> to, and returning a <2 x i8> from, an external
; amdgpu_gfx function. Per the autogenerated checks below, the expected code
; loads the vector as one 16-bit value, shifts the high byte into v1 before the
; s_swappc_b64 call, and afterwards rebuilds a 16-bit value from the low byte
; of v0 and from v1 shifted left by 8, which is written with one 16-bit store.
; The check lines come from utils/update_llc_test_checks.py (see the NOTE at
; the top of the file); regenerate them instead of editing them by hand.
4124 define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 {
4125 ; GFX9-LABEL: test_call_external_void_func_v2i8_ret:
4127 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4128 ; GFX9-NEXT: s_mov_b32 s34, s33
4129 ; GFX9-NEXT: s_mov_b32 s33, s32
4130 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4131 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4132 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4133 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4134 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4135 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
4136 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
4137 ; GFX9-NEXT: global_load_ushort v0, v[41:42], off
4138 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
4139 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4140 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i8_ret@abs32@hi
4141 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i8_ret@abs32@lo
4142 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4143 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4144 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4145 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 8, v0
4146 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
4147 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
4148 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4149 ; GFX9-NEXT: global_store_short v[41:42], v0, off
4150 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4151 ; GFX9-NEXT: s_nop 0
4152 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4153 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
4154 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
4155 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4156 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4157 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4158 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4159 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4160 ; GFX9-NEXT: s_mov_b32 s33, s34
4161 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4162 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4164 ; GFX10-LABEL: test_call_external_void_func_v2i8_ret:
4166 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4167 ; GFX10-NEXT: s_mov_b32 s34, s33
4168 ; GFX10-NEXT: s_mov_b32 s33, s32
4169 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4170 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4171 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4172 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4173 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4174 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4175 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4176 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4177 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4178 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i8_ret@abs32@hi
4179 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i8_ret@abs32@lo
4180 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4181 ; GFX10-NEXT: global_load_ushort v0, v[41:42], off
4182 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4183 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4184 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4185 ; GFX10-NEXT: v_lshrrev_b16 v1, 8, v0
4186 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4187 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
4188 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4189 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4190 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4191 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4192 ; GFX10-NEXT: global_store_short v[41:42], v0, off
4193 ; GFX10-NEXT: s_clause 0x1
4194 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4195 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4196 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4197 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4198 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4199 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4200 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4201 ; GFX10-NEXT: s_mov_b32 s33, s34
4202 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4203 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4205 ; GFX11-LABEL: test_call_external_void_func_v2i8_ret:
4207 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4208 ; GFX11-NEXT: s_mov_b32 s0, s33
4209 ; GFX11-NEXT: s_mov_b32 s33, s32
4210 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4211 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
4212 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4213 ; GFX11-NEXT: s_clause 0x1
4214 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
4215 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
4216 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
4217 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
4218 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
4219 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi
4220 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo
4221 ; GFX11-NEXT: s_add_i32 s32, s32, 16
4222 ; GFX11-NEXT: global_load_u16 v0, v[41:42], off
4223 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
4224 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
4225 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4226 ; GFX11-NEXT: v_lshrrev_b16 v1, 8, v0
4227 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
4228 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
4229 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
4230 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
4231 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
4232 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4233 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4234 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4235 ; GFX11-NEXT: global_store_b16 v[41:42], v0, off
4236 ; GFX11-NEXT: s_clause 0x1
4237 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
4238 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
4239 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4240 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
4241 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4242 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4243 ; GFX11-NEXT: s_mov_b32 s33, s0
4244 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4245 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4247 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i8_ret:
4248 ; GFX10-SCRATCH: ; %bb.0:
4249 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4250 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4251 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4252 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4253 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
4254 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4255 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4256 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4257 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4258 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
4259 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
4260 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4261 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi
4262 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo
4263 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4264 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[41:42], off
4265 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4266 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4267 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4268 ; GFX10-SCRATCH-NEXT: v_lshrrev_b16 v1, 8, v0
4269 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4270 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
4271 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
4272 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
4273 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
4274 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4275 ; GFX10-SCRATCH-NEXT: global_store_short v[41:42], v0, off
4276 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4277 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
4278 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
4279 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4280 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
4281 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4282 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4283 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4284 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
4285 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4286 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: load a <2 x i8> from the null addrspace(1) pointer, pass it to
; the external callee, and store the returned vector back to the same pointer.
4287 %val = load <2 x i8>, ptr addrspace(1) null
4288 %tmp = call amdgpu_gfx <2 x i8> @external_void_func_v2i8_ret(<2 x i8> %val)
4289 store <2 x i8> %tmp, ptr addrspace(1) null
; Exercises passing a <3 x i8> to, and returning a <3 x i8> from, an external
; amdgpu_gfx function. Per the autogenerated checks below, the expected code
; loads one 32-bit value, shifts it right by 8 and 16 into v1 and v2 before the
; s_swappc_b64 call, and afterwards writes the result with two stores: v2 as a
; byte at address 2, and the low byte of v0 combined with v1 shifted left by 8
; as a 16-bit store at the null base address.
; The check lines come from utils/update_llc_test_checks.py (see the NOTE at
; the top of the file); regenerate them instead of editing them by hand.
4294 define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 {
4295 ; GFX9-LABEL: test_call_external_void_func_v3i8_ret:
4297 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4298 ; GFX9-NEXT: s_mov_b32 s34, s33
4299 ; GFX9-NEXT: s_mov_b32 s33, s32
4300 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4301 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4302 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4303 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4304 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4305 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
4306 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
4307 ; GFX9-NEXT: global_load_dword v0, v[41:42], off
4308 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
4309 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4310 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi
4311 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo
4312 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4313 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4314 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4315 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4316 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4317 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
4318 ; GFX9-NEXT: v_mov_b32_e32 v3, 2
4319 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
4320 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
4321 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4322 ; GFX9-NEXT: global_store_byte v[3:4], v2, off
4323 ; GFX9-NEXT: global_store_short v[41:42], v0, off
4324 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4325 ; GFX9-NEXT: s_nop 0
4326 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4327 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
4328 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
4329 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4330 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4331 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4332 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4333 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4334 ; GFX9-NEXT: s_mov_b32 s33, s34
4335 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4336 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4338 ; GFX10-LABEL: test_call_external_void_func_v3i8_ret:
4340 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4341 ; GFX10-NEXT: s_mov_b32 s34, s33
4342 ; GFX10-NEXT: s_mov_b32 s33, s32
4343 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4344 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4345 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4346 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4347 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4348 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4349 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4350 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4351 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4352 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi
4353 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo
4354 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4355 ; GFX10-NEXT: global_load_dword v0, v[41:42], off
4356 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4357 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4358 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4359 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4360 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4361 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4362 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
4363 ; GFX10-NEXT: v_mov_b32_e32 v3, 2
4364 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
4365 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4366 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4367 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4368 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4369 ; GFX10-NEXT: global_store_byte v[3:4], v2, off
4370 ; GFX10-NEXT: global_store_short v[41:42], v0, off
4371 ; GFX10-NEXT: s_clause 0x1
4372 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4373 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4374 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4375 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4376 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4377 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4378 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4379 ; GFX10-NEXT: s_mov_b32 s33, s34
4380 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4381 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4383 ; GFX11-LABEL: test_call_external_void_func_v3i8_ret:
4385 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4386 ; GFX11-NEXT: s_mov_b32 s0, s33
4387 ; GFX11-NEXT: s_mov_b32 s33, s32
4388 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4389 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
4390 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4391 ; GFX11-NEXT: s_clause 0x1
4392 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
4393 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
4394 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
4395 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
4396 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
4397 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi
4398 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo
4399 ; GFX11-NEXT: s_add_i32 s32, s32, 16
4400 ; GFX11-NEXT: global_load_b32 v0, v[41:42], off
4401 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
4402 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
4403 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4404 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4405 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4406 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
4407 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
4408 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v1
4409 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0
4410 ; GFX11-NEXT: v_mov_b32_e32 v0, 2
4411 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
4412 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
4413 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4414 ; GFX11-NEXT: v_or_b32_e32 v3, v4, v3
4415 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4416 ; GFX11-NEXT: s_clause 0x1
4417 ; GFX11-NEXT: global_store_b8 v[0:1], v2, off
4418 ; GFX11-NEXT: global_store_b16 v[41:42], v3, off
4419 ; GFX11-NEXT: s_clause 0x1
4420 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
4421 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
4422 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4423 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
4424 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4425 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4426 ; GFX11-NEXT: s_mov_b32 s33, s0
4427 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4428 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4430 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i8_ret:
4431 ; GFX10-SCRATCH: ; %bb.0:
4432 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4433 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4434 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4435 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4436 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
4437 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4438 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4439 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4440 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4441 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
4442 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
4443 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4444 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi
4445 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo
4446 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4447 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[41:42], off
4448 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4449 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4450 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4451 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4452 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4453 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4454 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
4455 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 2
4456 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
4457 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
4458 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
4459 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4460 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
4461 ; GFX10-SCRATCH-NEXT: global_store_byte v[3:4], v2, off
4462 ; GFX10-SCRATCH-NEXT: global_store_short v[41:42], v0, off
4463 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4464 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
4465 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
4466 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4467 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
4468 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4469 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4470 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4471 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
4472 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4473 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: load a <3 x i8> from the null addrspace(1) pointer, pass it to
; the external callee, and store the returned vector back to the same pointer.
4474 %val = load <3 x i8>, ptr addrspace(1) null
4475 %tmp = call amdgpu_gfx <3 x i8> @external_void_func_v3i8_ret(<3 x i8> %val)
4476 store <3 x i8> %tmp, ptr addrspace(1) null
; Exercises passing a <4 x i8> to, and returning a <4 x i8> from, an external
; amdgpu_gfx function. Per the autogenerated checks below, the expected code
; loads one 32-bit value, shifts it right by 8, 16 and 24 into v1, v2 and v3
; before the s_swappc_b64 call, and afterwards recombines v0..v3 (low byte of
; each even register OR'ed with the following register shifted left by 8, the
; upper pair placed in the high 16 bits) into one 32-bit store.
; The check lines come from utils/update_llc_test_checks.py (see the NOTE at
; the top of the file); regenerate them instead of editing them by hand.
4481 define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 {
4482 ; GFX9-LABEL: test_call_external_void_func_v4i8_ret:
4484 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4485 ; GFX9-NEXT: s_mov_b32 s34, s33
4486 ; GFX9-NEXT: s_mov_b32 s33, s32
4487 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4488 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4489 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4490 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4491 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4492 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
4493 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
4494 ; GFX9-NEXT: global_load_dword v0, v[41:42], off
4495 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
4496 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4497 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i8_ret@abs32@hi
4498 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i8_ret@abs32@lo
4499 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4500 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4501 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4502 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4503 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4504 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
4505 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
4506 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
4507 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4508 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v3
4509 ; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4510 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4511 ; GFX9-NEXT: global_store_dword v[41:42], v0, off
4512 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4513 ; GFX9-NEXT: s_nop 0
4514 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4515 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
4516 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
4517 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4518 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4519 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4520 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4521 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4522 ; GFX9-NEXT: s_mov_b32 s33, s34
4523 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4524 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4526 ; GFX10-LABEL: test_call_external_void_func_v4i8_ret:
4528 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4529 ; GFX10-NEXT: s_mov_b32 s34, s33
4530 ; GFX10-NEXT: s_mov_b32 s33, s32
4531 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4532 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4533 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4534 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4535 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4536 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4537 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4538 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4539 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4540 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i8_ret@abs32@hi
4541 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i8_ret@abs32@lo
4542 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4543 ; GFX10-NEXT: global_load_dword v0, v[41:42], off
4544 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4545 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4546 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4547 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4548 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4549 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
4550 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4551 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
4552 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3
4553 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4554 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4555 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4556 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4557 ; GFX10-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4558 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4559 ; GFX10-NEXT: global_store_dword v[41:42], v0, off
4560 ; GFX10-NEXT: s_clause 0x1
4561 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4562 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4563 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4564 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4565 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4566 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4567 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4568 ; GFX10-NEXT: s_mov_b32 s33, s34
4569 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4570 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4572 ; GFX11-LABEL: test_call_external_void_func_v4i8_ret:
4574 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4575 ; GFX11-NEXT: s_mov_b32 s0, s33
4576 ; GFX11-NEXT: s_mov_b32 s33, s32
4577 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4578 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
4579 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4580 ; GFX11-NEXT: s_clause 0x1
4581 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
4582 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
4583 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
4584 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
4585 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
4586 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi
4587 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo
4588 ; GFX11-NEXT: s_add_i32 s32, s32, 16
4589 ; GFX11-NEXT: global_load_b32 v0, v[41:42], off
4590 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
4591 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
4592 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4593 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4594 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4595 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0
4596 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
4597 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
4598 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
4599 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
4600 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
4601 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
4602 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
4603 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4604 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4605 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4606 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
4607 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
4608 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
4609 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4610 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4611 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4612 ; GFX11-NEXT: global_store_b32 v[41:42], v0, off
4613 ; GFX11-NEXT: s_clause 0x1
4614 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
4615 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
4616 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4617 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
4618 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4619 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4620 ; GFX11-NEXT: s_mov_b32 s33, s0
4621 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4622 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4624 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i8_ret:
4625 ; GFX10-SCRATCH: ; %bb.0:
4626 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4627 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4628 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4629 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4630 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
4631 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4632 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4633 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4634 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4635 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
4636 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
4637 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4638 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi
4639 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo
4640 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4641 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[41:42], off
4642 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4643 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4644 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4645 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4646 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4647 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v3, 24, v0
4648 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4649 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
4650 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3
4651 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
4652 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
4653 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
4654 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4655 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4656 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4657 ; GFX10-SCRATCH-NEXT: global_store_dword v[41:42], v0, off
4658 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4659 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
4660 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
4661 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4662 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
4663 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4664 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4665 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4666 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
4667 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4668 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: load a <4 x i8> from the null addrspace(1) pointer, pass it to
; the external callee, and store the returned vector back to the same pointer.
4669 %val = load <4 x i8>, ptr addrspace(1) null
4670 %tmp = call amdgpu_gfx <4 x i8> @external_void_func_v4i8_ret(<4 x i8> %val)
4671 store <4 x i8> %tmp, ptr addrspace(1) null
; Test: load a <5 x i8> from the null addrspace(1) pointer, pass it to
; @external_void_func_v5i8_ret using the amdgpu_gfx calling convention, and
; store the returned <5 x i8> back through the same pointer.
; The check lines below are autogenerated (utils/update_llc_test_checks.py) and
; record the expected llc output per RUN configuration; regenerate them rather
; than editing by hand.
; In the expected output the five bytes are split into v0-v4 before
; s_swappc_b64 and repacked afterwards into one dword store plus one byte store.
4676 define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 {
4677 ; GFX9-LABEL: test_call_external_void_func_v5i8_ret:
4679 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4680 ; GFX9-NEXT: s_mov_b32 s34, s33
4681 ; GFX9-NEXT: s_mov_b32 s33, s32
4682 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4683 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4684 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4685 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4686 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4687 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
4688 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
4689 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[41:42], off
4690 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
4691 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4692 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i8_ret@abs32@hi
4693 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i8_ret@abs32@lo
4694 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4695 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4696 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4697 ; GFX9-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
4698 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v5
4699 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v5
4700 ; GFX9-NEXT: v_mov_b32_e32 v0, v5
4701 ; GFX9-NEXT: v_mov_b32_e32 v4, v6
4702 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
4703 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
4704 ; GFX9-NEXT: v_or_b32_sdwa v5, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4705 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v3
4706 ; GFX9-NEXT: v_or_b32_sdwa v2, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4707 ; GFX9-NEXT: v_mov_b32_e32 v0, 4
4708 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
4709 ; GFX9-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4710 ; GFX9-NEXT: global_store_byte v[0:1], v4, off
4711 ; GFX9-NEXT: global_store_dword v[41:42], v2, off
4712 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4713 ; GFX9-NEXT: s_nop 0
4714 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4715 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
4716 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
4717 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4718 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4719 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4720 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4721 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4722 ; GFX9-NEXT: s_mov_b32 s33, s34
4723 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4724 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4726 ; GFX10-LABEL: test_call_external_void_func_v5i8_ret:
4728 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4729 ; GFX10-NEXT: s_mov_b32 s34, s33
4730 ; GFX10-NEXT: s_mov_b32 s33, s32
4731 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4732 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4733 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4734 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4735 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4736 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4737 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4738 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4739 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4740 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i8_ret@abs32@hi
4741 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i8_ret@abs32@lo
4742 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4743 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[41:42], off
4744 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4745 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4746 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4747 ; GFX10-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
4748 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v5
4749 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v5
4750 ; GFX10-NEXT: v_mov_b32_e32 v0, v5
4751 ; GFX10-NEXT: v_mov_b32_e32 v4, v6
4752 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4753 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
4754 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3
4755 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4756 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4757 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4758 ; GFX10-NEXT: v_or_b32_sdwa v5, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4759 ; GFX10-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4760 ; GFX10-NEXT: v_mov_b32_e32 v0, 4
4761 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
4762 ; GFX10-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4763 ; GFX10-NEXT: global_store_byte v[0:1], v4, off
4764 ; GFX10-NEXT: global_store_dword v[41:42], v2, off
4765 ; GFX10-NEXT: s_clause 0x1
4766 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4767 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4768 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4769 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4770 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4771 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4772 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4773 ; GFX10-NEXT: s_mov_b32 s33, s34
4774 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4775 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4777 ; GFX11-LABEL: test_call_external_void_func_v5i8_ret:
4779 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4780 ; GFX11-NEXT: s_mov_b32 s0, s33
4781 ; GFX11-NEXT: s_mov_b32 s33, s32
4782 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4783 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
4784 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4785 ; GFX11-NEXT: s_clause 0x1
4786 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
4787 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
4788 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
4789 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
4790 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
4791 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi
4792 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo
4793 ; GFX11-NEXT: s_add_i32 s32, s32, 16
4794 ; GFX11-NEXT: global_load_b64 v[5:6], v[41:42], off
4795 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
4796 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
4797 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4798 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
4799 ; GFX11-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
4800 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v5
4801 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v5
4802 ; GFX11-NEXT: v_mov_b32_e32 v4, v6
4803 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
4804 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
4805 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
4806 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
4807 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
4808 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
4809 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
4810 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4811 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4812 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4813 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
4814 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
4815 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v0
4816 ; GFX11-NEXT: v_dual_mov_b32 v0, 4 :: v_dual_lshlrev_b32 v3, 16, v1
4817 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
4818 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
4819 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
4820 ; GFX11-NEXT: s_clause 0x1
4821 ; GFX11-NEXT: global_store_b8 v[0:1], v4, off
4822 ; GFX11-NEXT: global_store_b32 v[41:42], v2, off
4823 ; GFX11-NEXT: s_clause 0x1
4824 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
4825 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
4826 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4827 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
4828 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4829 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4830 ; GFX11-NEXT: s_mov_b32 s33, s0
4831 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4832 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4834 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i8_ret:
4835 ; GFX10-SCRATCH: ; %bb.0:
4836 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4837 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4838 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4839 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4840 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
4841 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4842 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4843 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4844 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4845 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
4846 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
4847 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4848 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi
4849 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo
4850 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4851 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[41:42], off
4852 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4853 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4854 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4855 ; GFX10-SCRATCH-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
4856 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v5
4857 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v5
4858 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, v5
4859 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v6
4860 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4861 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
4862 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3
4863 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
4864 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
4865 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
4866 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v5, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4867 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4868 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4
4869 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
4870 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4871 ; GFX10-SCRATCH-NEXT: global_store_byte v[0:1], v4, off
4872 ; GFX10-SCRATCH-NEXT: global_store_dword v[41:42], v2, off
4873 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4874 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
4875 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
4876 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4877 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
4878 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4879 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4880 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4881 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
4882 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4883 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: one load, one amdgpu_gfx call, one store, all through the
; null addrspace(1) pointer.
4884 %val = load <5 x i8>, ptr addrspace(1) null
4885 %tmp = call amdgpu_gfx <5 x i8> @external_void_func_v5i8_ret(<5 x i8> %val)
4886 store <5 x i8> %tmp, ptr addrspace(1) null
; Test: load a <8 x i8> from the null addrspace(1) pointer, pass it to
; @external_void_func_v8i8_ret using the amdgpu_gfx calling convention, and
; store the returned <8 x i8> back through the same pointer.
; The check lines below are autogenerated (utils/update_llc_test_checks.py) and
; record the expected llc output per RUN configuration; regenerate them rather
; than editing by hand.
; In the expected output the eight bytes are split into v0-v7 before
; s_swappc_b64 and repacked afterwards into a single 64-bit global store.
4891 define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 {
4892 ; GFX9-LABEL: test_call_external_void_func_v8i8_ret:
4894 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4895 ; GFX9-NEXT: s_mov_b32 s34, s33
4896 ; GFX9-NEXT: s_mov_b32 s33, s32
4897 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4898 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4899 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4900 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4901 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4902 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
4903 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
4904 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[41:42], off
4905 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
4906 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4907 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i8_ret@abs32@hi
4908 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i8_ret@abs32@lo
4909 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4910 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4911 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4912 ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v0
4913 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4914 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
4915 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
4916 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
4917 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
4918 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
4919 ; GFX9-NEXT: v_mov_b32_e32 v1, v8
4920 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
4921 ; GFX9-NEXT: v_lshlrev_b16_e32 v5, 8, v5
4922 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
4923 ; GFX9-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4924 ; GFX9-NEXT: v_lshlrev_b16_e32 v5, 8, v7
4925 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4926 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v3
4927 ; GFX9-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4928 ; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4929 ; GFX9-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4930 ; GFX9-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4931 ; GFX9-NEXT: global_store_dwordx2 v[41:42], v[3:4], off
4932 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4933 ; GFX9-NEXT: s_nop 0
4934 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4935 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
4936 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
4937 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4938 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4939 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4940 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4941 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4942 ; GFX9-NEXT: s_mov_b32 s33, s34
4943 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4944 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4946 ; GFX10-LABEL: test_call_external_void_func_v8i8_ret:
4948 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4949 ; GFX10-NEXT: s_mov_b32 s34, s33
4950 ; GFX10-NEXT: s_mov_b32 s33, s32
4951 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4952 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4953 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4954 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4955 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4956 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4957 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4958 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4959 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4960 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i8_ret@abs32@hi
4961 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i8_ret@abs32@lo
4962 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4963 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[41:42], off
4964 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4965 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4966 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4967 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v0
4968 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4969 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
4970 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
4971 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
4972 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
4973 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
4974 ; GFX10-NEXT: v_mov_b32_e32 v1, v8
4975 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4976 ; GFX10-NEXT: v_lshlrev_b16 v5, 8, v5
4977 ; GFX10-NEXT: v_lshlrev_b16 v7, 8, v7
4978 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
4979 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3
4980 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4981 ; GFX10-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4982 ; GFX10-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4983 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4984 ; GFX10-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4985 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4986 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4987 ; GFX10-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4988 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4989 ; GFX10-NEXT: global_store_dwordx2 v[41:42], v[0:1], off
4990 ; GFX10-NEXT: s_clause 0x1
4991 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4992 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4993 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4994 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4995 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4996 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4997 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4998 ; GFX10-NEXT: s_mov_b32 s33, s34
4999 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5000 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5002 ; GFX11-LABEL: test_call_external_void_func_v8i8_ret:
5004 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5005 ; GFX11-NEXT: s_mov_b32 s0, s33
5006 ; GFX11-NEXT: s_mov_b32 s33, s32
5007 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5008 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
5009 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5010 ; GFX11-NEXT: s_clause 0x1
5011 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
5012 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
5013 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
5014 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
5015 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
5016 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi
5017 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo
5018 ; GFX11-NEXT: s_add_i32 s32, s32, 16
5019 ; GFX11-NEXT: global_load_b64 v[0:1], v[41:42], off
5020 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5021 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5022 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5023 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 8, v0
5024 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5025 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0
5026 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5027 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5028 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5029 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v8
5030 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
5031 ; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
5032 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5033 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
5034 ; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
5035 ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
5036 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
5037 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
5038 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
5039 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
5040 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
5041 ; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
5042 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
5043 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5044 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
5045 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v4
5046 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v5
5047 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
5048 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
5049 ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v1
5050 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
5051 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
5052 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
5053 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
5054 ; GFX11-NEXT: global_store_b64 v[41:42], v[0:1], off
5055 ; GFX11-NEXT: s_clause 0x1
5056 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
5057 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
5058 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5059 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
5060 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5061 ; GFX11-NEXT: s_add_i32 s32, s32, -16
5062 ; GFX11-NEXT: s_mov_b32 s33, s0
5063 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5064 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5066 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i8_ret:
5067 ; GFX10-SCRATCH: ; %bb.0:
5068 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5069 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
5070 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5071 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5072 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
5073 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5074 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5075 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
5076 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
5077 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
5078 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
5079 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
5080 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi
5081 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo
5082 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5083 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[41:42], off
5084 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5085 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5086 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5087 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v8, 8, v0
5088 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5089 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v3, 24, v0
5090 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5091 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5092 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5093 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
5094 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v8
5095 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5096 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v5, 8, v5
5097 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v7, 8, v7
5098 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
5099 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3
5100 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
5101 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5102 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5103 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5104 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5105 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
5106 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
5107 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5108 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5109 ; GFX10-SCRATCH-NEXT: global_store_dwordx2 v[41:42], v[0:1], off
5110 ; GFX10-SCRATCH-NEXT: s_clause 0x1
5111 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
5112 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
5113 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5114 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
5115 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5116 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5117 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5118 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
5119 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5120 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: one load, one amdgpu_gfx call, one store, all through the
; null addrspace(1) pointer.
5121 %val = load <8 x i8>, ptr addrspace(1) null
5122 %tmp = call amdgpu_gfx <8 x i8> @external_void_func_v8i8_ret(<8 x i8> %val)
5123 store <8 x i8> %tmp, ptr addrspace(1) null
5128 define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 {
5129 ; GFX9-LABEL: test_call_external_void_func_v32i8_ret:
5131 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5132 ; GFX9-NEXT: s_mov_b32 s34, s33
5133 ; GFX9-NEXT: s_mov_b32 s33, s32
5134 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5135 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
5136 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5137 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
5138 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
5139 ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
5140 ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill
5141 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
5142 ; GFX9-NEXT: v_mov_b32_e32 v43, 16
5143 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
5144 ; GFX9-NEXT: v_mov_b32_e32 v44, 0
5145 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[41:42], off
5146 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v[43:44], off
5147 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
5148 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5149 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi
5150 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo
5151 ; GFX9-NEXT: s_addk_i32 s32, 0x800
5152 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5153 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5154 ; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v0
5155 ; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v0
5156 ; GFX9-NEXT: v_lshrrev_b32_e32 v37, 24, v0
5157 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5158 ; GFX9-NEXT: v_lshrrev_b32_e32 v32, 8, v16
5159 ; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v16
5160 ; GFX9-NEXT: v_lshrrev_b32_e32 v34, 24, v16
5161 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5162 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5163 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5164 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5165 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5166 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5167 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5168 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5169 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5170 ; GFX9-NEXT: v_lshrrev_b32_e32 v21, 8, v17
5171 ; GFX9-NEXT: v_lshrrev_b32_e32 v22, 16, v17
5172 ; GFX9-NEXT: v_lshrrev_b32_e32 v23, 24, v17
5173 ; GFX9-NEXT: v_lshrrev_b32_e32 v25, 8, v18
5174 ; GFX9-NEXT: v_lshrrev_b32_e32 v26, 16, v18
5175 ; GFX9-NEXT: v_lshrrev_b32_e32 v27, 24, v18
5176 ; GFX9-NEXT: v_lshrrev_b32_e32 v29, 8, v19
5177 ; GFX9-NEXT: v_lshrrev_b32_e32 v30, 16, v19
5178 ; GFX9-NEXT: v_lshrrev_b32_e32 v31, 24, v19
5179 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
5180 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
5181 ; GFX9-NEXT: v_mov_b32_e32 v12, v3
5182 ; GFX9-NEXT: v_mov_b32_e32 v20, v17
5183 ; GFX9-NEXT: v_mov_b32_e32 v24, v18
5184 ; GFX9-NEXT: v_mov_b32_e32 v28, v19
5185 ; GFX9-NEXT: v_mov_b32_e32 v1, v35
5186 ; GFX9-NEXT: v_mov_b32_e32 v2, v36
5187 ; GFX9-NEXT: v_mov_b32_e32 v3, v37
5188 ; GFX9-NEXT: v_mov_b32_e32 v17, v32
5189 ; GFX9-NEXT: v_mov_b32_e32 v18, v33
5190 ; GFX9-NEXT: v_mov_b32_e32 v19, v34
5191 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
5192 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
5193 ; GFX9-NEXT: v_lshlrev_b16_e32 v5, 8, v5
5194 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5195 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v3
5196 ; GFX9-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5197 ; GFX9-NEXT: v_lshlrev_b16_e32 v5, 8, v7
5198 ; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5199 ; GFX9-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5200 ; GFX9-NEXT: v_or_b32_sdwa v6, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5201 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v29
5202 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v31
5203 ; GFX9-NEXT: v_or_b32_sdwa v0, v28, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5204 ; GFX9-NEXT: v_or_b32_sdwa v1, v30, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5205 ; GFX9-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5206 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v25
5207 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v27
5208 ; GFX9-NEXT: v_or_b32_sdwa v0, v24, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5209 ; GFX9-NEXT: v_or_b32_sdwa v1, v26, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5210 ; GFX9-NEXT: v_or_b32_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5211 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v21
5212 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v23
5213 ; GFX9-NEXT: v_or_b32_sdwa v0, v20, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5214 ; GFX9-NEXT: v_or_b32_sdwa v1, v22, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5215 ; GFX9-NEXT: v_lshlrev_b16_e32 v13, 8, v13
5216 ; GFX9-NEXT: v_lshlrev_b16_e32 v9, 8, v9
5217 ; GFX9-NEXT: v_or_b32_sdwa v7, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5218 ; GFX9-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5219 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v17
5220 ; GFX9-NEXT: v_lshlrev_b16_e32 v4, 8, v19
5221 ; GFX9-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5222 ; GFX9-NEXT: v_lshlrev_b16_e32 v13, 8, v15
5223 ; GFX9-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5224 ; GFX9-NEXT: v_lshlrev_b16_e32 v9, 8, v11
5225 ; GFX9-NEXT: v_or_b32_sdwa v0, v16, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5226 ; GFX9-NEXT: v_or_b32_sdwa v4, v18, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5227 ; GFX9-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5228 ; GFX9-NEXT: v_or_b32_sdwa v10, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5229 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5230 ; GFX9-NEXT: v_or_b32_sdwa v9, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5231 ; GFX9-NEXT: v_or_b32_sdwa v8, v8, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5232 ; GFX9-NEXT: global_store_dwordx4 v[43:44], v[0:3], off
5233 ; GFX9-NEXT: global_store_dwordx4 v[41:42], v[6:9], off
5234 ; GFX9-NEXT: buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload
5235 ; GFX9-NEXT: s_nop 0
5236 ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
5237 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
5238 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
5239 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
5240 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
5241 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
5242 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5243 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
5244 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5245 ; GFX9-NEXT: s_addk_i32 s32, 0xf800
5246 ; GFX9-NEXT: s_mov_b32 s33, s34
5247 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5248 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5250 ; GFX10-LABEL: test_call_external_void_func_v32i8_ret:
5252 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5253 ; GFX10-NEXT: s_mov_b32 s34, s33
5254 ; GFX10-NEXT: s_mov_b32 s33, s32
5255 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5256 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
5257 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5258 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5259 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
5260 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
5261 ; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
5262 ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill
5263 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
5264 ; GFX10-NEXT: v_mov_b32_e32 v43, 16
5265 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
5266 ; GFX10-NEXT: v_mov_b32_e32 v44, 0
5267 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
5268 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi
5269 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo
5270 ; GFX10-NEXT: s_clause 0x1
5271 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[41:42], off
5272 ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[43:44], off
5273 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5274 ; GFX10-NEXT: s_addk_i32 s32, 0x400
5275 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5276 ; GFX10-NEXT: s_waitcnt vmcnt(1)
5277 ; GFX10-NEXT: v_lshrrev_b32_e32 v35, 8, v0
5278 ; GFX10-NEXT: v_lshrrev_b32_e32 v36, 16, v0
5279 ; GFX10-NEXT: v_lshrrev_b32_e32 v37, 24, v0
5280 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5281 ; GFX10-NEXT: v_lshrrev_b32_e32 v32, 8, v16
5282 ; GFX10-NEXT: v_lshrrev_b32_e32 v33, 16, v16
5283 ; GFX10-NEXT: v_lshrrev_b32_e32 v34, 24, v16
5284 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5285 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5286 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5287 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5288 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5289 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5290 ; GFX10-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5291 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5292 ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5293 ; GFX10-NEXT: v_lshrrev_b32_e32 v21, 8, v17
5294 ; GFX10-NEXT: v_lshrrev_b32_e32 v22, 16, v17
5295 ; GFX10-NEXT: v_lshrrev_b32_e32 v23, 24, v17
5296 ; GFX10-NEXT: v_lshrrev_b32_e32 v25, 8, v18
5297 ; GFX10-NEXT: v_lshrrev_b32_e32 v26, 16, v18
5298 ; GFX10-NEXT: v_lshrrev_b32_e32 v27, 24, v18
5299 ; GFX10-NEXT: v_lshrrev_b32_e32 v29, 8, v19
5300 ; GFX10-NEXT: v_lshrrev_b32_e32 v30, 16, v19
5301 ; GFX10-NEXT: v_lshrrev_b32_e32 v31, 24, v19
5302 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
5303 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
5304 ; GFX10-NEXT: v_mov_b32_e32 v12, v3
5305 ; GFX10-NEXT: v_mov_b32_e32 v20, v17
5306 ; GFX10-NEXT: v_mov_b32_e32 v24, v18
5307 ; GFX10-NEXT: v_mov_b32_e32 v28, v19
5308 ; GFX10-NEXT: v_mov_b32_e32 v1, v35
5309 ; GFX10-NEXT: v_mov_b32_e32 v2, v36
5310 ; GFX10-NEXT: v_mov_b32_e32 v3, v37
5311 ; GFX10-NEXT: v_mov_b32_e32 v17, v32
5312 ; GFX10-NEXT: v_mov_b32_e32 v18, v33
5313 ; GFX10-NEXT: v_mov_b32_e32 v19, v34
5314 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
5315 ; GFX10-NEXT: v_lshlrev_b16 v13, 8, v13
5316 ; GFX10-NEXT: v_lshlrev_b16 v9, 8, v9
5317 ; GFX10-NEXT: v_lshlrev_b16 v11, 8, v11
5318 ; GFX10-NEXT: v_lshlrev_b16 v5, 8, v5
5319 ; GFX10-NEXT: v_lshlrev_b16 v7, 8, v7
5320 ; GFX10-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5321 ; GFX10-NEXT: v_lshlrev_b16 v13, 8, v15
5322 ; GFX10-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5323 ; GFX10-NEXT: v_or_b32_sdwa v9, v10, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5324 ; GFX10-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5325 ; GFX10-NEXT: v_or_b32_sdwa v7, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5326 ; GFX10-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5327 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
5328 ; GFX10-NEXT: v_or_b32_sdwa v5, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5329 ; GFX10-NEXT: v_lshlrev_b16 v8, 8, v31
5330 ; GFX10-NEXT: v_or_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5331 ; GFX10-NEXT: v_lshlrev_b16 v7, 8, v29
5332 ; GFX10-NEXT: v_lshlrev_b16 v9, 8, v25
5333 ; GFX10-NEXT: v_or_b32_sdwa v6, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5334 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5335 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v3
5336 ; GFX10-NEXT: v_or_b32_sdwa v3, v28, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5337 ; GFX10-NEXT: v_or_b32_sdwa v7, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5338 ; GFX10-NEXT: v_or_b32_sdwa v8, v24, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5339 ; GFX10-NEXT: v_lshlrev_b16 v9, 8, v27
5340 ; GFX10-NEXT: v_lshlrev_b16 v10, 8, v21
5341 ; GFX10-NEXT: v_lshlrev_b16 v11, 8, v23
5342 ; GFX10-NEXT: v_lshlrev_b16 v12, 8, v17
5343 ; GFX10-NEXT: v_lshlrev_b16 v13, 8, v19
5344 ; GFX10-NEXT: v_or_b32_sdwa v9, v26, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5345 ; GFX10-NEXT: v_or_b32_sdwa v14, v20, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5346 ; GFX10-NEXT: v_or_b32_sdwa v11, v22, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5347 ; GFX10-NEXT: v_or_b32_sdwa v12, v16, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5348 ; GFX10-NEXT: v_or_b32_sdwa v13, v18, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5349 ; GFX10-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5350 ; GFX10-NEXT: v_or_b32_sdwa v10, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5351 ; GFX10-NEXT: v_or_b32_sdwa v9, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5352 ; GFX10-NEXT: v_or_b32_sdwa v8, v14, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5353 ; GFX10-NEXT: v_or_b32_sdwa v7, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5354 ; GFX10-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5355 ; GFX10-NEXT: global_store_dwordx4 v[43:44], v[7:10], off
5356 ; GFX10-NEXT: global_store_dwordx4 v[41:42], v[3:6], off
5357 ; GFX10-NEXT: s_clause 0x3
5358 ; GFX10-NEXT: buffer_load_dword v44, off, s[0:3], s33
5359 ; GFX10-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4
5360 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8
5361 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12
5362 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
5363 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
5364 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
5365 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5366 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
5367 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5368 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5369 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00
5370 ; GFX10-NEXT: s_mov_b32 s33, s34
5371 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5372 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5374 ; GFX11-LABEL: test_call_external_void_func_v32i8_ret:
5376 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5377 ; GFX11-NEXT: s_mov_b32 s0, s33
5378 ; GFX11-NEXT: s_mov_b32 s33, s32
5379 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5380 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; 4-byte Folded Spill
5381 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5382 ; GFX11-NEXT: s_clause 0x3
5383 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:12
5384 ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8
5385 ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:4
5386 ; GFX11-NEXT: scratch_store_b32 off, v44, s33
5387 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
5388 ; GFX11-NEXT: v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v43, 16
5389 ; GFX11-NEXT: v_mov_b32_e32 v44, 0
5390 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
5391 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi
5392 ; GFX11-NEXT: global_load_b128 v[0:3], v[41:42], off
5393 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo
5394 ; GFX11-NEXT: global_load_b128 v[16:19], v[43:44], off
5395 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5396 ; GFX11-NEXT: s_add_i32 s32, s32, 32
5397 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5398 ; GFX11-NEXT: s_waitcnt vmcnt(1)
5399 ; GFX11-NEXT: v_lshrrev_b32_e32 v35, 8, v0
5400 ; GFX11-NEXT: v_lshrrev_b32_e32 v36, 16, v0
5401 ; GFX11-NEXT: v_lshrrev_b32_e32 v37, 24, v0
5402 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5403 ; GFX11-NEXT: v_lshrrev_b32_e32 v32, 8, v16
5404 ; GFX11-NEXT: v_lshrrev_b32_e32 v33, 16, v16
5405 ; GFX11-NEXT: v_lshrrev_b32_e32 v34, 24, v16
5406 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5407 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5408 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5409 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5410 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5411 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5412 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5413 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5414 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5415 ; GFX11-NEXT: v_lshrrev_b32_e32 v21, 8, v17
5416 ; GFX11-NEXT: v_lshrrev_b32_e32 v22, 16, v17
5417 ; GFX11-NEXT: v_lshrrev_b32_e32 v23, 24, v17
5418 ; GFX11-NEXT: v_lshrrev_b32_e32 v25, 8, v18
5419 ; GFX11-NEXT: v_lshrrev_b32_e32 v26, 16, v18
5420 ; GFX11-NEXT: v_lshrrev_b32_e32 v27, 24, v18
5421 ; GFX11-NEXT: v_lshrrev_b32_e32 v29, 8, v19
5422 ; GFX11-NEXT: v_lshrrev_b32_e32 v30, 16, v19
5423 ; GFX11-NEXT: v_lshrrev_b32_e32 v31, 24, v19
5424 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v35
5425 ; GFX11-NEXT: v_mov_b32_e32 v8, v2
5426 ; GFX11-NEXT: v_mov_b32_e32 v12, v3
5427 ; GFX11-NEXT: v_mov_b32_e32 v20, v17
5428 ; GFX11-NEXT: v_mov_b32_e32 v24, v18
5429 ; GFX11-NEXT: v_dual_mov_b32 v28, v19 :: v_dual_mov_b32 v19, v34
5430 ; GFX11-NEXT: v_dual_mov_b32 v2, v36 :: v_dual_mov_b32 v3, v37
5431 ; GFX11-NEXT: v_dual_mov_b32 v17, v32 :: v_dual_mov_b32 v18, v33
5432 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
5433 ; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9
5434 ; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8
5435 ; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11
5436 ; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10
5437 ; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
5438 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
5439 ; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
5440 ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
5441 ; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13
5442 ; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12
5443 ; GFX11-NEXT: v_or_b32_e32 v8, v8, v9
5444 ; GFX11-NEXT: v_or_b32_e32 v9, v10, v11
5445 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
5446 ; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
5447 ; GFX11-NEXT: v_or_b32_e32 v12, v12, v13
5448 ; GFX11-NEXT: v_lshlrev_b16 v13, 8, v15
5449 ; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14
5450 ; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v8
5451 ; GFX11-NEXT: v_lshlrev_b32_e32 v8, 16, v9
5452 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4
5453 ; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v5
5454 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
5455 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
5456 ; GFX11-NEXT: v_or_b32_e32 v13, v14, v13
5457 ; GFX11-NEXT: v_or_b32_e32 v5, v7, v8
5458 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v9
5459 ; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v28
5460 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
5461 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v29
5462 ; GFX11-NEXT: v_lshlrev_b16 v8, 8, v31
5463 ; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v30
5464 ; GFX11-NEXT: v_lshlrev_b16 v10, 8, v25
5465 ; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v24
5466 ; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12
5467 ; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v13
5468 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
5469 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
5470 ; GFX11-NEXT: v_or_b32_e32 v1, v7, v1
5471 ; GFX11-NEXT: v_or_b32_e32 v7, v9, v8
5472 ; GFX11-NEXT: v_or_b32_e32 v8, v11, v10
5473 ; GFX11-NEXT: v_or_b32_e32 v6, v12, v6
5474 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
5475 ; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v26
5476 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v7
5477 ; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v8
5478 ; GFX11-NEXT: v_lshlrev_b16 v8, 8, v27
5479 ; GFX11-NEXT: v_lshlrev_b16 v10, 8, v21
5480 ; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v20
5481 ; GFX11-NEXT: v_lshlrev_b16 v12, 8, v23
5482 ; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v22
5483 ; GFX11-NEXT: v_lshlrev_b16 v14, 8, v17
5484 ; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v16
5485 ; GFX11-NEXT: v_lshlrev_b16 v16, 8, v19
5486 ; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v18
5487 ; GFX11-NEXT: v_or_b32_e32 v8, v9, v8
5488 ; GFX11-NEXT: v_or_b32_e32 v9, v11, v10
5489 ; GFX11-NEXT: v_or_b32_e32 v10, v13, v12
5490 ; GFX11-NEXT: v_or_b32_e32 v11, v15, v14
5491 ; GFX11-NEXT: v_or_b32_e32 v12, v17, v16
5492 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
5493 ; GFX11-NEXT: v_lshlrev_b32_e32 v8, 16, v8
5494 ; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v9
5495 ; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v10
5496 ; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11
5497 ; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v12
5498 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
5499 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2
5500 ; GFX11-NEXT: v_or_b32_e32 v10, v1, v3
5501 ; GFX11-NEXT: v_or_b32_e32 v9, v7, v8
5502 ; GFX11-NEXT: v_or_b32_e32 v8, v13, v14
5503 ; GFX11-NEXT: v_or_b32_e32 v7, v11, v12
5504 ; GFX11-NEXT: v_or_b32_e32 v3, v0, v2
5505 ; GFX11-NEXT: s_clause 0x1
5506 ; GFX11-NEXT: global_store_b128 v[43:44], v[7:10], off
5507 ; GFX11-NEXT: global_store_b128 v[41:42], v[3:6], off
5508 ; GFX11-NEXT: s_clause 0x3
5509 ; GFX11-NEXT: scratch_load_b32 v44, off, s33
5510 ; GFX11-NEXT: scratch_load_b32 v43, off, s33 offset:4
5511 ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:8
5512 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:12
5513 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5514 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
5515 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
5516 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5517 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:16 ; 4-byte Folded Reload
5518 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5519 ; GFX11-NEXT: s_addk_i32 s32, 0xffe0
5520 ; GFX11-NEXT: s_mov_b32 s33, s0
5521 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5522 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5524 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i8_ret:
5525 ; GFX10-SCRATCH: ; %bb.0:
5526 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5527 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
5528 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5529 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5530 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:16 ; 4-byte Folded Spill
5531 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5532 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5533 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:12 ; 4-byte Folded Spill
5534 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill
5535 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v43, s33 offset:4 ; 4-byte Folded Spill
5536 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v44, s33 ; 4-byte Folded Spill
5537 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
5538 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v43, 16
5539 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
5540 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v44, 0
5541 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
5542 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi
5543 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo
5544 ; GFX10-SCRATCH-NEXT: s_clause 0x1
5545 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[41:42], off
5546 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[43:44], off
5547 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5548 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32
5549 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5550 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1)
5551 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v35, 8, v0
5552 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v36, 16, v0
5553 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v37, 24, v0
5554 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5555 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v32, 8, v16
5556 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v33, 16, v16
5557 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v34, 24, v16
5558 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5559 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5560 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5561 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5562 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5563 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5564 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5565 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5566 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5567 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v21, 8, v17
5568 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v22, 16, v17
5569 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v23, 24, v17
5570 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v25, 8, v18
5571 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v26, 16, v18
5572 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v27, 24, v18
5573 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v29, 8, v19
5574 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v30, 16, v19
5575 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v31, 24, v19
5576 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
5577 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, v2
5578 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, v3
5579 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, v17
5580 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, v18
5581 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, v19
5582 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v35
5583 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v36
5584 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v37
5585 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, v32
5586 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, v33
5587 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, v34
5588 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5589 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v13, 8, v13
5590 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v9, 8, v9
5591 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v11, 8, v11
5592 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v5, 8, v5
5593 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v7, 8, v7
5594 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5595 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v13, 8, v15
5596 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5597 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v9, v10, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5598 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5599 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v7, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5600 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5601 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
5602 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v5, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5603 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v8, 8, v31
5604 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5605 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v7, 8, v29
5606 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v9, 8, v25
5607 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v6, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5608 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5609 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v3
5610 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v3, v28, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5611 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v7, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5612 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v8, v24, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5613 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v9, 8, v27
5614 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v10, 8, v21
5615 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v11, 8, v23
5616 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v12, 8, v17
5617 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v13, 8, v19
5618 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v9, v26, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5619 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v14, v20, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5620 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v11, v22, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5621 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v12, v16, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5622 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v13, v18, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5623 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5624 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v10, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5625 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v9, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5626 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v8, v14, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5627 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v7, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5628 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5629 ; GFX10-SCRATCH-NEXT: global_store_dwordx4 v[43:44], v[7:10], off
5630 ; GFX10-SCRATCH-NEXT: global_store_dwordx4 v[41:42], v[3:6], off
5631 ; GFX10-SCRATCH-NEXT: s_clause 0x3
5632 ; GFX10-SCRATCH-NEXT: scratch_load_dword v44, off, s33
5633 ; GFX10-SCRATCH-NEXT: scratch_load_dword v43, off, s33 offset:4
5634 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33 offset:8
5635 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:12
5636 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
5637 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
5638 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
5639 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5640 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:16 ; 4-byte Folded Reload
5641 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5642 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5643 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0
5644 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
5645 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5646 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
5647 %val = load <32 x i8>, ptr addrspace(1) null
5648 %tmp = call amdgpu_gfx <32 x i8> @external_void_func_v3i8_ret(<32 x i8> %val)
5649 store <32 x i8> %tmp, ptr addrspace(1) null
; Loads a <2 x i16> from an undef addrspace(1) pointer and passes it as the
; single argument of a call to the amdgpu_gfx function
; @external_void_func_v2i16. In every checked configuration (GFX9, GFX10,
; GFX11, GFX10-SCRATCH) the value is loaded with one dword global load into v0
; and the checked sequence up to s_swappc_b64 contains no further write to v0.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5655 define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 {
5656 ; GFX9-LABEL: test_call_external_void_func_v2i16:
5658 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5659 ; GFX9-NEXT: s_mov_b32 s34, s33
5660 ; GFX9-NEXT: s_mov_b32 s33, s32
5661 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5662 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5663 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5664 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
5665 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
5666 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5667 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi
5668 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo
5669 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5670 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5671 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
5672 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
5673 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
5674 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
5675 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5676 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5677 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5678 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5679 ; GFX9-NEXT: s_mov_b32 s33, s34
5680 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5681 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5683 ; GFX10-LABEL: test_call_external_void_func_v2i16:
5685 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5686 ; GFX10-NEXT: s_mov_b32 s34, s33
5687 ; GFX10-NEXT: s_mov_b32 s33, s32
5688 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5689 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5690 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5691 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5692 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
5693 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
5694 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi
5695 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo
5696 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5697 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5698 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5699 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
5700 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
5701 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
5702 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
5703 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5704 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5705 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5706 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5707 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5708 ; GFX10-NEXT: s_mov_b32 s33, s34
5709 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5710 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5712 ; GFX11-LABEL: test_call_external_void_func_v2i16:
5714 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5715 ; GFX11-NEXT: s_mov_b32 s0, s33
5716 ; GFX11-NEXT: s_mov_b32 s33, s32
5717 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5718 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
5719 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5720 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
5721 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
5722 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi
5723 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo
5724 ; GFX11-NEXT: s_add_i32 s32, s32, 16
5725 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5726 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5727 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
5728 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5729 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5730 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
5731 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
5732 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5733 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
5734 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5735 ; GFX11-NEXT: s_add_i32 s32, s32, -16
5736 ; GFX11-NEXT: s_mov_b32 s33, s0
5737 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5738 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5740 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16:
5741 ; GFX10-SCRATCH: ; %bb.0:
5742 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5743 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
5744 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5745 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5746 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
5747 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5748 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5749 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off
5750 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
5751 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi
5752 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo
5753 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5754 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5755 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5756 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5757 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
5758 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
5759 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
5760 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5761 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
5762 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5763 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5764 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5765 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
5766 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5767 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
5768 %val = load <2 x i16>, ptr addrspace(1) undef
5769 call amdgpu_gfx void @external_void_func_v2i16(<2 x i16> %val)
5773 define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 {
5774 ; GFX9-LABEL: test_call_external_void_func_v3i16:
5776 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5777 ; GFX9-NEXT: s_mov_b32 s34, s33
5778 ; GFX9-NEXT: s_mov_b32 s33, s32
5779 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5780 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5781 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5782 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
5783 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
5784 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5785 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi
5786 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo
5787 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5788 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5789 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
5790 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
5791 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
5792 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
5793 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5794 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5795 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5796 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5797 ; GFX9-NEXT: s_mov_b32 s33, s34
5798 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5799 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5801 ; GFX10-LABEL: test_call_external_void_func_v3i16:
5803 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5804 ; GFX10-NEXT: s_mov_b32 s34, s33
5805 ; GFX10-NEXT: s_mov_b32 s33, s32
5806 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5807 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5808 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5809 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5810 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
5811 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
5812 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi
5813 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo
5814 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5815 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5816 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5817 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
5818 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
5819 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
5820 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
5821 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5822 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5823 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5824 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5825 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5826 ; GFX10-NEXT: s_mov_b32 s33, s34
5827 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5828 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5830 ; GFX11-LABEL: test_call_external_void_func_v3i16:
5832 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5833 ; GFX11-NEXT: s_mov_b32 s0, s33
5834 ; GFX11-NEXT: s_mov_b32 s33, s32
5835 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5836 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
5837 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5838 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
5839 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
5840 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi
5841 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo
5842 ; GFX11-NEXT: s_add_i32 s32, s32, 16
5843 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5844 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5845 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
5846 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5847 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5848 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
5849 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
5850 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5851 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
5852 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5853 ; GFX11-NEXT: s_add_i32 s32, s32, -16
5854 ; GFX11-NEXT: s_mov_b32 s33, s0
5855 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5856 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5858 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16:
5859 ; GFX10-SCRATCH: ; %bb.0:
5860 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5861 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
5862 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5863 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5864 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
5865 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5866 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5867 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
5868 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
5869 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi
5870 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo
5871 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5872 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5873 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5874 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5875 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
5876 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
5877 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
5878 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5879 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
5880 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5881 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5882 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5883 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
5884 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5885 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
5886 %val = load <3 x i16>, ptr addrspace(1) undef
5887 call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> %val)
; Passes a <3 x half> loaded from an undef global (addrspace 1) pointer to
; external_void_func_v3f16 through an amdgpu_gfx call. The autogenerated
; assertions below expect the value to be loaded into v[0:1] before the call
; in every tested configuration.
5891 define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 {
5892 ; GFX9-LABEL: test_call_external_void_func_v3f16:
5894 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5895 ; GFX9-NEXT: s_mov_b32 s34, s33
5896 ; GFX9-NEXT: s_mov_b32 s33, s32
5897 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5898 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5899 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5900 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
5901 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
5902 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5903 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi
5904 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo
5905 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5906 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5907 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
5908 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
5909 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
5910 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
5911 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5912 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5913 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5914 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5915 ; GFX9-NEXT: s_mov_b32 s33, s34
5916 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5917 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5919 ; GFX10-LABEL: test_call_external_void_func_v3f16:
5921 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5922 ; GFX10-NEXT: s_mov_b32 s34, s33
5923 ; GFX10-NEXT: s_mov_b32 s33, s32
5924 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5925 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5926 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5927 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5928 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
5929 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
5930 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi
5931 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo
5932 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5933 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5934 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5935 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
5936 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
5937 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
5938 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
5939 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5940 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5941 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5942 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5943 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5944 ; GFX10-NEXT: s_mov_b32 s33, s34
5945 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5946 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5948 ; GFX11-LABEL: test_call_external_void_func_v3f16:
5950 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5951 ; GFX11-NEXT: s_mov_b32 s0, s33
5952 ; GFX11-NEXT: s_mov_b32 s33, s32
5953 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5954 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
5955 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5956 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
5957 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
5958 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi
5959 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo
5960 ; GFX11-NEXT: s_add_i32 s32, s32, 16
5961 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5962 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5963 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
5964 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5965 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5966 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
5967 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
5968 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5969 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
5970 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5971 ; GFX11-NEXT: s_add_i32 s32, s32, -16
5972 ; GFX11-NEXT: s_mov_b32 s33, s0
5973 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5974 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5976 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16:
5977 ; GFX10-SCRATCH: ; %bb.0:
5978 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5979 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
5980 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5981 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5982 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
5983 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5984 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5985 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
5986 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
5987 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi
5988 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo
5989 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5990 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5991 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5992 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5993 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
5994 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
5995 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
5996 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5997 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
5998 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5999 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6000 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6001 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6002 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6003 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6004 %val = load <3 x half>, ptr addrspace(1) undef
6005 call amdgpu_gfx void @external_void_func_v3f16(<3 x half> %val)
; Passes the constant <3 x i16> <1, 2, 3> to external_void_func_v3i16 through
; an amdgpu_gfx call. The autogenerated assertions below expect the elements
; packed two per register, v0 = 0x20001 (elements 1 and 2) and v1 = 3; the
; gfx1100 run materializes both with a single v_dual_mov_b32.
6009 define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 {
6010 ; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
6012 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6013 ; GFX9-NEXT: s_mov_b32 s34, s33
6014 ; GFX9-NEXT: s_mov_b32 s33, s32
6015 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6016 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6017 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6018 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6019 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6020 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi
6021 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo
6022 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
6023 ; GFX9-NEXT: v_mov_b32_e32 v1, 3
6024 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6025 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6026 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6027 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6028 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6029 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6030 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6031 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6032 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6033 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6034 ; GFX9-NEXT: s_mov_b32 s33, s34
6035 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6036 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6038 ; GFX10-LABEL: test_call_external_void_func_v3i16_imm:
6040 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6041 ; GFX10-NEXT: s_mov_b32 s34, s33
6042 ; GFX10-NEXT: s_mov_b32 s33, s32
6043 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6044 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6045 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6046 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6047 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6048 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
6049 ; GFX10-NEXT: v_mov_b32_e32 v1, 3
6050 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi
6051 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo
6052 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6053 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6054 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6055 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6056 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6057 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6058 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6059 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6060 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6061 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6062 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6063 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6064 ; GFX10-NEXT: s_mov_b32 s33, s34
6065 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6066 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6068 ; GFX11-LABEL: test_call_external_void_func_v3i16_imm:
6070 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6071 ; GFX11-NEXT: s_mov_b32 s0, s33
6072 ; GFX11-NEXT: s_mov_b32 s33, s32
6073 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6074 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6075 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6076 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6077 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3
6078 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi
6079 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo
6080 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6081 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6082 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6083 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6084 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6085 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6086 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6087 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6088 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6089 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6090 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6091 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6092 ; GFX11-NEXT: s_mov_b32 s33, s0
6093 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6094 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6096 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm:
6097 ; GFX10-SCRATCH: ; %bb.0:
6098 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6099 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6100 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6101 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6102 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6103 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6104 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6105 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6106 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001
6107 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 3
6108 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi
6109 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo
6110 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6111 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6112 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6113 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6114 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6115 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6116 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6117 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6118 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6119 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6120 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6121 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6122 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6123 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6124 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6125 call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
; Passes the constant <3 x half> <1.0, 2.0, 4.0> to external_void_func_v3f16
; through an amdgpu_gfx call. The autogenerated assertions below expect the
; packed half bit patterns v0 = 0x40003c00 (2.0 in the high 16 bits, 1.0 in
; the low 16 bits) and v1 = 0x4400 (4.0).
6129 define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 {
6130 ; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
6132 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6133 ; GFX9-NEXT: s_mov_b32 s34, s33
6134 ; GFX9-NEXT: s_mov_b32 s33, s32
6135 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6136 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6137 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6138 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6139 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6140 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi
6141 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo
6142 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00
6143 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400
6144 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6145 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6146 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6147 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6148 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6149 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6150 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6151 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6152 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6153 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6154 ; GFX9-NEXT: s_mov_b32 s33, s34
6155 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6156 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6158 ; GFX10-LABEL: test_call_external_void_func_v3f16_imm:
6160 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6161 ; GFX10-NEXT: s_mov_b32 s34, s33
6162 ; GFX10-NEXT: s_mov_b32 s33, s32
6163 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6164 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6165 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6166 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6167 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6168 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00
6169 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400
6170 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi
6171 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo
6172 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6173 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6174 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6175 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6176 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6177 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6178 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6179 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6180 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6181 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6182 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6183 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6184 ; GFX10-NEXT: s_mov_b32 s33, s34
6185 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6186 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6188 ; GFX11-LABEL: test_call_external_void_func_v3f16_imm:
6190 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6191 ; GFX11-NEXT: s_mov_b32 s0, s33
6192 ; GFX11-NEXT: s_mov_b32 s33, s32
6193 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6194 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6195 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6196 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6197 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00
6198 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400
6199 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi
6200 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo
6201 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6202 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6203 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6204 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6205 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6206 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6207 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6208 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6209 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6210 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6211 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6212 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6213 ; GFX11-NEXT: s_mov_b32 s33, s0
6214 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6215 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6217 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm:
6218 ; GFX10-SCRATCH: ; %bb.0:
6219 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6220 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6221 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6222 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6223 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6224 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6225 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6226 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6227 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40003c00
6228 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x4400
6229 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi
6230 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo
6231 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6232 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6233 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6234 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6235 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6236 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6237 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6238 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6239 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6240 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6241 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6242 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6243 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6244 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6245 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6246 call amdgpu_gfx void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
; Passes a <4 x i16> loaded from an undef global (addrspace 1) pointer to
; external_void_func_v4i16 through an amdgpu_gfx call. The autogenerated
; assertions below expect the value to be loaded into v[0:1] before the call
; in every tested configuration.
6250 define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 {
6251 ; GFX9-LABEL: test_call_external_void_func_v4i16:
6253 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6254 ; GFX9-NEXT: s_mov_b32 s34, s33
6255 ; GFX9-NEXT: s_mov_b32 s33, s32
6256 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6257 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6258 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6259 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6260 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6261 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6262 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi
6263 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo
6264 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6265 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6266 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6267 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6268 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6269 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6270 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6271 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6272 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6273 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6274 ; GFX9-NEXT: s_mov_b32 s33, s34
6275 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6276 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6278 ; GFX10-LABEL: test_call_external_void_func_v4i16:
6280 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6281 ; GFX10-NEXT: s_mov_b32 s34, s33
6282 ; GFX10-NEXT: s_mov_b32 s33, s32
6283 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6284 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6285 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6286 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6287 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6288 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6289 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi
6290 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo
6291 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6292 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6293 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6294 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6295 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6296 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6297 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6298 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6299 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6300 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6301 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6302 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6303 ; GFX10-NEXT: s_mov_b32 s33, s34
6304 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6305 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6307 ; GFX11-LABEL: test_call_external_void_func_v4i16:
6309 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6310 ; GFX11-NEXT: s_mov_b32 s0, s33
6311 ; GFX11-NEXT: s_mov_b32 s33, s32
6312 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6313 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6314 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6315 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
6316 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6317 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi
6318 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo
6319 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6320 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6321 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6322 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6323 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6324 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6325 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6326 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6327 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6328 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6329 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6330 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6331 ; GFX11-NEXT: s_mov_b32 s33, s0
6332 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6333 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6335 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16:
6336 ; GFX10-SCRATCH: ; %bb.0:
6337 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6338 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6339 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6340 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6341 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6342 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6343 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6344 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6345 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6346 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi
6347 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo
6348 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6349 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6350 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6351 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6352 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6353 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6354 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6355 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6356 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6357 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6358 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6359 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6360 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6361 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6362 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6363 %val = load <4 x i16>, ptr addrspace(1) undef
6364 call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> %val)
; Passes the constant <4 x i16> <1, 2, 3, 4> to external_void_func_v4i16
; through an amdgpu_gfx call. The autogenerated assertions below expect the
; elements packed two per register: v0 = 0x20001 (elements 1 and 2) and
; v1 = 0x40003 (elements 3 and 4).
6368 define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 {
6369 ; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
6371 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6372 ; GFX9-NEXT: s_mov_b32 s34, s33
6373 ; GFX9-NEXT: s_mov_b32 s33, s32
6374 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6375 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6376 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6377 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6378 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6379 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi
6380 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo
6381 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
6382 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003
6383 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6384 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6385 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6386 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6387 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6388 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6389 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6390 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6391 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6392 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6393 ; GFX9-NEXT: s_mov_b32 s33, s34
6394 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6395 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6397 ; GFX10-LABEL: test_call_external_void_func_v4i16_imm:
6399 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6400 ; GFX10-NEXT: s_mov_b32 s34, s33
6401 ; GFX10-NEXT: s_mov_b32 s33, s32
6402 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6403 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6404 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6405 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6406 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6407 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
6408 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003
6409 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi
6410 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo
6411 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6412 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6413 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6414 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6415 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6416 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6417 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6418 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6419 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6420 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6421 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6422 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6423 ; GFX10-NEXT: s_mov_b32 s33, s34
6424 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6425 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6427 ; GFX11-LABEL: test_call_external_void_func_v4i16_imm:
6429 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6430 ; GFX11-NEXT: s_mov_b32 s0, s33
6431 ; GFX11-NEXT: s_mov_b32 s33, s32
6432 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6433 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6434 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6435 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6436 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001
6437 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003
6438 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi
6439 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo
6440 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6441 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6442 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6443 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6444 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6445 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6446 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6447 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6448 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6449 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6450 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6451 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6452 ; GFX11-NEXT: s_mov_b32 s33, s0
6453 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6454 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6456 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm:
6457 ; GFX10-SCRATCH: ; %bb.0:
6458 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6459 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6460 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6461 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6462 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6463 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6464 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6465 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6466 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001
6467 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40003
6468 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi
6469 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo
6470 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6471 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6472 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6473 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6474 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6475 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6476 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6477 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6478 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6479 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6480 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6481 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6482 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6483 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6484 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6485 call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
; Calls external_void_func_v2f16 (amdgpu_gfx calling convention) with a
; <2 x half> value loaded from an undef addrspace(1) pointer.
; The autogenerated assertions below cover the four RUN configurations at the
; top of the file (gfx900, gfx1010, gfx1100, and gfx1010 with
; +enable-flat-scratch). In every configuration the value is expected to be
; loaded with a single dword global load into v0, and v40 is spilled and then
; used via v_writelane/v_readlane to hold s30 (lane 0), s31 (lane 1) and the
; incoming s33 value (lane 2) across the s_swappc_b64 call.
6489 define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 {
6490 ; GFX9-LABEL: test_call_external_void_func_v2f16:
6492 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6493 ; GFX9-NEXT: s_mov_b32 s34, s33
6494 ; GFX9-NEXT: s_mov_b32 s33, s32
6495 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6496 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6497 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6498 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
6499 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6500 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6501 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi
6502 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo
6503 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6504 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6505 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6506 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6507 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6508 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6509 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6510 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6511 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6512 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6513 ; GFX9-NEXT: s_mov_b32 s33, s34
6514 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6515 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6517 ; GFX10-LABEL: test_call_external_void_func_v2f16:
6519 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6520 ; GFX10-NEXT: s_mov_b32 s34, s33
6521 ; GFX10-NEXT: s_mov_b32 s33, s32
6522 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6523 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6524 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6525 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6526 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
6527 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6528 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi
6529 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo
6530 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6531 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6532 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6533 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6534 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6535 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6536 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6537 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6538 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6539 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6540 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6541 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6542 ; GFX10-NEXT: s_mov_b32 s33, s34
6543 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6544 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6546 ; GFX11-LABEL: test_call_external_void_func_v2f16:
6548 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6549 ; GFX11-NEXT: s_mov_b32 s0, s33
6550 ; GFX11-NEXT: s_mov_b32 s33, s32
6551 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6552 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6553 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6554 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
6555 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6556 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi
6557 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo
6558 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6559 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6560 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6561 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6562 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6563 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6564 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6565 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6566 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6567 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6568 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6569 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6570 ; GFX11-NEXT: s_mov_b32 s33, s0
6571 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6572 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6574 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16:
6575 ; GFX10-SCRATCH: ; %bb.0:
6576 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6577 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6578 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6579 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6580 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6581 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6582 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6583 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off
6584 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6585 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi
6586 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo
6587 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6588 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6589 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6590 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6591 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6592 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6593 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6594 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6595 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6596 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6597 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6598 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6599 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6600 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6601 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The load address is undef; presumably only the argument passing matters
; here, not the loaded data (review: confirm intent).
6602 %val = load <2 x half>, ptr addrspace(1) undef
6603 call amdgpu_gfx void @external_void_func_v2f16(<2 x half> %val)
; Calls external_void_func_v2i32 (amdgpu_gfx calling convention) with a
; <2 x i32> value loaded from an undef addrspace(1) pointer.
; The autogenerated assertions below cover the four RUN configurations at the
; top of the file. Each expects one 64-bit global load into v[0:1]
; (global_load_dwordx2, spelled global_load_b64 on gfx1100), with v40 lanes
; 0-2 holding s30, s31 and the incoming s33 value across the call.
6607 define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 {
6608 ; GFX9-LABEL: test_call_external_void_func_v2i32:
6610 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6611 ; GFX9-NEXT: s_mov_b32 s34, s33
6612 ; GFX9-NEXT: s_mov_b32 s33, s32
6613 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6614 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6615 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6616 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6617 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6618 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6619 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi
6620 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo
6621 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6622 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6623 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6624 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6625 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6626 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6627 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6628 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6629 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6630 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6631 ; GFX9-NEXT: s_mov_b32 s33, s34
6632 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6633 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6635 ; GFX10-LABEL: test_call_external_void_func_v2i32:
6637 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6638 ; GFX10-NEXT: s_mov_b32 s34, s33
6639 ; GFX10-NEXT: s_mov_b32 s33, s32
6640 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6641 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6642 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6643 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6644 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6645 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6646 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi
6647 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo
6648 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6649 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6650 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6651 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6652 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6653 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6654 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6655 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6656 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6657 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6658 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6659 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6660 ; GFX10-NEXT: s_mov_b32 s33, s34
6661 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6662 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6664 ; GFX11-LABEL: test_call_external_void_func_v2i32:
6666 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6667 ; GFX11-NEXT: s_mov_b32 s0, s33
6668 ; GFX11-NEXT: s_mov_b32 s33, s32
6669 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6670 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6671 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6672 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
6673 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6674 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi
6675 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo
6676 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6677 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6678 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6679 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6680 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6681 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6682 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6683 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6684 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6685 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6686 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6687 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6688 ; GFX11-NEXT: s_mov_b32 s33, s0
6689 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6690 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6692 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32:
6693 ; GFX10-SCRATCH: ; %bb.0:
6694 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6695 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6696 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6697 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6698 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6699 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6700 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6701 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6702 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6703 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi
6704 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo
6705 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6706 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6707 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6708 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6709 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6710 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6711 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6712 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6713 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6714 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6715 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6716 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6717 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6718 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6719 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The load address is undef; presumably only the argument passing matters
; here, not the loaded data (review: confirm intent).
6720 %val = load <2 x i32>, ptr addrspace(1) undef
6721 call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> %val)
; Calls external_void_func_v2i32 (amdgpu_gfx calling convention) with the
; constant vector <i32 1, i32 2>.
; The autogenerated assertions below cover the four RUN configurations at the
; top of the file. Each expects the two constants to be materialized directly
; into v0 and v1 before the call (two v_mov_b32 instructions, or a single
; v_dual_mov_b32 on gfx1100), with v40 lanes 0-2 holding s30, s31 and the
; incoming s33 value across the call.
6725 define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 {
6726 ; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
6728 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6729 ; GFX9-NEXT: s_mov_b32 s34, s33
6730 ; GFX9-NEXT: s_mov_b32 s33, s32
6731 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6732 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6733 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6734 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6735 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6736 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi
6737 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo
6738 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
6739 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
6740 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6741 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6742 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6743 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6744 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6745 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6746 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6747 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6748 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6749 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6750 ; GFX9-NEXT: s_mov_b32 s33, s34
6751 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6752 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6754 ; GFX10-LABEL: test_call_external_void_func_v2i32_imm:
6756 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6757 ; GFX10-NEXT: s_mov_b32 s34, s33
6758 ; GFX10-NEXT: s_mov_b32 s33, s32
6759 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6760 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6761 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6762 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6763 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6764 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
6765 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
6766 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi
6767 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo
6768 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6769 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6770 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6771 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6772 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6773 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6774 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6775 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6776 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6777 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6778 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6779 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6780 ; GFX10-NEXT: s_mov_b32 s33, s34
6781 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6782 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6784 ; GFX11-LABEL: test_call_external_void_func_v2i32_imm:
6786 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6787 ; GFX11-NEXT: s_mov_b32 s0, s33
6788 ; GFX11-NEXT: s_mov_b32 s33, s32
6789 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6790 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6791 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6792 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6793 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
6794 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi
6795 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo
6796 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6797 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6798 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6799 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6800 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6801 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6802 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6803 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6804 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6805 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6806 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6807 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6808 ; GFX11-NEXT: s_mov_b32 s33, s0
6809 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6810 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6812 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm:
6813 ; GFX10-SCRATCH: ; %bb.0:
6814 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6815 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6816 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6817 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6818 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6819 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6820 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6821 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6822 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
6823 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
6824 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi
6825 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo
6826 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6827 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6828 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6829 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6830 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6831 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6832 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6833 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6834 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6835 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6836 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6837 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6838 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6839 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6840 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6841 call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
; Calls external_void_func_v3i32 (amdgpu_gfx calling convention) with the
; constant vector <i32 3, i32 4, i32 5>.
; The autogenerated assertions below cover the four RUN configurations at the
; top of the file. Each expects the three constants to be materialized into
; v0, v1 and v2 before the call, with v40 lanes 0-2 holding s30, s31 and the
; incoming s33 value across the call.
; NOTE(review): the unnamed i32 parameter is not referenced by the visible
; body; its purpose cannot be determined from here, so confirm before
; removing it, since doing so may change the generated code.
6845 define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 {
6846 ; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
6848 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6849 ; GFX9-NEXT: s_mov_b32 s34, s33
6850 ; GFX9-NEXT: s_mov_b32 s33, s32
6851 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6852 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6853 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6854 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6855 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6856 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32@abs32@hi
6857 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32@abs32@lo
6858 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
6859 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
6860 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
6861 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6862 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6863 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6864 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6865 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6866 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6867 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6868 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6869 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6870 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6871 ; GFX9-NEXT: s_mov_b32 s33, s34
6872 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6873 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6875 ; GFX10-LABEL: test_call_external_void_func_v3i32_imm:
6877 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6878 ; GFX10-NEXT: s_mov_b32 s34, s33
6879 ; GFX10-NEXT: s_mov_b32 s33, s32
6880 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6881 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6882 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6883 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6884 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6885 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
6886 ; GFX10-NEXT: v_mov_b32_e32 v1, 4
6887 ; GFX10-NEXT: v_mov_b32_e32 v2, 5
6888 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32@abs32@hi
6889 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6890 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32@abs32@lo
6891 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6892 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6893 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6894 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6895 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6896 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6897 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6898 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6899 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6900 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6901 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6902 ; GFX10-NEXT: s_mov_b32 s33, s34
6903 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6904 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6906 ; GFX11-LABEL: test_call_external_void_func_v3i32_imm:
6908 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6909 ; GFX11-NEXT: s_mov_b32 s0, s33
6910 ; GFX11-NEXT: s_mov_b32 s33, s32
6911 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6912 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6913 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6914 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6915 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
6916 ; GFX11-NEXT: v_mov_b32_e32 v2, 5
6917 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32@abs32@hi
6918 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6919 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32@abs32@lo
6920 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6921 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6922 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6923 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6924 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6925 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6926 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6927 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6928 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6929 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6930 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6931 ; GFX11-NEXT: s_mov_b32 s33, s0
6932 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6933 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6935 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm:
6936 ; GFX10-SCRATCH: ; %bb.0:
6937 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6938 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6939 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6940 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6941 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6942 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6943 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6944 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6945 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
6946 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4
6947 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5
6948 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32@abs32@hi
6949 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6950 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32@abs32@lo
6951 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6952 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6953 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6954 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6955 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6956 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6957 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6958 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6959 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6960 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6961 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6962 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6963 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6964 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6965 call amdgpu_gfx void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
; Calls external_void_func_v3i32_i32 (amdgpu_gfx calling convention) with the
; constant vector <i32 3, i32 4, i32 5> followed by the scalar i32 6.
; The autogenerated assertions below cover the four RUN configurations at the
; top of the file. Each expects the vector elements in v0-v2 and the trailing
; scalar in v3 (i.e. no padding register between the <3 x i32> and the i32),
; with v40 lanes 0-2 holding s30, s31 and the incoming s33 value across the
; call.
; NOTE(review): the unnamed i32 parameter is not referenced by the visible
; body; its purpose cannot be determined from here, so confirm before
; removing it, since doing so may change the generated code.
6969 define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 {
6970 ; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
6972 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6973 ; GFX9-NEXT: s_mov_b32 s34, s33
6974 ; GFX9-NEXT: s_mov_b32 s33, s32
6975 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6976 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6977 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6978 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6979 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6980 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32@abs32@hi
6981 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32@abs32@lo
6982 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
6983 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
6984 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
6985 ; GFX9-NEXT: v_mov_b32_e32 v3, 6
6986 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6987 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6988 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6989 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6990 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6991 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6992 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6993 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6994 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6995 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6996 ; GFX9-NEXT: s_mov_b32 s33, s34
6997 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6998 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7000 ; GFX10-LABEL: test_call_external_void_func_v3i32_i32:
7002 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7003 ; GFX10-NEXT: s_mov_b32 s34, s33
7004 ; GFX10-NEXT: s_mov_b32 s33, s32
7005 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7006 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7007 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7008 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7009 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7010 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
7011 ; GFX10-NEXT: v_mov_b32_e32 v1, 4
7012 ; GFX10-NEXT: v_mov_b32_e32 v2, 5
7013 ; GFX10-NEXT: v_mov_b32_e32 v3, 6
7014 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7015 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32@abs32@hi
7016 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32@abs32@lo
7017 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7018 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7019 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7020 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7021 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7022 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7023 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7024 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7025 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7026 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7027 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7028 ; GFX10-NEXT: s_mov_b32 s33, s34
7029 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7030 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7032 ; GFX11-LABEL: test_call_external_void_func_v3i32_i32:
7034 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7035 ; GFX11-NEXT: s_mov_b32 s0, s33
7036 ; GFX11-NEXT: s_mov_b32 s33, s32
7037 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7038 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7039 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7040 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7041 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
7042 ; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6
7043 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7044 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32@abs32@hi
7045 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32@abs32@lo
7046 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7047 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7048 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7049 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7050 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7051 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7052 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7053 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7054 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7055 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7056 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7057 ; GFX11-NEXT: s_mov_b32 s33, s0
7058 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7059 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7061 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32:
7062 ; GFX10-SCRATCH: ; %bb.0:
7063 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7064 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7065 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7066 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7067 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7068 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7069 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7070 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7071 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
7072 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4
7073 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5
7074 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 6
7075 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7076 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32@abs32@hi
7077 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32@abs32@lo
7078 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7079 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7080 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7081 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7082 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7083 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7084 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7085 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7086 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7087 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7088 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7089 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7090 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7091 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
7092 call amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
; Calls external_void_func_v4i32 (amdgpu_gfx calling convention) with a
; <4 x i32> value loaded from an undef addrspace(1) pointer.
; The autogenerated assertions below cover the four RUN configurations at the
; top of the file. Each expects one 128-bit global load into v[0:3]
; (global_load_dwordx4, spelled global_load_b128 on gfx1100), with v40 lanes
; 0-2 holding s30, s31 and the incoming s33 value across the call.
7096 define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 {
7097 ; GFX9-LABEL: test_call_external_void_func_v4i32:
7099 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7100 ; GFX9-NEXT: s_mov_b32 s34, s33
7101 ; GFX9-NEXT: s_mov_b32 s33, s32
7102 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7103 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7104 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7105 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
7106 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7107 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7108 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi
7109 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo
7110 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7111 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7112 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7113 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7114 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7115 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7116 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7117 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7118 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7119 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7120 ; GFX9-NEXT: s_mov_b32 s33, s34
7121 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7122 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7124 ; GFX10-LABEL: test_call_external_void_func_v4i32:
7126 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7127 ; GFX10-NEXT: s_mov_b32 s34, s33
7128 ; GFX10-NEXT: s_mov_b32 s33, s32
7129 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7130 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7131 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7132 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7133 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
7134 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7135 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi
7136 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo
7137 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7138 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7139 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7140 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7141 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7142 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7143 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7144 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7145 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7146 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7147 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7148 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7149 ; GFX10-NEXT: s_mov_b32 s33, s34
7150 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7151 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7153 ; GFX11-LABEL: test_call_external_void_func_v4i32:
7155 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7156 ; GFX11-NEXT: s_mov_b32 s0, s33
7157 ; GFX11-NEXT: s_mov_b32 s33, s32
7158 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7159 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7160 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7161 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
7162 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7163 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi
7164 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo
7165 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7166 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7167 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7168 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7169 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7170 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7171 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7172 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7173 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7174 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7175 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7176 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7177 ; GFX11-NEXT: s_mov_b32 s33, s0
7178 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7179 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7181 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32:
7182 ; GFX10-SCRATCH: ; %bb.0:
7183 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7184 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7185 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7186 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7187 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7188 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7189 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7190 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
7191 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7192 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi
7193 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo
7194 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7195 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7196 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7197 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7198 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7199 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7200 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7201 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7202 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7203 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7204 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7205 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7206 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7207 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7208 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The load address is undef; presumably only the argument passing matters
; here, not the loaded data (review: confirm intent).
7209 %val = load <4 x i32>, ptr addrspace(1) undef
7210 call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> %val)
; Passes the constant vector <4 x i32> <1, 2, 3, 4> by value to
; @external_void_func_v4i32 through an amdgpu_gfx call. In every checked
; configuration the four elements are expected in v0-v3 before s_swappc_b64,
; and s30, s31 and the incoming s33 are kept in lanes 0-2 of v40 across the
; call. The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
7214 define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 {
7215 ; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
7217 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7218 ; GFX9-NEXT: s_mov_b32 s34, s33
7219 ; GFX9-NEXT: s_mov_b32 s33, s32
7220 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7221 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7222 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7223 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7224 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7225 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi
7226 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo
7227 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
7228 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
7229 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
7230 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
7231 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7232 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7233 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7234 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7235 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7236 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7237 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7238 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7239 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7240 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7241 ; GFX9-NEXT: s_mov_b32 s33, s34
7242 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7243 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7245 ; GFX10-LABEL: test_call_external_void_func_v4i32_imm:
7247 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7248 ; GFX10-NEXT: s_mov_b32 s34, s33
7249 ; GFX10-NEXT: s_mov_b32 s33, s32
7250 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7251 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7252 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7253 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7254 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7255 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
7256 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
7257 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
7258 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
7259 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7260 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi
7261 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo
7262 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7263 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7264 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7265 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7266 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7267 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7268 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7269 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7270 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7271 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7272 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7273 ; GFX10-NEXT: s_mov_b32 s33, s34
7274 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7275 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7277 ; GFX11-LABEL: test_call_external_void_func_v4i32_imm:
7279 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7280 ; GFX11-NEXT: s_mov_b32 s0, s33
7281 ; GFX11-NEXT: s_mov_b32 s33, s32
7282 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7283 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7284 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7285 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7286 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
7287 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
7288 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7289 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi
7290 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo
7291 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7292 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7293 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7294 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7295 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7296 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7297 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7298 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7299 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7300 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7301 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7302 ; GFX11-NEXT: s_mov_b32 s33, s0
7303 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7304 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7306 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm:
7307 ; GFX10-SCRATCH: ; %bb.0:
7308 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7309 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7310 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7311 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7312 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7313 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7314 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7315 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7316 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
7317 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
7318 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
7319 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
7320 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7321 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi
7322 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo
7323 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7324 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7325 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7326 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7327 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7328 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7329 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7330 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7331 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7332 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7333 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7334 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7335 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7336 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test - a by-value call whose vector argument is entirely constant.
7337 call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
; Passes the constant vector <5 x i32> <1, 2, 3, 4, 5> by value to
; @external_void_func_v5i32 through an amdgpu_gfx call. In every checked
; configuration the five elements are expected in v0-v4 before s_swappc_b64
; (the gfx1100 output pairs v0-v3 into v_dual_mov_b32 and moves v4 on its
; own). s30, s31 and the incoming s33 are kept in lanes 0-2 of v40 across the
; call. The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
7341 define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 {
7342 ; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
7344 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7345 ; GFX9-NEXT: s_mov_b32 s34, s33
7346 ; GFX9-NEXT: s_mov_b32 s33, s32
7347 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7348 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7349 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7350 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7351 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7352 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i32@abs32@hi
7353 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i32@abs32@lo
7354 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
7355 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
7356 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
7357 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
7358 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
7359 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7360 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7361 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7362 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7363 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7364 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7365 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7366 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7367 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7368 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7369 ; GFX9-NEXT: s_mov_b32 s33, s34
7370 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7371 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7373 ; GFX10-LABEL: test_call_external_void_func_v5i32_imm:
7375 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7376 ; GFX10-NEXT: s_mov_b32 s34, s33
7377 ; GFX10-NEXT: s_mov_b32 s33, s32
7378 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7379 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7380 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7381 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7382 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7383 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
7384 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
7385 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
7386 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
7387 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7388 ; GFX10-NEXT: v_mov_b32_e32 v4, 5
7389 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i32@abs32@hi
7390 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32@abs32@lo
7391 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7392 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7393 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7394 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7395 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7396 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7397 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7398 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7399 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7400 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7401 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7402 ; GFX10-NEXT: s_mov_b32 s33, s34
7403 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7404 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7406 ; GFX11-LABEL: test_call_external_void_func_v5i32_imm:
7408 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7409 ; GFX11-NEXT: s_mov_b32 s0, s33
7410 ; GFX11-NEXT: s_mov_b32 s33, s32
7411 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7412 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7413 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7414 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7415 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
7416 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
7417 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7418 ; GFX11-NEXT: v_mov_b32_e32 v4, 5
7419 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32@abs32@hi
7420 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32@abs32@lo
7421 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7422 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7423 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7424 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7425 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7426 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7427 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7428 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7429 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7430 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7431 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7432 ; GFX11-NEXT: s_mov_b32 s33, s0
7433 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7434 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7436 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm:
7437 ; GFX10-SCRATCH: ; %bb.0:
7438 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7439 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7440 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7441 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7442 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7443 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7444 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7445 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7446 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
7447 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
7448 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
7449 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
7450 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7451 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5
7452 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32@abs32@hi
7453 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i32@abs32@lo
7454 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7455 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7456 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7457 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7458 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7459 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7460 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7461 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7462 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7463 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7464 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7465 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7466 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7467 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test - a by-value call whose vector argument is entirely constant.
7468 call amdgpu_gfx void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
; Passes a loaded (non-constant) <8 x i32> by value to
; @external_void_func_v8i32 through an amdgpu_gfx call. The IR first loads an
; addrspace(1) pointer through an addrspace(4) pointer, then loads the vector
; through that pointer. The expected output is one scalar 64-bit load for the
; pointer followed by two 128-bit global loads (offsets 0 and 16) that fill
; v[0:3] and v[4:7] before s_swappc_b64. s30, s31 and the incoming s33 are
; kept in lanes 0-2 of v40 across the call.
; NOTE(review): the addrspace(4) pointer operand is undef, so the scalar load
; in the expected output uses whichever SGPR pair was allocated as its base;
; confirm before changing the operand, since the assertions may shift.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
7472 define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 {
7473 ; GFX9-LABEL: test_call_external_void_func_v8i32:
7475 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7476 ; GFX9-NEXT: s_mov_b32 s34, s33
7477 ; GFX9-NEXT: s_mov_b32 s33, s32
7478 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7479 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7480 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7481 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7482 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
7483 ; GFX9-NEXT: v_mov_b32_e32 v8, 0
7484 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7485 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7486 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
7487 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35]
7488 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16
7489 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi
7490 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo
7491 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7492 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7493 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7494 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7495 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7496 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7497 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7498 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7499 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7500 ; GFX9-NEXT: s_mov_b32 s33, s34
7501 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7502 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7504 ; GFX10-LABEL: test_call_external_void_func_v8i32:
7506 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7507 ; GFX10-NEXT: s_mov_b32 s34, s33
7508 ; GFX10-NEXT: s_mov_b32 s33, s32
7509 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7510 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7511 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7512 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7513 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7514 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
7515 ; GFX10-NEXT: v_mov_b32_e32 v8, 0
7516 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7517 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
7518 ; GFX10-NEXT: s_clause 0x1
7519 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35]
7520 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16
7521 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7522 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi
7523 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo
7524 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7525 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7526 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7527 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7528 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7529 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7530 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7531 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7532 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7533 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7534 ; GFX10-NEXT: s_mov_b32 s33, s34
7535 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7536 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7538 ; GFX11-LABEL: test_call_external_void_func_v8i32:
7540 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7541 ; GFX11-NEXT: s_mov_b32 s0, s33
7542 ; GFX11-NEXT: s_mov_b32 s33, s32
7543 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7544 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7545 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7546 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7547 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
7548 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
7549 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7550 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
7551 ; GFX11-NEXT: s_clause 0x1
7552 ; GFX11-NEXT: global_load_b128 v[0:3], v4, s[0:1]
7553 ; GFX11-NEXT: global_load_b128 v[4:7], v4, s[0:1] offset:16
7554 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7555 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi
7556 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo
7557 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7558 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7559 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7560 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7561 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7562 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7563 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7564 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7565 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7566 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7567 ; GFX11-NEXT: s_mov_b32 s33, s0
7568 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7569 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7571 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32:
7572 ; GFX10-SCRATCH: ; %bb.0:
7573 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7574 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7575 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7576 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7577 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7578 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7579 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7580 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7581 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
7582 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 0
7583 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7584 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
7585 ; GFX10-SCRATCH-NEXT: s_clause 0x1
7586 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1]
7587 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16
7588 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7589 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi
7590 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo
7591 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7592 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7593 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7594 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7595 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7596 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7597 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7598 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7599 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7600 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7601 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7602 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7603 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test - fetch the pointer, load the vector through it, then pass the
; vector by value.
7604 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
7605 %val = load <8 x i32>, ptr addrspace(1) %ptr
7606 call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> %val)
; Passes the constant vector <8 x i32> <1, 2, ..., 8> by value to
; @external_void_func_v8i32 through an amdgpu_gfx call. In every checked
; configuration the eight elements are expected in v0-v7 before s_swappc_b64
; (the gfx1100 output pairs them into four v_dual_mov_b32 instructions).
; s30, s31 and the incoming s33 are kept in lanes 0-2 of v40 across the call.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
7610 define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 {
7611 ; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
7613 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7614 ; GFX9-NEXT: s_mov_b32 s34, s33
7615 ; GFX9-NEXT: s_mov_b32 s33, s32
7616 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7617 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7618 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7619 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7620 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7621 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi
7622 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo
7623 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
7624 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
7625 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
7626 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
7627 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
7628 ; GFX9-NEXT: v_mov_b32_e32 v5, 6
7629 ; GFX9-NEXT: v_mov_b32_e32 v6, 7
7630 ; GFX9-NEXT: v_mov_b32_e32 v7, 8
7631 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7632 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7633 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7634 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7635 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7636 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7637 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7638 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7639 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7640 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7641 ; GFX9-NEXT: s_mov_b32 s33, s34
7642 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7643 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7645 ; GFX10-LABEL: test_call_external_void_func_v8i32_imm:
7647 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7648 ; GFX10-NEXT: s_mov_b32 s34, s33
7649 ; GFX10-NEXT: s_mov_b32 s33, s32
7650 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7651 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7652 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7653 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7654 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7655 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
7656 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
7657 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
7658 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
7659 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7660 ; GFX10-NEXT: v_mov_b32_e32 v4, 5
7661 ; GFX10-NEXT: v_mov_b32_e32 v5, 6
7662 ; GFX10-NEXT: v_mov_b32_e32 v6, 7
7663 ; GFX10-NEXT: v_mov_b32_e32 v7, 8
7664 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi
7665 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo
7666 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7667 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7668 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7669 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7670 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7671 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7672 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7673 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7674 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7675 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7676 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7677 ; GFX10-NEXT: s_mov_b32 s33, s34
7678 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7679 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7681 ; GFX11-LABEL: test_call_external_void_func_v8i32_imm:
7683 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7684 ; GFX11-NEXT: s_mov_b32 s0, s33
7685 ; GFX11-NEXT: s_mov_b32 s33, s32
7686 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7687 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7688 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7689 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7690 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
7691 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
7692 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7693 ; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6
7694 ; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8
7695 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi
7696 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo
7697 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7698 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7699 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7700 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7701 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7702 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7703 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7704 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7705 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7706 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7707 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7708 ; GFX11-NEXT: s_mov_b32 s33, s0
7709 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7710 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7712 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm:
7713 ; GFX10-SCRATCH: ; %bb.0:
7714 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7715 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7716 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7717 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7718 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7719 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7720 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7721 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7722 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
7723 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
7724 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
7725 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
7726 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7727 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5
7728 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 6
7729 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 7
7730 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 8
7731 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi
7732 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo
7733 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7734 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7735 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7736 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7737 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7738 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7739 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7740 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7741 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7742 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7743 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7744 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7745 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7746 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test - a by-value call whose vector argument is entirely constant.
7747 call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
7751 define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 {
7752 ; GFX9-LABEL: test_call_external_void_func_v16i32:
7754 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7755 ; GFX9-NEXT: s_mov_b32 s34, s33
7756 ; GFX9-NEXT: s_mov_b32 s33, s32
7757 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7758 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7759 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7760 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7761 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
7762 ; GFX9-NEXT: v_mov_b32_e32 v16, 0
7763 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7764 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7765 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
7766 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35]
7767 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16
7768 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32
7769 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48
7770 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i32@abs32@hi
7771 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i32@abs32@lo
7772 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7773 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7774 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7775 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7776 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7777 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7778 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7779 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7780 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7781 ; GFX9-NEXT: s_mov_b32 s33, s34
7782 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7783 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7785 ; GFX10-LABEL: test_call_external_void_func_v16i32:
7787 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7788 ; GFX10-NEXT: s_mov_b32 s34, s33
7789 ; GFX10-NEXT: s_mov_b32 s33, s32
7790 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7791 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7792 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7793 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7794 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7795 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
7796 ; GFX10-NEXT: v_mov_b32_e32 v16, 0
7797 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7798 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
7799 ; GFX10-NEXT: s_clause 0x3
7800 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35]
7801 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16
7802 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32
7803 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48
7804 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7805 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i32@abs32@hi
7806 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i32@abs32@lo
7807 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7808 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7809 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7810 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7811 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7812 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7813 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7814 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7815 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7816 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7817 ; GFX10-NEXT: s_mov_b32 s33, s34
7818 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7819 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7821 ; GFX11-LABEL: test_call_external_void_func_v16i32:
7823 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7824 ; GFX11-NEXT: s_mov_b32 s0, s33
7825 ; GFX11-NEXT: s_mov_b32 s33, s32
7826 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7827 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7828 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7829 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7830 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
7831 ; GFX11-NEXT: v_mov_b32_e32 v12, 0
7832 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7833 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
7834 ; GFX11-NEXT: s_clause 0x3
7835 ; GFX11-NEXT: global_load_b128 v[0:3], v12, s[0:1]
7836 ; GFX11-NEXT: global_load_b128 v[4:7], v12, s[0:1] offset:16
7837 ; GFX11-NEXT: global_load_b128 v[8:11], v12, s[0:1] offset:32
7838 ; GFX11-NEXT: global_load_b128 v[12:15], v12, s[0:1] offset:48
7839 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7840 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i32@abs32@hi
7841 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i32@abs32@lo
7842 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7843 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7844 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7845 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7846 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7847 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7848 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7849 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7850 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7851 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7852 ; GFX11-NEXT: s_mov_b32 s33, s0
7853 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7854 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7856 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32:
7857 ; GFX10-SCRATCH: ; %bb.0:
7858 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7859 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7860 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7861 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7862 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7863 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7864 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7865 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7866 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
7867 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0
7868 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7869 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
7870 ; GFX10-SCRATCH-NEXT: s_clause 0x3
7871 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1]
7872 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16
7873 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32
7874 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48
7875 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7876 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16i32@abs32@hi
7877 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i32@abs32@lo
7878 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7879 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7880 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7881 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7882 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7883 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7884 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7885 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7886 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7887 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7888 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7889 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7890 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
7891 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
7892 %val = load <16 x i32>, ptr addrspace(1) %ptr
7893 call amdgpu_gfx void @external_void_func_v16i32(<16 x i32> %val)
; Loads a <32 x i32> through a pointer that is itself read from an undef
; addrspace(4) address, then passes the vector as the only argument of an
; amdgpu_gfx call to @external_void_func_v32i32.
; In every checked configuration the vector is materialised in v0-v31 by eight
; 128-bit global loads issued before the s_swappc_b64 call.
7897 define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 {
7898 ; GFX9-LABEL: test_call_external_void_func_v32i32:
7900 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7901 ; GFX9-NEXT: s_mov_b32 s34, s33
7902 ; GFX9-NEXT: s_mov_b32 s33, s32
7903 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7904 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7905 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7906 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7907 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
7908 ; GFX9-NEXT: v_mov_b32_e32 v28, 0
7909 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7910 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7911 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
7912 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35]
7913 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16
7914 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32
7915 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48
7916 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64
7917 ; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80
7918 ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96
7919 ; GFX9-NEXT: s_nop 0
7920 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112
7921 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32@abs32@hi
7922 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32@abs32@lo
7923 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7924 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7925 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7926 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7927 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7928 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7929 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7930 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7931 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7932 ; GFX9-NEXT: s_mov_b32 s33, s34
7933 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7934 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7936 ; GFX10-LABEL: test_call_external_void_func_v32i32:
7938 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7939 ; GFX10-NEXT: s_mov_b32 s34, s33
7940 ; GFX10-NEXT: s_mov_b32 s33, s32
7941 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7942 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7943 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7944 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7945 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7946 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
7947 ; GFX10-NEXT: v_mov_b32_e32 v32, 0
7948 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7949 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
7950 ; GFX10-NEXT: s_clause 0x7
7951 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35]
7952 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16
7953 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32
7954 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48
7955 ; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64
7956 ; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80
7957 ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96
7958 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112
7959 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7960 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32@abs32@hi
7961 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32@abs32@lo
7962 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7963 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7964 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7965 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7966 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7967 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7968 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7969 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7970 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7971 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7972 ; GFX10-NEXT: s_mov_b32 s33, s34
7973 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7974 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7976 ; GFX11-LABEL: test_call_external_void_func_v32i32:
7978 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7979 ; GFX11-NEXT: s_mov_b32 s0, s33
7980 ; GFX11-NEXT: s_mov_b32 s33, s32
7981 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7982 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7983 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7984 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7985 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
7986 ; GFX11-NEXT: v_mov_b32_e32 v28, 0
7987 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7988 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
7989 ; GFX11-NEXT: s_clause 0x7
7990 ; GFX11-NEXT: global_load_b128 v[0:3], v28, s[0:1]
7991 ; GFX11-NEXT: global_load_b128 v[4:7], v28, s[0:1] offset:16
7992 ; GFX11-NEXT: global_load_b128 v[8:11], v28, s[0:1] offset:32
7993 ; GFX11-NEXT: global_load_b128 v[12:15], v28, s[0:1] offset:48
7994 ; GFX11-NEXT: global_load_b128 v[16:19], v28, s[0:1] offset:64
7995 ; GFX11-NEXT: global_load_b128 v[20:23], v28, s[0:1] offset:80
7996 ; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96
7997 ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112
7998 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7999 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32@abs32@hi
8000 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32@abs32@lo
8001 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8002 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8003 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8004 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8005 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8006 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8007 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8008 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
8009 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8010 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8011 ; GFX11-NEXT: s_mov_b32 s33, s0
8012 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8013 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8015 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32:
8016 ; GFX10-SCRATCH: ; %bb.0:
8017 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8018 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8019 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8020 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8021 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
8022 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8023 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8024 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8025 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
8026 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0
8027 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8028 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8029 ; GFX10-SCRATCH-NEXT: s_clause 0x7
8030 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1]
8031 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] offset:16
8032 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:32
8033 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:48
8034 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:64
8035 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
8036 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96
8037 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112
8038 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8039 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32@abs32@hi
8040 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32@abs32@lo
8041 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8042 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8043 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8044 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8045 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8046 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8047 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
8048 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8049 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8050 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8051 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8052 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8053 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
8054 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
8055 %val = load <32 x i32>, ptr addrspace(1) %ptr
8056 call amdgpu_gfx void @external_void_func_v32i32(<32 x i32> %val)
; Passes a loaded <32 x i32> followed by one extra loaded i32 to
; @external_void_func_v32i32_i32. The function's own unnamed i32 parameter is
; not referenced by the body; the extra argument comes from a load of an undef
; addrspace(1) pointer.
; In the checks the vector fills v0-v31, and the trailing i32 is the only
; argument written to memory: it is stored at the address in s32 immediately
; before s_swappc_b64, guarded by an s_waitcnt vmcnt(8).
8060 define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 {
8061 ; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
8063 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8064 ; GFX9-NEXT: s_mov_b32 s34, s33
8065 ; GFX9-NEXT: s_mov_b32 s33, s32
8066 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8067 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8068 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8069 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8070 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8071 ; GFX9-NEXT: v_mov_b32_e32 v28, 0
8072 ; GFX9-NEXT: global_load_dword v32, v[0:1], off
8073 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8074 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8075 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35]
8076 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16
8077 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32
8078 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48
8079 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64
8080 ; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80
8081 ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96
8082 ; GFX9-NEXT: s_nop 0
8083 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112
8084 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8085 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32@abs32@hi
8086 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32@abs32@lo
8087 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8088 ; GFX9-NEXT: s_waitcnt vmcnt(8)
8089 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
8090 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8091 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8092 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8093 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8094 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8095 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8096 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8097 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8098 ; GFX9-NEXT: s_mov_b32 s33, s34
8099 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8100 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8102 ; GFX10-LABEL: test_call_external_void_func_v32i32_i32:
8104 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8105 ; GFX10-NEXT: s_mov_b32 s34, s33
8106 ; GFX10-NEXT: s_mov_b32 s33, s32
8107 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8108 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8109 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8110 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8111 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8112 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8113 ; GFX10-NEXT: v_mov_b32_e32 v32, 0
8114 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8115 ; GFX10-NEXT: global_load_dword v33, v[0:1], off
8116 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8117 ; GFX10-NEXT: s_clause 0x7
8118 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35]
8119 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16
8120 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32
8121 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48
8122 ; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64
8123 ; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80
8124 ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96
8125 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112
8126 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8127 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32@abs32@hi
8128 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32@abs32@lo
8129 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8130 ; GFX10-NEXT: s_waitcnt vmcnt(8)
8131 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32
8132 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8133 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8134 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8135 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8136 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8137 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8138 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8139 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8140 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8141 ; GFX10-NEXT: s_mov_b32 s33, s34
8142 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8143 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8145 ; GFX11-LABEL: test_call_external_void_func_v32i32_i32:
8147 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8148 ; GFX11-NEXT: s_mov_b32 s0, s33
8149 ; GFX11-NEXT: s_mov_b32 s33, s32
8150 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8151 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
8152 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8153 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8154 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
8155 ; GFX11-NEXT: v_mov_b32_e32 v28, 0
8156 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8157 ; GFX11-NEXT: global_load_b32 v32, v[0:1], off
8158 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
8159 ; GFX11-NEXT: s_clause 0x7
8160 ; GFX11-NEXT: global_load_b128 v[0:3], v28, s[0:1]
8161 ; GFX11-NEXT: global_load_b128 v[4:7], v28, s[0:1] offset:16
8162 ; GFX11-NEXT: global_load_b128 v[8:11], v28, s[0:1] offset:32
8163 ; GFX11-NEXT: global_load_b128 v[12:15], v28, s[0:1] offset:48
8164 ; GFX11-NEXT: global_load_b128 v[16:19], v28, s[0:1] offset:64
8165 ; GFX11-NEXT: global_load_b128 v[20:23], v28, s[0:1] offset:80
8166 ; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96
8167 ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112
8168 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8169 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32@abs32@hi
8170 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32@abs32@lo
8171 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8172 ; GFX11-NEXT: s_waitcnt vmcnt(8)
8173 ; GFX11-NEXT: scratch_store_b32 off, v32, s32
8174 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8175 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8176 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8177 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8178 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8179 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
8180 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8181 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8182 ; GFX11-NEXT: s_mov_b32 s33, s0
8183 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8184 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8186 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32:
8187 ; GFX10-SCRATCH: ; %bb.0:
8188 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8189 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8190 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8191 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8192 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
8193 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8194 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8195 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8196 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
8197 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0
8198 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8199 ; GFX10-SCRATCH-NEXT: global_load_dword v33, v[0:1], off
8200 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8201 ; GFX10-SCRATCH-NEXT: s_clause 0x7
8202 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1]
8203 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] offset:16
8204 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:32
8205 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:48
8206 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:64
8207 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
8208 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96
8209 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112
8210 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8211 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32@abs32@hi
8212 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32@abs32@lo
8213 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8214 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8)
8215 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32
8216 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8217 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8218 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8219 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8220 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8221 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
8222 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8223 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8224 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8225 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8226 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8227 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
8228 %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
8229 %val0 = load <32 x i32>, ptr addrspace(1) %ptr0
8230 %val1 = load i32, ptr addrspace(1) undef
8231 call amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
; Calls @external_i32_func_i32 with the constant 42 and volatile-stores the
; returned i32 to %out.
; Because %out is needed after the call, the checks show its incoming registers
; (v0/v1) copied into v41/v42, which are spilled before the call and reloaded
; after the global store of the result in v0.
8235 define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
8236 ; GFX9-LABEL: test_call_external_i32_func_i32_imm:
8238 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8239 ; GFX9-NEXT: s_mov_b32 s34, s33
8240 ; GFX9-NEXT: s_mov_b32 s33, s32
8241 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8242 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
8243 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8244 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8245 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
8246 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
8247 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8248 ; GFX9-NEXT: v_mov_b32_e32 v41, v0
8249 ; GFX9-NEXT: s_mov_b32 s35, external_i32_func_i32@abs32@hi
8250 ; GFX9-NEXT: s_mov_b32 s34, external_i32_func_i32@abs32@lo
8251 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
8252 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8253 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8254 ; GFX9-NEXT: v_mov_b32_e32 v42, v1
8255 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8256 ; GFX9-NEXT: global_store_dword v[41:42], v0, off
8257 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8258 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
8259 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
8260 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8261 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8262 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8263 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8264 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
8265 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8266 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8267 ; GFX9-NEXT: s_mov_b32 s33, s34
8268 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8269 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8271 ; GFX10-LABEL: test_call_external_i32_func_i32_imm:
8273 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8274 ; GFX10-NEXT: s_mov_b32 s34, s33
8275 ; GFX10-NEXT: s_mov_b32 s33, s32
8276 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8277 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
8278 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8279 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8280 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8281 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
8282 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
8283 ; GFX10-NEXT: v_mov_b32_e32 v41, v0
8284 ; GFX10-NEXT: v_mov_b32_e32 v0, 42
8285 ; GFX10-NEXT: s_mov_b32 s35, external_i32_func_i32@abs32@hi
8286 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8287 ; GFX10-NEXT: s_mov_b32 s34, external_i32_func_i32@abs32@lo
8288 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8289 ; GFX10-NEXT: v_mov_b32_e32 v42, v1
8290 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8291 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8292 ; GFX10-NEXT: global_store_dword v[41:42], v0, off
8293 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8294 ; GFX10-NEXT: s_clause 0x1
8295 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
8296 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
8297 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8298 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8299 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8300 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8301 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
8302 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8303 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8304 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8305 ; GFX10-NEXT: s_mov_b32 s33, s34
8306 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8307 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8309 ; GFX11-LABEL: test_call_external_i32_func_i32_imm:
8311 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8312 ; GFX11-NEXT: s_mov_b32 s0, s33
8313 ; GFX11-NEXT: s_mov_b32 s33, s32
8314 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8315 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
8316 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8317 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8318 ; GFX11-NEXT: s_clause 0x1
8319 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
8320 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
8321 ; GFX11-NEXT: v_dual_mov_b32 v42, v1 :: v_dual_mov_b32 v41, v0
8322 ; GFX11-NEXT: v_mov_b32_e32 v0, 42
8323 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8324 ; GFX11-NEXT: s_mov_b32 s1, external_i32_func_i32@abs32@hi
8325 ; GFX11-NEXT: s_mov_b32 s0, external_i32_func_i32@abs32@lo
8326 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8327 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8328 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8329 ; GFX11-NEXT: global_store_b32 v[41:42], v0, off dlc
8330 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
8331 ; GFX11-NEXT: s_clause 0x1
8332 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
8333 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
8334 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8335 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8336 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8337 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8338 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
8339 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8340 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8341 ; GFX11-NEXT: s_mov_b32 s33, s0
8342 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8343 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8345 ; GFX10-SCRATCH-LABEL: test_call_external_i32_func_i32_imm:
8346 ; GFX10-SCRATCH: ; %bb.0:
8347 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8348 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8349 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8350 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8351 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
8352 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8353 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8354 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8355 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
8356 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
8357 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, v0
8358 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42
8359 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_i32_func_i32@abs32@hi
8360 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8361 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_i32_func_i32@abs32@lo
8362 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8363 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, v1
8364 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8365 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8366 ; GFX10-SCRATCH-NEXT: global_store_dword v[41:42], v0, off
8367 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
8368 ; GFX10-SCRATCH-NEXT: s_clause 0x1
8369 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
8370 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
8371 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8372 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8373 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8374 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8375 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
8376 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8377 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8378 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8379 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8380 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8381 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
8382 %val = call amdgpu_gfx i32 @external_i32_func_i32(i32 42)
8383 store volatile i32 %val, ptr addrspace(1) %out
8387 define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 {
8388 ; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
8390 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8391 ; GFX9-NEXT: s_mov_b32 s34, s33
8392 ; GFX9-NEXT: s_mov_b32 s33, s32
8393 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8394 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8395 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8396 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8397 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8398 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
8399 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8400 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8401 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8402 ; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35]
8403 ; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4
8404 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_struct_i8_i32@abs32@hi
8405 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_struct_i8_i32@abs32@lo
8406 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8407 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8408 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8409 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8410 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8411 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8412 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8413 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8414 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8415 ; GFX9-NEXT: s_mov_b32 s33, s34
8416 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8417 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8419 ; GFX10-LABEL: test_call_external_void_func_struct_i8_i32:
8421 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8422 ; GFX10-NEXT: s_mov_b32 s34, s33
8423 ; GFX10-NEXT: s_mov_b32 s33, s32
8424 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8425 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8426 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8427 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8428 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8429 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8430 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
8431 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8432 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8433 ; GFX10-NEXT: s_clause 0x1
8434 ; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35]
8435 ; GFX10-NEXT: global_load_dword v1, v2, s[34:35] offset:4
8436 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8437 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_struct_i8_i32@abs32@hi
8438 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_struct_i8_i32@abs32@lo
8439 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8440 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8441 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8442 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8443 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8444 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8445 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8446 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8447 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8448 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8449 ; GFX10-NEXT: s_mov_b32 s33, s34
8450 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8451 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8453 ; GFX11-LABEL: test_call_external_void_func_struct_i8_i32:
8455 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8456 ; GFX11-NEXT: s_mov_b32 s0, s33
8457 ; GFX11-NEXT: s_mov_b32 s33, s32
8458 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8459 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
8460 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8461 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8462 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
8463 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
8464 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8465 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
8466 ; GFX11-NEXT: s_clause 0x1
8467 ; GFX11-NEXT: global_load_u8 v0, v1, s[0:1]
8468 ; GFX11-NEXT: global_load_b32 v1, v1, s[0:1] offset:4
8469 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8470 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi
8471 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo
8472 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8473 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8474 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8475 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8476 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8477 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8478 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8479 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
8480 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8481 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8482 ; GFX11-NEXT: s_mov_b32 s33, s0
8483 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8484 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8486 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_struct_i8_i32:
8487 ; GFX10-SCRATCH: ; %bb.0:
8488 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8489 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8490 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8491 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8492 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
8493 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8494 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8495 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8496 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
8497 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
8498 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8499 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8500 ; GFX10-SCRATCH-NEXT: s_clause 0x1
8501 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v2, s[0:1]
8502 ; GFX10-SCRATCH-NEXT: global_load_dword v1, v2, s[0:1] offset:4
8503 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8504 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi
8505 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo
8506 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8507 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8508 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8509 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8510 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8511 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8512 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
8513 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8514 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8515 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8516 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8517 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8518 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
8519 %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
8520 %val = load { i8, i32 }, ptr addrspace(1) %ptr0
8521 call amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 } %val)
; Test: pass a { i8, i32 } struct to an external amdgpu_gfx function through a
; byval pointer. The IR at the end of this function allocates the struct in
; addrspace(5), stores 3 into field 0 and 8 into field 1, and passes the alloca
; as the byval argument. The assertions below are autogenerated; regenerate
; them with the update script instead of editing them by hand.
8525 define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 {
8526 ; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
8528 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8529 ; GFX9-NEXT: s_mov_b32 s34, s33
8530 ; GFX9-NEXT: s_mov_b32 s33, s32
8531 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8532 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
8533 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8534 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
8535 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8536 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33
8537 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
8538 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8539 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
8540 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_byval_struct_i8_i32@abs32@hi
8541 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_byval_struct_i8_i32@abs32@lo
8542 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33
8543 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8544 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8545 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8546 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8547 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8548 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8549 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8550 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
8551 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8552 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8553 ; GFX9-NEXT: s_mov_b32 s33, s34
8554 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8555 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8557 ; GFX10-LABEL: test_call_external_void_func_byval_struct_i8_i32:
8559 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8560 ; GFX10-NEXT: s_mov_b32 s34, s33
8561 ; GFX10-NEXT: s_mov_b32 s33, s32
8562 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8563 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
8564 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8565 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8566 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8567 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
8568 ; GFX10-NEXT: v_mov_b32_e32 v1, 8
8569 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_byval_struct_i8_i32@abs32@hi
8570 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_byval_struct_i8_i32@abs32@lo
8571 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8572 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33
8573 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4
8574 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
8575 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8576 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8577 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8578 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8579 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8580 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8581 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8582 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
8583 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8584 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8585 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8586 ; GFX10-NEXT: s_mov_b32 s33, s34
8587 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8588 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8590 ; GFX11-LABEL: test_call_external_void_func_byval_struct_i8_i32:
8592 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8593 ; GFX11-NEXT: s_mov_b32 s0, s33
8594 ; GFX11-NEXT: s_mov_b32 s33, s32
8595 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8596 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
8597 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8598 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8599 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
8600 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi
8601 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo
8602 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8603 ; GFX11-NEXT: s_clause 0x1
8604 ; GFX11-NEXT: scratch_store_b8 off, v0, s33
8605 ; GFX11-NEXT: scratch_store_b32 off, v1, s33 offset:4
8606 ; GFX11-NEXT: v_mov_b32_e32 v0, s33
8607 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8608 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8609 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8610 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8611 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8612 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8613 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8614 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8615 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
8616 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8617 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8618 ; GFX11-NEXT: s_mov_b32 s33, s0
8619 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8620 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8622 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_byval_struct_i8_i32:
8623 ; GFX10-SCRATCH: ; %bb.0:
8624 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8625 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8626 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8627 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8628 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
8629 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8630 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8631 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8632 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
8633 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8
8634 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi
8635 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo
8636 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8637 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33
8638 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4
8639 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s33
8640 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8641 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8642 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8643 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8644 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8645 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8646 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8647 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
8648 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8649 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8650 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8651 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8652 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8653 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
8654 %val = alloca { i8, i32 }, align 4, addrspace(5)
8655 %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0
8656 %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1
8657 store i8 3, ptr addrspace(5) %gep0
8658 store i32 8, ptr addrspace(5) %gep1
8659 call amdgpu_gfx void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val)
; Test: call an external amdgpu_gfx function that takes an sret pointer and a
; byval pointer, both to { i8, i32 } structs allocated in addrspace(5). The
; byval input is initialized to { 3, 8 } before the call; after the call both
; fields of the sret output are loaded and written out with volatile stores.
; The function's unnamed i32 parameter is not used by the visible IR body.
; The assertions below are autogenerated; regenerate them with the update
; script instead of editing them by hand.
8663 define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
8664 ; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
8666 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8667 ; GFX9-NEXT: s_mov_b32 s34, s33
8668 ; GFX9-NEXT: s_mov_b32 s33, s32
8669 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8670 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
8671 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8672 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
8673 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33
8674 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
8675 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8676 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
8677 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33
8678 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8679 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi
8680 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo
8681 ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0
8682 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33
8683 ; GFX9-NEXT: s_addk_i32 s32, 0x800
8684 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8685 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8686 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8
8687 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12
8688 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8689 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8690 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8691 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8692 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
8693 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8694 ; GFX9-NEXT: global_store_dword v[0:1], v1, off
8695 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8696 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8697 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
8698 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8699 ; GFX9-NEXT: s_addk_i32 s32, 0xf800
8700 ; GFX9-NEXT: s_mov_b32 s33, s34
8701 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8702 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8704 ; GFX10-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
8706 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8707 ; GFX10-NEXT: s_mov_b32 s34, s33
8708 ; GFX10-NEXT: s_mov_b32 s33, s32
8709 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8710 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
8711 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8712 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8713 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
8714 ; GFX10-NEXT: v_mov_b32_e32 v1, 8
8715 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8716 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi
8717 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo
8718 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33
8719 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4
8720 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
8721 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8722 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33
8723 ; GFX10-NEXT: s_addk_i32 s32, 0x400
8724 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0
8725 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8726 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8727 ; GFX10-NEXT: s_clause 0x1
8728 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8
8729 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12
8730 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8731 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8732 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8733 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8734 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
8735 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8736 ; GFX10-NEXT: global_store_dword v[0:1], v1, off
8737 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8738 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8739 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
8740 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8741 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8742 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00
8743 ; GFX10-NEXT: s_mov_b32 s33, s34
8744 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8745 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8747 ; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
8749 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8750 ; GFX11-NEXT: s_mov_b32 s0, s33
8751 ; GFX11-NEXT: s_mov_b32 s33, s32
8752 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8753 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; 4-byte Folded Spill
8754 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8755 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8756 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
8757 ; GFX11-NEXT: s_add_i32 s32, s32, 32
8758 ; GFX11-NEXT: s_add_i32 s2, s33, 8
8759 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8760 ; GFX11-NEXT: s_clause 0x1
8761 ; GFX11-NEXT: scratch_store_b8 off, v0, s33
8762 ; GFX11-NEXT: scratch_store_b32 off, v1, s33 offset:4
8763 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s33
8764 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi
8765 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo
8766 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8767 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8768 ; GFX11-NEXT: s_clause 0x1
8769 ; GFX11-NEXT: scratch_load_u8 v0, off, s33 offset:8
8770 ; GFX11-NEXT: scratch_load_b32 v1, off, s33 offset:12
8771 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8772 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8773 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8774 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8775 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off dlc
8776 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
8777 ; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
8778 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
8779 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8780 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:16 ; 4-byte Folded Reload
8781 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8782 ; GFX11-NEXT: s_addk_i32 s32, 0xffe0
8783 ; GFX11-NEXT: s_mov_b32 s33, s0
8784 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8785 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8787 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
8788 ; GFX10-SCRATCH: ; %bb.0:
8789 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8790 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8791 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8792 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8793 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:16 ; 4-byte Folded Spill
8794 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8795 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8796 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8797 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
8798 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8
8799 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32
8800 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s33, 8
8801 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8802 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33
8803 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4
8804 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s2
8805 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s33
8806 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi
8807 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo
8808 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8809 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8810 ; GFX10-SCRATCH-NEXT: s_clause 0x1
8811 ; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8
8812 ; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12
8813 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8814 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8815 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8816 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8817 ; GFX10-SCRATCH-NEXT: global_store_byte v[0:1], v0, off
8818 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
8819 ; GFX10-SCRATCH-NEXT: global_store_dword v[0:1], v1, off
8820 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
8821 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8822 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:16 ; 4-byte Folded Reload
8823 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8824 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8825 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0
8826 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8827 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8828 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
8829 %in.val = alloca { i8, i32 }, align 4, addrspace(5)
8830 %out.val = alloca { i8, i32 }, align 4, addrspace(5)
8831 %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
8832 %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
8833 store i8 3, ptr addrspace(5) %in.gep0
8834 store i32 8, ptr addrspace(5) %in.gep1
8835 call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %out.val, ptr addrspace(5) byval({ i8, i32 }) %in.val)
8836 %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
8837 %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
8838 %out.val0 = load i8, ptr addrspace(5) %out.gep0
8839 %out.val1 = load i32, ptr addrspace(5) %out.gep1
; Write both loaded sret fields out with volatile stores (the destination
; pointers are undef).
8841 store volatile i8 %out.val0, ptr addrspace(1) undef
8842 store volatile i32 %out.val1, ptr addrspace(1) undef
; Test: pass a <16 x i8> vector by value to an external amdgpu_gfx function.
; The IR at the end of this function loads a global pointer from an undef
; constant-address-space pointer, loads the <16 x i8> value through it, and
; passes that value to the callee. The assertions below are autogenerated;
; regenerate them with the update script instead of editing them by hand.
; Review note: the declaration of @external_void_func_v16i8 near the top of
; this file takes <8 x i8>, while the call below passes <16 x i8>. The
; declaration looks like a typo -- confirm and align it with this call.
8846 define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 {
8847 ; GFX9-LABEL: test_call_external_void_func_v16i8:
8849 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8850 ; GFX9-NEXT: s_mov_b32 s34, s33
8851 ; GFX9-NEXT: s_mov_b32 s33, s32
8852 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8853 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8854 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8855 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8856 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8857 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
8858 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8859 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8860 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8861 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8862 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35]
8863 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i8@abs32@hi
8864 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i8@abs32@lo
8865 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8866 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0
8867 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0
8868 ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 24, v0
8869 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
8870 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
8871 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
8872 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
8873 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
8874 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
8875 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3
8876 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3
8877 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3
8878 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
8879 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
8880 ; GFX9-NEXT: v_mov_b32_e32 v12, v3
8881 ; GFX9-NEXT: v_mov_b32_e32 v1, v16
8882 ; GFX9-NEXT: v_mov_b32_e32 v2, v17
8883 ; GFX9-NEXT: v_mov_b32_e32 v3, v18
8884 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8885 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8886 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8887 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8888 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8889 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8890 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8891 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8892 ; GFX9-NEXT: s_mov_b32 s33, s34
8893 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8894 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8896 ; GFX10-LABEL: test_call_external_void_func_v16i8:
8898 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8899 ; GFX10-NEXT: s_mov_b32 s34, s33
8900 ; GFX10-NEXT: s_mov_b32 s33, s32
8901 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8902 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8903 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8904 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8905 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8906 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8907 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
8908 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8909 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8910 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8911 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8912 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35]
8913 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8914 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i8@abs32@hi
8915 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i8@abs32@lo
8916 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8917 ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0
8918 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0
8919 ; GFX10-NEXT: v_lshrrev_b32_e32 v18, 24, v0
8920 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
8921 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
8922 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
8923 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v2
8924 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
8925 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2
8926 ; GFX10-NEXT: v_lshrrev_b32_e32 v13, 8, v3
8927 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v3
8928 ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 24, v3
8929 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
8930 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
8931 ; GFX10-NEXT: v_mov_b32_e32 v12, v3
8932 ; GFX10-NEXT: v_mov_b32_e32 v1, v16
8933 ; GFX10-NEXT: v_mov_b32_e32 v2, v17
8934 ; GFX10-NEXT: v_mov_b32_e32 v3, v18
8935 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8936 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8937 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8938 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8939 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8940 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8941 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8942 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8943 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8944 ; GFX10-NEXT: s_mov_b32 s33, s34
8945 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8946 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8948 ; GFX11-LABEL: test_call_external_void_func_v16i8:
8950 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8951 ; GFX11-NEXT: s_mov_b32 s0, s33
8952 ; GFX11-NEXT: s_mov_b32 s33, s32
8953 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8954 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
8955 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8956 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8957 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
8958 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
8959 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8960 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8961 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8962 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
8963 ; GFX11-NEXT: global_load_b128 v[0:3], v0, s[0:1]
8964 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i8@abs32@hi
8965 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i8@abs32@lo
8966 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8967 ; GFX11-NEXT: v_lshrrev_b32_e32 v16, 8, v0
8968 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0
8969 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 24, v0
8970 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
8971 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
8972 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
8973 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
8974 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
8975 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
8976 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3
8977 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3
8978 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3
8979 ; GFX11-NEXT: v_mov_b32_e32 v4, v1
8980 ; GFX11-NEXT: v_mov_b32_e32 v8, v2
8981 ; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18
8982 ; GFX11-NEXT: v_dual_mov_b32 v1, v16 :: v_dual_mov_b32 v2, v17
8983 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8984 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8985 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8986 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8987 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8988 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
8989 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8990 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8991 ; GFX11-NEXT: s_mov_b32 s33, s0
8992 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8993 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8995 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i8:
8996 ; GFX10-SCRATCH: ; %bb.0:
8997 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8998 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8999 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9000 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9001 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
9002 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9003 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9004 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
9005 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
9006 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
9007 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
9008 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
9009 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
9010 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
9011 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
9012 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9013 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16i8@abs32@hi
9014 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i8@abs32@lo
9015 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9016 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v16, 8, v0
9017 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v17, 16, v0
9018 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v18, 24, v0
9019 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
9020 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
9021 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
9022 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v9, 8, v2
9023 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v10, 16, v2
9024 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v11, 24, v2
9025 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v13, 8, v3
9026 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v14, 16, v3
9027 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v15, 24, v3
9028 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
9029 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, v2
9030 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, v3
9031 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v16
9032 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17
9033 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18
9034 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9035 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
9036 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
9037 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
9038 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9039 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
9040 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9041 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9042 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
9043 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
9044 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9045 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
9046 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
9047 %val = load <16 x i8>, ptr addrspace(1) %ptr
9048 call amdgpu_gfx void @external_void_func_v16i8(<16 x i8> %val)
9052 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
9053 ; GFX9-LABEL: tail_call_byval_align16:
9054 ; GFX9: ; %bb.0: ; %entry
9055 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9056 ; GFX9-NEXT: s_mov_b32 s6, s33
9057 ; GFX9-NEXT: s_mov_b32 s33, s32
9058 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
9059 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
9060 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
9061 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20
9062 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16
9063 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s33
9064 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
9065 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
9066 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
9067 ; GFX9-NEXT: v_writelane_b32 v40, s35, 3
9068 ; GFX9-NEXT: v_writelane_b32 v40, s36, 4
9069 ; GFX9-NEXT: v_writelane_b32 v40, s37, 5
9070 ; GFX9-NEXT: v_writelane_b32 v40, s38, 6
9071 ; GFX9-NEXT: v_writelane_b32 v40, s39, 7
9072 ; GFX9-NEXT: v_writelane_b32 v40, s40, 8
9073 ; GFX9-NEXT: v_writelane_b32 v40, s41, 9
9074 ; GFX9-NEXT: v_writelane_b32 v40, s42, 10
9075 ; GFX9-NEXT: v_writelane_b32 v40, s43, 11
9076 ; GFX9-NEXT: v_writelane_b32 v40, s44, 12
9077 ; GFX9-NEXT: v_writelane_b32 v40, s45, 13
9078 ; GFX9-NEXT: v_writelane_b32 v40, s46, 14
9079 ; GFX9-NEXT: v_writelane_b32 v40, s47, 15
9080 ; GFX9-NEXT: v_writelane_b32 v40, s48, 16
9081 ; GFX9-NEXT: v_writelane_b32 v40, s49, 17
9082 ; GFX9-NEXT: v_writelane_b32 v40, s50, 18
9083 ; GFX9-NEXT: v_writelane_b32 v40, s51, 19
9084 ; GFX9-NEXT: v_writelane_b32 v40, s52, 20
9085 ; GFX9-NEXT: v_writelane_b32 v40, s53, 21
9086 ; GFX9-NEXT: v_writelane_b32 v40, s54, 22
9087 ; GFX9-NEXT: v_writelane_b32 v40, s55, 23
9088 ; GFX9-NEXT: v_writelane_b32 v40, s56, 24
9089 ; GFX9-NEXT: v_writelane_b32 v40, s57, 25
9090 ; GFX9-NEXT: v_writelane_b32 v40, s58, 26
9091 ; GFX9-NEXT: v_writelane_b32 v40, s59, 27
9092 ; GFX9-NEXT: v_writelane_b32 v40, s60, 28
9093 ; GFX9-NEXT: v_writelane_b32 v40, s61, 29
9094 ; GFX9-NEXT: s_addk_i32 s32, 0x800
9095 ; GFX9-NEXT: v_writelane_b32 v40, s62, 30
9096 ; GFX9-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi
9097 ; GFX9-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo
9098 ; GFX9-NEXT: v_writelane_b32 v40, s63, 31
9099 ; GFX9-NEXT: s_waitcnt vmcnt(2)
9100 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
9101 ; GFX9-NEXT: s_waitcnt vmcnt(2)
9102 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32
9103 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
9104 ; GFX9-NEXT: v_readlane_b32 s63, v40, 31
9105 ; GFX9-NEXT: v_readlane_b32 s62, v40, 30
9106 ; GFX9-NEXT: v_readlane_b32 s61, v40, 29
9107 ; GFX9-NEXT: v_readlane_b32 s60, v40, 28
9108 ; GFX9-NEXT: v_readlane_b32 s59, v40, 27
9109 ; GFX9-NEXT: v_readlane_b32 s58, v40, 26
9110 ; GFX9-NEXT: v_readlane_b32 s57, v40, 25
9111 ; GFX9-NEXT: v_readlane_b32 s56, v40, 24
9112 ; GFX9-NEXT: v_readlane_b32 s55, v40, 23
9113 ; GFX9-NEXT: v_readlane_b32 s54, v40, 22
9114 ; GFX9-NEXT: v_readlane_b32 s53, v40, 21
9115 ; GFX9-NEXT: v_readlane_b32 s52, v40, 20
9116 ; GFX9-NEXT: v_readlane_b32 s51, v40, 19
9117 ; GFX9-NEXT: v_readlane_b32 s50, v40, 18
9118 ; GFX9-NEXT: v_readlane_b32 s49, v40, 17
9119 ; GFX9-NEXT: v_readlane_b32 s48, v40, 16
9120 ; GFX9-NEXT: v_readlane_b32 s47, v40, 15
9121 ; GFX9-NEXT: v_readlane_b32 s46, v40, 14
9122 ; GFX9-NEXT: v_readlane_b32 s45, v40, 13
9123 ; GFX9-NEXT: v_readlane_b32 s44, v40, 12
9124 ; GFX9-NEXT: v_readlane_b32 s43, v40, 11
9125 ; GFX9-NEXT: v_readlane_b32 s42, v40, 10
9126 ; GFX9-NEXT: v_readlane_b32 s41, v40, 9
9127 ; GFX9-NEXT: v_readlane_b32 s40, v40, 8
9128 ; GFX9-NEXT: v_readlane_b32 s39, v40, 7
9129 ; GFX9-NEXT: v_readlane_b32 s38, v40, 6
9130 ; GFX9-NEXT: v_readlane_b32 s37, v40, 5
9131 ; GFX9-NEXT: v_readlane_b32 s36, v40, 4
9132 ; GFX9-NEXT: v_readlane_b32 s35, v40, 3
9133 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
9134 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
9135 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
9136 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
9137 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
9138 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
9139 ; GFX9-NEXT: s_addk_i32 s32, 0xf800
9140 ; GFX9-NEXT: s_mov_b32 s33, s6
9141 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9142 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9144 ; GFX10-LABEL: tail_call_byval_align16:
9145 ; GFX10: ; %bb.0: ; %entry
9146 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9147 ; GFX10-NEXT: s_mov_b32 s6, s33
9148 ; GFX10-NEXT: s_mov_b32 s33, s32
9149 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
9150 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
9151 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9152 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
9153 ; GFX10-NEXT: s_clause 0x2
9154 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20
9155 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16
9156 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s33
9157 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
9158 ; GFX10-NEXT: s_addk_i32 s32, 0x400
9159 ; GFX10-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi
9160 ; GFX10-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo
9161 ; GFX10-NEXT: s_waitcnt vmcnt(2)
9162 ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
9163 ; GFX10-NEXT: s_waitcnt vmcnt(1)
9164 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32
9165 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
9166 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
9167 ; GFX10-NEXT: v_writelane_b32 v40, s35, 3
9168 ; GFX10-NEXT: v_writelane_b32 v40, s36, 4
9169 ; GFX10-NEXT: v_writelane_b32 v40, s37, 5
9170 ; GFX10-NEXT: v_writelane_b32 v40, s38, 6
9171 ; GFX10-NEXT: v_writelane_b32 v40, s39, 7
9172 ; GFX10-NEXT: v_writelane_b32 v40, s40, 8
9173 ; GFX10-NEXT: v_writelane_b32 v40, s41, 9
9174 ; GFX10-NEXT: v_writelane_b32 v40, s42, 10
9175 ; GFX10-NEXT: v_writelane_b32 v40, s43, 11
9176 ; GFX10-NEXT: v_writelane_b32 v40, s44, 12
9177 ; GFX10-NEXT: v_writelane_b32 v40, s45, 13
9178 ; GFX10-NEXT: v_writelane_b32 v40, s46, 14
9179 ; GFX10-NEXT: v_writelane_b32 v40, s47, 15
9180 ; GFX10-NEXT: v_writelane_b32 v40, s48, 16
9181 ; GFX10-NEXT: v_writelane_b32 v40, s49, 17
9182 ; GFX10-NEXT: v_writelane_b32 v40, s50, 18
9183 ; GFX10-NEXT: v_writelane_b32 v40, s51, 19
9184 ; GFX10-NEXT: v_writelane_b32 v40, s52, 20
9185 ; GFX10-NEXT: v_writelane_b32 v40, s53, 21
9186 ; GFX10-NEXT: v_writelane_b32 v40, s54, 22
9187 ; GFX10-NEXT: v_writelane_b32 v40, s55, 23
9188 ; GFX10-NEXT: v_writelane_b32 v40, s56, 24
9189 ; GFX10-NEXT: v_writelane_b32 v40, s57, 25
9190 ; GFX10-NEXT: v_writelane_b32 v40, s58, 26
9191 ; GFX10-NEXT: v_writelane_b32 v40, s59, 27
9192 ; GFX10-NEXT: v_writelane_b32 v40, s60, 28
9193 ; GFX10-NEXT: v_writelane_b32 v40, s61, 29
9194 ; GFX10-NEXT: v_writelane_b32 v40, s62, 30
9195 ; GFX10-NEXT: v_writelane_b32 v40, s63, 31
9196 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
9197 ; GFX10-NEXT: v_readlane_b32 s63, v40, 31
9198 ; GFX10-NEXT: v_readlane_b32 s62, v40, 30
9199 ; GFX10-NEXT: v_readlane_b32 s61, v40, 29
9200 ; GFX10-NEXT: v_readlane_b32 s60, v40, 28
9201 ; GFX10-NEXT: v_readlane_b32 s59, v40, 27
9202 ; GFX10-NEXT: v_readlane_b32 s58, v40, 26
9203 ; GFX10-NEXT: v_readlane_b32 s57, v40, 25
9204 ; GFX10-NEXT: v_readlane_b32 s56, v40, 24
9205 ; GFX10-NEXT: v_readlane_b32 s55, v40, 23
9206 ; GFX10-NEXT: v_readlane_b32 s54, v40, 22
9207 ; GFX10-NEXT: v_readlane_b32 s53, v40, 21
9208 ; GFX10-NEXT: v_readlane_b32 s52, v40, 20
9209 ; GFX10-NEXT: v_readlane_b32 s51, v40, 19
9210 ; GFX10-NEXT: v_readlane_b32 s50, v40, 18
9211 ; GFX10-NEXT: v_readlane_b32 s49, v40, 17
9212 ; GFX10-NEXT: v_readlane_b32 s48, v40, 16
9213 ; GFX10-NEXT: v_readlane_b32 s47, v40, 15
9214 ; GFX10-NEXT: v_readlane_b32 s46, v40, 14
9215 ; GFX10-NEXT: v_readlane_b32 s45, v40, 13
9216 ; GFX10-NEXT: v_readlane_b32 s44, v40, 12
9217 ; GFX10-NEXT: v_readlane_b32 s43, v40, 11
9218 ; GFX10-NEXT: v_readlane_b32 s42, v40, 10
9219 ; GFX10-NEXT: v_readlane_b32 s41, v40, 9
9220 ; GFX10-NEXT: v_readlane_b32 s40, v40, 8
9221 ; GFX10-NEXT: v_readlane_b32 s39, v40, 7
9222 ; GFX10-NEXT: v_readlane_b32 s38, v40, 6
9223 ; GFX10-NEXT: v_readlane_b32 s37, v40, 5
9224 ; GFX10-NEXT: v_readlane_b32 s36, v40, 4
9225 ; GFX10-NEXT: v_readlane_b32 s35, v40, 3
9226 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
9227 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
9228 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
9229 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
9230 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
9231 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9232 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
9233 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00
9234 ; GFX10-NEXT: s_mov_b32 s33, s6
9235 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9236 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9238 ; GFX11-LABEL: tail_call_byval_align16:
9239 ; GFX11: ; %bb.0: ; %entry
9240 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9241 ; GFX11-NEXT: s_mov_b32 s4, s33
9242 ; GFX11-NEXT: s_mov_b32 s33, s32
9243 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1
9244 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:24 ; 4-byte Folded Spill
9245 ; GFX11-NEXT: s_mov_b32 exec_lo, s0
9246 ; GFX11-NEXT: s_clause 0x1
9247 ; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 offset:16
9248 ; GFX11-NEXT: scratch_load_b32 v31, off, s33
9249 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
9250 ; GFX11-NEXT: s_add_i32 s32, s32, 32
9251 ; GFX11-NEXT: s_mov_b32 s1, byval_align16_f64_arg@abs32@hi
9252 ; GFX11-NEXT: s_mov_b32 s0, byval_align16_f64_arg@abs32@lo
9253 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
9254 ; GFX11-NEXT: v_writelane_b32 v40, s34, 2
9255 ; GFX11-NEXT: v_writelane_b32 v40, s35, 3
9256 ; GFX11-NEXT: v_writelane_b32 v40, s36, 4
9257 ; GFX11-NEXT: v_writelane_b32 v40, s37, 5
9258 ; GFX11-NEXT: v_writelane_b32 v40, s38, 6
9259 ; GFX11-NEXT: v_writelane_b32 v40, s39, 7
9260 ; GFX11-NEXT: v_writelane_b32 v40, s40, 8
9261 ; GFX11-NEXT: v_writelane_b32 v40, s41, 9
9262 ; GFX11-NEXT: v_writelane_b32 v40, s42, 10
9263 ; GFX11-NEXT: v_writelane_b32 v40, s43, 11
9264 ; GFX11-NEXT: v_writelane_b32 v40, s44, 12
9265 ; GFX11-NEXT: v_writelane_b32 v40, s45, 13
9266 ; GFX11-NEXT: v_writelane_b32 v40, s46, 14
9267 ; GFX11-NEXT: v_writelane_b32 v40, s47, 15
9268 ; GFX11-NEXT: v_writelane_b32 v40, s48, 16
9269 ; GFX11-NEXT: v_writelane_b32 v40, s49, 17
9270 ; GFX11-NEXT: v_writelane_b32 v40, s50, 18
9271 ; GFX11-NEXT: v_writelane_b32 v40, s51, 19
9272 ; GFX11-NEXT: v_writelane_b32 v40, s52, 20
9273 ; GFX11-NEXT: v_writelane_b32 v40, s53, 21
9274 ; GFX11-NEXT: v_writelane_b32 v40, s54, 22
9275 ; GFX11-NEXT: v_writelane_b32 v40, s55, 23
9276 ; GFX11-NEXT: v_writelane_b32 v40, s56, 24
9277 ; GFX11-NEXT: v_writelane_b32 v40, s57, 25
9278 ; GFX11-NEXT: v_writelane_b32 v40, s58, 26
9279 ; GFX11-NEXT: v_writelane_b32 v40, s59, 27
9280 ; GFX11-NEXT: v_writelane_b32 v40, s60, 28
9281 ; GFX11-NEXT: v_writelane_b32 v40, s61, 29
9282 ; GFX11-NEXT: v_writelane_b32 v40, s62, 30
9283 ; GFX11-NEXT: v_writelane_b32 v40, s63, 31
9284 ; GFX11-NEXT: s_waitcnt vmcnt(1)
9285 ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32
9286 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
9287 ; GFX11-NEXT: v_readlane_b32 s63, v40, 31
9288 ; GFX11-NEXT: v_readlane_b32 s62, v40, 30
9289 ; GFX11-NEXT: v_readlane_b32 s61, v40, 29
9290 ; GFX11-NEXT: v_readlane_b32 s60, v40, 28
9291 ; GFX11-NEXT: v_readlane_b32 s59, v40, 27
9292 ; GFX11-NEXT: v_readlane_b32 s58, v40, 26
9293 ; GFX11-NEXT: v_readlane_b32 s57, v40, 25
9294 ; GFX11-NEXT: v_readlane_b32 s56, v40, 24
9295 ; GFX11-NEXT: v_readlane_b32 s55, v40, 23
9296 ; GFX11-NEXT: v_readlane_b32 s54, v40, 22
9297 ; GFX11-NEXT: v_readlane_b32 s53, v40, 21
9298 ; GFX11-NEXT: v_readlane_b32 s52, v40, 20
9299 ; GFX11-NEXT: v_readlane_b32 s51, v40, 19
9300 ; GFX11-NEXT: v_readlane_b32 s50, v40, 18
9301 ; GFX11-NEXT: v_readlane_b32 s49, v40, 17
9302 ; GFX11-NEXT: v_readlane_b32 s48, v40, 16
9303 ; GFX11-NEXT: v_readlane_b32 s47, v40, 15
9304 ; GFX11-NEXT: v_readlane_b32 s46, v40, 14
9305 ; GFX11-NEXT: v_readlane_b32 s45, v40, 13
9306 ; GFX11-NEXT: v_readlane_b32 s44, v40, 12
9307 ; GFX11-NEXT: v_readlane_b32 s43, v40, 11
9308 ; GFX11-NEXT: v_readlane_b32 s42, v40, 10
9309 ; GFX11-NEXT: v_readlane_b32 s41, v40, 9
9310 ; GFX11-NEXT: v_readlane_b32 s40, v40, 8
9311 ; GFX11-NEXT: v_readlane_b32 s39, v40, 7
9312 ; GFX11-NEXT: v_readlane_b32 s38, v40, 6
9313 ; GFX11-NEXT: v_readlane_b32 s37, v40, 5
9314 ; GFX11-NEXT: v_readlane_b32 s36, v40, 4
9315 ; GFX11-NEXT: v_readlane_b32 s35, v40, 3
9316 ; GFX11-NEXT: v_readlane_b32 s34, v40, 2
9317 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
9318 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
9319 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1
9320 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:24 ; 4-byte Folded Reload
9321 ; GFX11-NEXT: s_mov_b32 exec_lo, s0
9322 ; GFX11-NEXT: s_addk_i32 s32, 0xffe0
9323 ; GFX11-NEXT: s_mov_b32 s33, s4
9324 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9325 ; GFX11-NEXT: s_setpc_b64 s[30:31]
9327 ; GFX10-SCRATCH-LABEL: tail_call_byval_align16:
9328 ; GFX10-SCRATCH: ; %bb.0: ; %entry
9329 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9330 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33
9331 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9332 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
9333 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:24 ; 4-byte Folded Spill
9334 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9335 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
9336 ; GFX10-SCRATCH-NEXT: s_clause 0x1
9337 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:16
9338 ; GFX10-SCRATCH-NEXT: scratch_load_dword v31, off, s33
9339 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
9340 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32
9341 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, byval_align16_f64_arg@abs32@hi
9342 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, byval_align16_f64_arg@abs32@lo
9343 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
9344 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 2
9345 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3
9346 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4
9347 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5
9348 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6
9349 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7
9350 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 8
9351 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 9
9352 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 10
9353 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 11
9354 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 12
9355 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 13
9356 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 14
9357 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 15
9358 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 16
9359 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 17
9360 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 18
9361 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 19
9362 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 20
9363 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 21
9364 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 22
9365 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 23
9366 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 24
9367 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 25
9368 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 26
9369 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 27
9370 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 28
9371 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 29
9372 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 30
9373 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 31
9374 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1)
9375 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32
9376 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9377 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 31
9378 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 30
9379 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 29
9380 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 28
9381 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 27
9382 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 26
9383 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 25
9384 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 24
9385 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 23
9386 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 22
9387 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 21
9388 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 20
9389 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 19
9390 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 18
9391 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 17
9392 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 16
9393 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 15
9394 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 14
9395 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 13
9396 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 12
9397 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 11
9398 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 10
9399 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 9
9400 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 8
9401 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7
9402 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6
9403 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5
9404 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4
9405 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3
9406 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 2
9407 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
9408 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
9409 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
9410 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:24 ; 4-byte Folded Reload
9411 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9412 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
9413 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0
9414 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s4
9415 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9416 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
9418 %alloca = alloca double, align 8, addrspace(5)
9419 tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca)
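9423 ; inreg arguments are passed in SGPRs (the i8/i16 immediates below are loaded into s4); the i1 case below is written to memory at s32 instead.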
; Calls @external_void_func_i1_inreg with the constant `i1 inreg true`.
; In the checks below the value 1 is materialized in v0 and stored as a byte
; through s32 before the s_swappc_b64 call (buffer_store_byte on the GFX9 and
; GFX10 runs, scratch_store_b8 / scratch_store_byte on the GFX11 and
; GFX10-SCRATCH runs), so no SGPR carries the argument in this test.
; v40 lanes 0-2 hold s30, s31 and the incoming s33 value across the call.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
9424 define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 {
9425 ; GFX9-LABEL: test_call_external_void_func_i1_imm_inreg:
9427 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9428 ; GFX9-NEXT: s_mov_b32 s34, s33
9429 ; GFX9-NEXT: s_mov_b32 s33, s32
9430 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9431 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9432 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9433 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
9434 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9435 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
9436 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
9437 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_inreg@abs32@hi
9438 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_inreg@abs32@lo
9439 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
9440 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
9441 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
9442 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
9443 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
9444 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
9445 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9446 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9447 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9448 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9449 ; GFX9-NEXT: s_mov_b32 s33, s34
9450 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9451 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9453 ; GFX10-LABEL: test_call_external_void_func_i1_imm_inreg:
9455 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9456 ; GFX10-NEXT: s_mov_b32 s34, s33
9457 ; GFX10-NEXT: s_mov_b32 s33, s32
9458 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9459 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9460 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9461 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9462 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
9463 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
9464 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9465 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_inreg@abs32@hi
9466 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_inreg@abs32@lo
9467 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
9468 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
9469 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
9470 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
9471 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
9472 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
9473 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
9474 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9475 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9476 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9477 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9478 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9479 ; GFX10-NEXT: s_mov_b32 s33, s34
9480 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9481 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9483 ; GFX11-LABEL: test_call_external_void_func_i1_imm_inreg:
9485 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9486 ; GFX11-NEXT: s_mov_b32 s0, s33
9487 ; GFX11-NEXT: s_mov_b32 s33, s32
9488 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9489 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
9490 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9491 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
9492 ; GFX11-NEXT: v_mov_b32_e32 v0, 1
9493 ; GFX11-NEXT: s_add_i32 s32, s32, 16
9494 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_inreg@abs32@hi
9495 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_inreg@abs32@lo
9496 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
9497 ; GFX11-NEXT: scratch_store_b8 off, v0, s32
9498 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
9499 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
9500 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
9501 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
9502 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
9503 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
9504 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9505 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
9506 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9507 ; GFX11-NEXT: s_add_i32 s32, s32, -16
9508 ; GFX11-NEXT: s_mov_b32 s33, s0
9509 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9510 ; GFX11-NEXT: s_setpc_b64 s[30:31]
9512 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm_inreg:
9513 ; GFX10-SCRATCH: ; %bb.0:
9514 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9515 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
9516 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9517 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9518 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
9519 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9520 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9521 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
9522 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
9523 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
9524 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_inreg@abs32@hi
9525 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_inreg@abs32@lo
9526 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
9527 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
9528 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
9529 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9530 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
9531 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
9532 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
9533 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9534 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
9535 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9536 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9537 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
9538 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
9539 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9540 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The single call under test: a constant i1 marked inreg.
9541 call amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg true)
; Calls @external_void_func_i8_inreg with the constant `i8 inreg 123`. The
; function also declares one unnamed i32 parameter, which the call below does
; not reference.
; In the checks the constant is loaded into s4 (s_movk_i32 s4, 0x7b) ahead of
; the s_swappc_b64 call; the previous s4 is saved in v40 lane 0 and read back
; after the call. v40 lanes 1-3 hold s30, s31 and the incoming s33 value.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
9545 define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 {
9546 ; GFX9-LABEL: test_call_external_void_func_i8_imm_inreg:
9548 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9549 ; GFX9-NEXT: s_mov_b32 s34, s33
9550 ; GFX9-NEXT: s_mov_b32 s33, s32
9551 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9552 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9553 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9554 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
9555 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
9556 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
9557 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_inreg@abs32@hi
9558 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_inreg@abs32@lo
9559 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
9560 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9561 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
9562 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
9563 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
9564 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
9565 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
9566 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
9567 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9568 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9569 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9570 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9571 ; GFX9-NEXT: s_mov_b32 s33, s34
9572 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9573 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9575 ; GFX10-LABEL: test_call_external_void_func_i8_imm_inreg:
9577 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9578 ; GFX10-NEXT: s_mov_b32 s34, s33
9579 ; GFX10-NEXT: s_mov_b32 s33, s32
9580 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9581 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9582 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9583 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9584 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
9585 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_inreg@abs32@hi
9586 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_inreg@abs32@lo
9587 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9588 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
9589 ; GFX10-NEXT: s_movk_i32 s4, 0x7b
9590 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
9591 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
9592 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
9593 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
9594 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
9595 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
9596 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
9597 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9598 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9599 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9600 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9601 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9602 ; GFX10-NEXT: s_mov_b32 s33, s34
9603 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9604 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9606 ; GFX11-LABEL: test_call_external_void_func_i8_imm_inreg:
9608 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9609 ; GFX11-NEXT: s_mov_b32 s0, s33
9610 ; GFX11-NEXT: s_mov_b32 s33, s32
9611 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9612 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
9613 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9614 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
9615 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi
9616 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo
9617 ; GFX11-NEXT: s_add_i32 s32, s32, 16
9618 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
9619 ; GFX11-NEXT: s_movk_i32 s4, 0x7b
9620 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
9621 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
9622 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
9623 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
9624 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
9625 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
9626 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
9627 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
9628 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9629 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
9630 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9631 ; GFX11-NEXT: s_add_i32 s32, s32, -16
9632 ; GFX11-NEXT: s_mov_b32 s33, s0
9633 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9634 ; GFX11-NEXT: s_setpc_b64 s[30:31]
9636 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm_inreg:
9637 ; GFX10-SCRATCH: ; %bb.0:
9638 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9639 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
9640 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9641 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9642 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
9643 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9644 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9645 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
9646 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi
9647 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo
9648 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
9649 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
9650 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
9651 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
9652 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
9653 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9654 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
9655 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
9656 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
9657 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
9658 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9659 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
9660 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9661 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9662 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
9663 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
9664 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9665 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The single call under test: a constant i8 marked inreg (123 == 0x7b).
9666 call amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg 123)
; Calls @external_void_func_i16_inreg with the constant `i16 inreg 123`.
; In the checks the constant is loaded into s4 (s_movk_i32 s4, 0x7b) ahead of
; the s_swappc_b64 call; the previous s4 is saved in v40 lane 0 and read back
; after the call. v40 lanes 1-3 hold s30, s31 and the incoming s33 value.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
9670 define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 {
9671 ; GFX9-LABEL: test_call_external_void_func_i16_imm_inreg:
9673 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9674 ; GFX9-NEXT: s_mov_b32 s34, s33
9675 ; GFX9-NEXT: s_mov_b32 s33, s32
9676 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9677 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9678 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9679 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
9680 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
9681 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
9682 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_inreg@abs32@hi
9683 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_inreg@abs32@lo
9684 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
9685 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9686 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
9687 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
9688 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
9689 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
9690 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
9691 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
9692 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9693 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9694 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9695 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9696 ; GFX9-NEXT: s_mov_b32 s33, s34
9697 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9698 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9700 ; GFX10-LABEL: test_call_external_void_func_i16_imm_inreg:
9702 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9703 ; GFX10-NEXT: s_mov_b32 s34, s33
9704 ; GFX10-NEXT: s_mov_b32 s33, s32
9705 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9706 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9707 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9708 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9709 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
9710 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_inreg@abs32@hi
9711 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_inreg@abs32@lo
9712 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9713 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
9714 ; GFX10-NEXT: s_movk_i32 s4, 0x7b
9715 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
9716 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
9717 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
9718 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
9719 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
9720 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
9721 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
9722 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9723 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9724 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9725 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9726 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9727 ; GFX10-NEXT: s_mov_b32 s33, s34
9728 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9729 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9731 ; GFX11-LABEL: test_call_external_void_func_i16_imm_inreg:
9733 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9734 ; GFX11-NEXT: s_mov_b32 s0, s33
9735 ; GFX11-NEXT: s_mov_b32 s33, s32
9736 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9737 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
9738 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9739 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
9740 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi
9741 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo
9742 ; GFX11-NEXT: s_add_i32 s32, s32, 16
9743 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
9744 ; GFX11-NEXT: s_movk_i32 s4, 0x7b
9745 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
9746 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
9747 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
9748 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
9749 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
9750 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
9751 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
9752 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
9753 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9754 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
9755 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9756 ; GFX11-NEXT: s_add_i32 s32, s32, -16
9757 ; GFX11-NEXT: s_mov_b32 s33, s0
9758 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9759 ; GFX11-NEXT: s_setpc_b64 s[30:31]
9761 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm_inreg:
9762 ; GFX10-SCRATCH: ; %bb.0:
9763 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9764 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
9765 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9766 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9767 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
9768 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9769 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9770 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
9771 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi
9772 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo
9773 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
9774 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
9775 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
9776 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
9777 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
9778 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9779 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
9780 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
9781 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
9782 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
9783 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9784 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
9785 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9786 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9787 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
9788 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
9789 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9790 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The single call under test: a constant i16 marked inreg (123 == 0x7b).
9791 call amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg 123)
; Calls external_void_func_i32_inreg with the constant 42 passed as an `inreg`
; i32 argument. For every run line the expected code saves the incoming s4 to
; lane 0 of v40, materializes 42 in s4 before s_swappc_b64, and reads s4 back
; from lane 0 after the call returns.
; Review note - the unnamed i32 parameter is not referenced by the visible
; body; presumably kept for parity with a sibling test, confirm before removing.
9795 define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 {
9796 ; GFX9-LABEL: test_call_external_void_func_i32_imm_inreg:
9798 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9799 ; GFX9-NEXT: s_mov_b32 s34, s33
9800 ; GFX9-NEXT: s_mov_b32 s33, s32
9801 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9802 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9803 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9804 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
9805 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
9806 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
9807 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i32_inreg@abs32@hi
9808 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i32_inreg@abs32@lo
9809 ; GFX9-NEXT: s_mov_b32 s4, 42
9810 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9811 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
9812 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
9813 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
9814 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
9815 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
9816 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
9817 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9818 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9819 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9820 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9821 ; GFX9-NEXT: s_mov_b32 s33, s34
9822 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9823 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9825 ; GFX10-LABEL: test_call_external_void_func_i32_imm_inreg:
9827 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9828 ; GFX10-NEXT: s_mov_b32 s34, s33
9829 ; GFX10-NEXT: s_mov_b32 s33, s32
9830 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9831 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9832 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9833 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9834 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
9835 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32_inreg@abs32@hi
9836 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32_inreg@abs32@lo
9837 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9838 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
9839 ; GFX10-NEXT: s_mov_b32 s4, 42
9840 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
9841 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
9842 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
9843 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
9844 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
9845 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
9846 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
9847 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9848 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9849 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9850 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9851 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9852 ; GFX10-NEXT: s_mov_b32 s33, s34
9853 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9854 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9856 ; GFX11-LABEL: test_call_external_void_func_i32_imm_inreg:
9858 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9859 ; GFX11-NEXT: s_mov_b32 s0, s33
9860 ; GFX11-NEXT: s_mov_b32 s33, s32
9861 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9862 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
9863 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9864 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
9865 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi
9866 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo
9867 ; GFX11-NEXT: s_add_i32 s32, s32, 16
9868 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
9869 ; GFX11-NEXT: s_mov_b32 s4, 42
9870 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
9871 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
9872 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
9873 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
9874 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
9875 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
9876 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
9877 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
9878 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9879 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
9880 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9881 ; GFX11-NEXT: s_add_i32 s32, s32, -16
9882 ; GFX11-NEXT: s_mov_b32 s33, s0
9883 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9884 ; GFX11-NEXT: s_setpc_b64 s[30:31]
9886 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm_inreg:
9887 ; GFX10-SCRATCH: ; %bb.0:
9888 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9889 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
9890 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9891 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9892 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
9893 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9894 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9895 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
9896 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi
9897 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo
9898 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
9899 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
9900 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42
9901 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
9902 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
9903 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9904 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
9905 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
9906 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
9907 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
9908 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9909 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
9910 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9911 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9912 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
9913 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
9914 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9915 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
9916 call amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg 42)
; Calls external_void_func_i64_inreg with the constant 123 passed as an `inreg`
; i64 argument. For every run line the expected code saves the incoming s4 and
; s5 to lanes 0 and 1 of v40, then sets s4 to 0x7b (123) and s5 to 0 before
; s_swappc_b64, and reads both registers back from those lanes after the call.
9920 define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 {
9921 ; GFX9-LABEL: test_call_external_void_func_i64_imm_inreg:
9923 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9924 ; GFX9-NEXT: s_mov_b32 s34, s33
9925 ; GFX9-NEXT: s_mov_b32 s33, s32
9926 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9927 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9928 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9929 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
9930 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
9931 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
9932 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
9933 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i64_inreg@abs32@hi
9934 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i64_inreg@abs32@lo
9935 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
9936 ; GFX9-NEXT: s_mov_b32 s5, 0
9937 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9938 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
9939 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
9940 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
9941 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
9942 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
9943 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
9944 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
9945 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9946 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9947 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9948 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9949 ; GFX9-NEXT: s_mov_b32 s33, s34
9950 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9951 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9953 ; GFX10-LABEL: test_call_external_void_func_i64_imm_inreg:
9955 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9956 ; GFX10-NEXT: s_mov_b32 s34, s33
9957 ; GFX10-NEXT: s_mov_b32 s33, s32
9958 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9959 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9960 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9961 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9962 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
9963 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64_inreg@abs32@hi
9964 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64_inreg@abs32@lo
9965 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9966 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
9967 ; GFX10-NEXT: s_movk_i32 s4, 0x7b
9968 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
9969 ; GFX10-NEXT: s_mov_b32 s5, 0
9970 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
9971 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
9972 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
9973 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
9974 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
9975 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
9976 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
9977 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
9978 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9979 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9980 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9981 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9982 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9983 ; GFX10-NEXT: s_mov_b32 s33, s34
9984 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9985 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9987 ; GFX11-LABEL: test_call_external_void_func_i64_imm_inreg:
9989 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9990 ; GFX11-NEXT: s_mov_b32 s0, s33
9991 ; GFX11-NEXT: s_mov_b32 s33, s32
9992 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9993 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
9994 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9995 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
9996 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi
9997 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo
9998 ; GFX11-NEXT: s_add_i32 s32, s32, 16
9999 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10000 ; GFX11-NEXT: s_movk_i32 s4, 0x7b
10001 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
10002 ; GFX11-NEXT: s_mov_b32 s5, 0
10003 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
10004 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
10005 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10006 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10007 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
10008 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
10009 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
10010 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10011 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
10012 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10013 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10014 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10015 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10016 ; GFX11-NEXT: s_mov_b32 s33, s0
10017 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10018 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10020 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm_inreg:
10021 ; GFX10-SCRATCH: ; %bb.0:
10022 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10023 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10024 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10025 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10026 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10027 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10028 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10029 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
10030 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi
10031 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo
10032 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10033 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10034 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
10035 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
10036 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0
10037 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
10038 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
10039 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10040 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
10041 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
10042 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
10043 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10044 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
10045 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10046 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10047 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10048 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10049 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10050 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10051 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10052 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10053 call amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg 123)
; Loads a <2 x i64> value from a null addrspace(4) pointer and passes it as an
; `inreg` argument to external_void_func_v2i64_inreg. For every run line the
; expected code saves the incoming s4-s7 to lanes 0-3 of v40, zeroes a scalar
; register pair to form the address, and fills s[4:7] with one 128-bit scalar
; load (s_load_dwordx4, or s_load_b128 on gfx1100) before s_swappc_b64; s4-s7
; are read back from the same lanes after the call.
10057 define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 {
10058 ; GFX9-LABEL: test_call_external_void_func_v2i64_inreg:
10060 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10061 ; GFX9-NEXT: s_mov_b32 s34, s33
10062 ; GFX9-NEXT: s_mov_b32 s33, s32
10063 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10064 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10065 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10066 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
10067 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10068 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
10069 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
10070 ; GFX9-NEXT: s_mov_b64 s[34:35], 0
10071 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
10072 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10073 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
10074 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi
10075 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo
10076 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10077 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
10078 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10079 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
10080 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
10081 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
10082 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
10083 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
10084 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10085 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
10086 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10087 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10088 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10089 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10090 ; GFX9-NEXT: s_mov_b32 s33, s34
10091 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10092 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10094 ; GFX10-LABEL: test_call_external_void_func_v2i64_inreg:
10096 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10097 ; GFX10-NEXT: s_mov_b32 s34, s33
10098 ; GFX10-NEXT: s_mov_b32 s33, s32
10099 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10100 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10101 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10102 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10103 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
10104 ; GFX10-NEXT: s_mov_b64 s[34:35], 0
10105 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10106 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10107 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
10108 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
10109 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
10110 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10111 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi
10112 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo
10113 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
10114 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
10115 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10116 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
10117 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
10118 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
10119 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
10120 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
10121 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10122 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
10123 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10124 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10125 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10126 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10127 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10128 ; GFX10-NEXT: s_mov_b32 s33, s34
10129 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10130 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10132 ; GFX11-LABEL: test_call_external_void_func_v2i64_inreg:
10134 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10135 ; GFX11-NEXT: s_mov_b32 s0, s33
10136 ; GFX11-NEXT: s_mov_b32 s33, s32
10137 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10138 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10139 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10140 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
10141 ; GFX11-NEXT: s_mov_b64 s[0:1], 0
10142 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10143 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10144 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
10145 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
10146 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
10147 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
10148 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi
10149 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo
10150 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
10151 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
10152 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10153 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10154 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
10155 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
10156 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
10157 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
10158 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
10159 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10160 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
10161 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10162 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10163 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10164 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10165 ; GFX11-NEXT: s_mov_b32 s33, s0
10166 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10167 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10169 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_inreg:
10170 ; GFX10-SCRATCH: ; %bb.0:
10171 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10172 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10173 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10174 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10175 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10176 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10177 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10178 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
10179 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
10180 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10181 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10182 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
10183 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
10184 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
10185 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
10186 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi
10187 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo
10188 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
10189 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
10190 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10191 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
10192 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
10193 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
10194 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
10195 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
10196 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10197 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
10198 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10199 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10200 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10201 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10202 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10203 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10204 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10205 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10206 %val = load <2 x i64>, ptr addrspace(4) null
10207 call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg %val)
; Calls external_void_func_v2i64_inreg with a constant <2 x i64> `inreg`
; argument. The two elements are 8589934593 (0x2_0000_0001) and 17179869187
; (0x4_0000_0003), so their 32-bit halves in low-to-high order are 1, 2, 3, 4.
; For every run line the expected code saves the incoming s4-s7 to lanes 0-3
; of v40, sets s4, s5, s6, s7 to 1, 2, 3, 4 before s_swappc_b64, and reads
; s4-s7 back from the same lanes after the call.
10211 define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 {
10212 ; GFX9-LABEL: test_call_external_void_func_v2i64_imm_inreg:
10214 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10215 ; GFX9-NEXT: s_mov_b32 s34, s33
10216 ; GFX9-NEXT: s_mov_b32 s33, s32
10217 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10218 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10219 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10220 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
10221 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10222 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
10223 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
10224 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
10225 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
10226 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi
10227 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo
10228 ; GFX9-NEXT: s_mov_b32 s4, 1
10229 ; GFX9-NEXT: s_mov_b32 s5, 2
10230 ; GFX9-NEXT: s_mov_b32 s6, 3
10231 ; GFX9-NEXT: s_mov_b32 s7, 4
10232 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10233 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
10234 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10235 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
10236 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
10237 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
10238 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
10239 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
10240 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10241 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
10242 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10243 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10244 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10245 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10246 ; GFX9-NEXT: s_mov_b32 s33, s34
10247 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10248 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10250 ; GFX10-LABEL: test_call_external_void_func_v2i64_imm_inreg:
10252 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10253 ; GFX10-NEXT: s_mov_b32 s34, s33
10254 ; GFX10-NEXT: s_mov_b32 s33, s32
10255 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10256 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10257 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10258 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10259 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
10260 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi
10261 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo
10262 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10263 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10264 ; GFX10-NEXT: s_mov_b32 s4, 1
10265 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
10266 ; GFX10-NEXT: s_mov_b32 s5, 2
10267 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
10268 ; GFX10-NEXT: s_mov_b32 s6, 3
10269 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
10270 ; GFX10-NEXT: s_mov_b32 s7, 4
10271 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
10272 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
10273 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10274 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
10275 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
10276 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
10277 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
10278 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
10279 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10280 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
10281 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10282 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10283 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10284 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10285 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10286 ; GFX10-NEXT: s_mov_b32 s33, s34
10287 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10288 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10290 ; GFX11-LABEL: test_call_external_void_func_v2i64_imm_inreg:
10292 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10293 ; GFX11-NEXT: s_mov_b32 s0, s33
10294 ; GFX11-NEXT: s_mov_b32 s33, s32
10295 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10296 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10297 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10298 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
10299 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi
10300 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo
10301 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10302 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10303 ; GFX11-NEXT: s_mov_b32 s4, 1
10304 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
10305 ; GFX11-NEXT: s_mov_b32 s5, 2
10306 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
10307 ; GFX11-NEXT: s_mov_b32 s6, 3
10308 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
10309 ; GFX11-NEXT: s_mov_b32 s7, 4
10310 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
10311 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
10312 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10313 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10314 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
10315 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
10316 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
10317 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
10318 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
10319 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10320 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
10321 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10322 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10323 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10324 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10325 ; GFX11-NEXT: s_mov_b32 s33, s0
10326 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10327 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10329 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm_inreg:
10330 ; GFX10-SCRATCH: ; %bb.0:
10331 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10332 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10333 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10334 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10335 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10336 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10337 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10338 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
10339 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi
10340 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo
10341 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10342 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10343 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
10344 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
10345 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
10346 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
10347 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
10348 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
10349 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
10350 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
10351 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
10352 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10353 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
10354 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
10355 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
10356 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
10357 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
10358 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10359 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
10360 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10361 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10362 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10363 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10364 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10365 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10366 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10367 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10368 call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg <i64 8589934593, i64 17179869187>)
10372 define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
10373 ; GFX9-LABEL: test_call_external_void_func_v3i64_inreg:
10375 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10376 ; GFX9-NEXT: s_mov_b32 s34, s33
10377 ; GFX9-NEXT: s_mov_b32 s33, s32
10378 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10379 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10380 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10381 ; GFX9-NEXT: v_writelane_b32 v40, s34, 8
10382 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10383 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
10384 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
10385 ; GFX9-NEXT: s_mov_b64 s[34:35], 0
10386 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
10387 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10388 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
10389 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
10390 ; GFX9-NEXT: v_writelane_b32 v40, s30, 6
10391 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i64_inreg@abs32@hi
10392 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i64_inreg@abs32@lo
10393 ; GFX9-NEXT: s_mov_b32 s8, 1
10394 ; GFX9-NEXT: s_mov_b32 s9, 2
10395 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10396 ; GFX9-NEXT: v_writelane_b32 v40, s31, 7
10397 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10398 ; GFX9-NEXT: v_readlane_b32 s31, v40, 7
10399 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6
10400 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
10401 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
10402 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
10403 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
10404 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
10405 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10406 ; GFX9-NEXT: v_readlane_b32 s34, v40, 8
10407 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10408 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10409 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10410 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10411 ; GFX9-NEXT: s_mov_b32 s33, s34
10412 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10413 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10415 ; GFX10-LABEL: test_call_external_void_func_v3i64_inreg:
10417 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10418 ; GFX10-NEXT: s_mov_b32 s34, s33
10419 ; GFX10-NEXT: s_mov_b32 s33, s32
10420 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10421 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10422 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10423 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10424 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8
10425 ; GFX10-NEXT: s_mov_b64 s[34:35], 0
10426 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10427 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10428 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
10429 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
10430 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
10431 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10432 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64_inreg@abs32@hi
10433 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i64_inreg@abs32@lo
10434 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
10435 ; GFX10-NEXT: s_mov_b32 s8, 1
10436 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
10437 ; GFX10-NEXT: s_mov_b32 s9, 2
10438 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6
10439 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7
10440 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10441 ; GFX10-NEXT: v_readlane_b32 s31, v40, 7
10442 ; GFX10-NEXT: v_readlane_b32 s30, v40, 6
10443 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
10444 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
10445 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
10446 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
10447 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
10448 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10449 ; GFX10-NEXT: v_readlane_b32 s34, v40, 8
10450 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10451 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10452 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10453 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10454 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10455 ; GFX10-NEXT: s_mov_b32 s33, s34
10456 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10457 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10459 ; GFX11-LABEL: test_call_external_void_func_v3i64_inreg:
10461 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10462 ; GFX11-NEXT: s_mov_b32 s0, s33
10463 ; GFX11-NEXT: s_mov_b32 s33, s32
10464 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10465 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10466 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10467 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8
10468 ; GFX11-NEXT: s_mov_b64 s[0:1], 0
10469 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10470 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10471 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
10472 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
10473 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
10474 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
10475 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi
10476 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo
10477 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
10478 ; GFX11-NEXT: s_mov_b32 s8, 1
10479 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
10480 ; GFX11-NEXT: s_mov_b32 s9, 2
10481 ; GFX11-NEXT: v_writelane_b32 v40, s30, 6
10482 ; GFX11-NEXT: v_writelane_b32 v40, s31, 7
10483 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10484 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10485 ; GFX11-NEXT: v_readlane_b32 s31, v40, 7
10486 ; GFX11-NEXT: v_readlane_b32 s30, v40, 6
10487 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
10488 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
10489 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
10490 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
10491 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
10492 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10493 ; GFX11-NEXT: v_readlane_b32 s0, v40, 8
10494 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10495 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10496 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10497 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10498 ; GFX11-NEXT: s_mov_b32 s33, s0
10499 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10500 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10502 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64_inreg:
10503 ; GFX10-SCRATCH: ; %bb.0:
10504 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10505 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10506 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10507 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10508 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10509 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10510 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10511 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8
10512 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
10513 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10514 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10515 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
10516 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
10517 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
10518 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
10519 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi
10520 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo
10521 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
10522 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
10523 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
10524 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
10525 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6
10526 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7
10527 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10528 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7
10529 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
10530 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
10531 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
10532 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
10533 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
10534 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
10535 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10536 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 8
10537 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10538 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10539 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10540 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10541 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10542 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10543 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10544 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10545 %load = load <2 x i64>, ptr addrspace(4) null
10546 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
10548 call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
; Calls @external_void_func_v4i64_inreg with a <4 x i64> inreg argument.
; Elements 0-1 come from a <2 x i64> load through a null addrspace(4) pointer;
; elements 2-3 are the constants 8589934593 (0x200000001) and 17179869187
; (0x400000003). The checks below expect the loaded half in s[4:7] and the
; constants split into the 32-bit pieces 1, 2, 3, 4 in s8-s11. The previous
; values of s4-s11, s30, s31 and s33 are expected in lanes 0-10 of v40 across
; the call and are read back afterwards.
10552 define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 {
10553 ; GFX9-LABEL: test_call_external_void_func_v4i64_inreg:
10555 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10556 ; GFX9-NEXT: s_mov_b32 s34, s33
10557 ; GFX9-NEXT: s_mov_b32 s33, s32
10558 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10559 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10560 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10561 ; GFX9-NEXT: v_writelane_b32 v40, s34, 10
10562 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10563 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
10564 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
10565 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
10566 ; GFX9-NEXT: s_mov_b64 s[34:35], 0
10567 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
10568 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10569 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
10570 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
10571 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
10572 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8
10573 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i64_inreg@abs32@hi
10574 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i64_inreg@abs32@lo
10575 ; GFX9-NEXT: s_mov_b32 s8, 1
10576 ; GFX9-NEXT: s_mov_b32 s9, 2
10577 ; GFX9-NEXT: s_mov_b32 s10, 3
10578 ; GFX9-NEXT: s_mov_b32 s11, 4
10579 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10580 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9
10581 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10582 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
10583 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8
10584 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
10585 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
10586 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
10587 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
10588 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
10589 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
10590 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
10591 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10592 ; GFX9-NEXT: v_readlane_b32 s34, v40, 10
10593 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10594 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10595 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10596 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10597 ; GFX9-NEXT: s_mov_b32 s33, s34
10598 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10599 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10601 ; GFX10-LABEL: test_call_external_void_func_v4i64_inreg:
10603 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10604 ; GFX10-NEXT: s_mov_b32 s34, s33
10605 ; GFX10-NEXT: s_mov_b32 s33, s32
10606 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10607 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10608 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10609 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10610 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10
10611 ; GFX10-NEXT: s_mov_b64 s[34:35], 0
10612 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10613 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10614 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
10615 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
10616 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
10617 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10618 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i64_inreg@abs32@hi
10619 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64_inreg@abs32@lo
10620 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
10621 ; GFX10-NEXT: s_mov_b32 s8, 1
10622 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
10623 ; GFX10-NEXT: s_mov_b32 s9, 2
10624 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
10625 ; GFX10-NEXT: s_mov_b32 s10, 3
10626 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
10627 ; GFX10-NEXT: s_mov_b32 s11, 4
10628 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
10629 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
10630 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10631 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
10632 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8
10633 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
10634 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
10635 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
10636 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
10637 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
10638 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
10639 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
10640 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10641 ; GFX10-NEXT: v_readlane_b32 s34, v40, 10
10642 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10643 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10644 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10645 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10646 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10647 ; GFX10-NEXT: s_mov_b32 s33, s34
10648 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10649 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10651 ; GFX11-LABEL: test_call_external_void_func_v4i64_inreg:
10653 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10654 ; GFX11-NEXT: s_mov_b32 s0, s33
10655 ; GFX11-NEXT: s_mov_b32 s33, s32
10656 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10657 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10658 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10659 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10
10660 ; GFX11-NEXT: s_mov_b64 s[0:1], 0
10661 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10662 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10663 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
10664 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
10665 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
10666 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
10667 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi
10668 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo
10669 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
10670 ; GFX11-NEXT: s_mov_b32 s8, 1
10671 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
10672 ; GFX11-NEXT: s_mov_b32 s9, 2
10673 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
10674 ; GFX11-NEXT: s_mov_b32 s10, 3
10675 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
10676 ; GFX11-NEXT: s_mov_b32 s11, 4
10677 ; GFX11-NEXT: v_writelane_b32 v40, s30, 8
10678 ; GFX11-NEXT: v_writelane_b32 v40, s31, 9
10679 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10680 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10681 ; GFX11-NEXT: v_readlane_b32 s31, v40, 9
10682 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8
10683 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
10684 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
10685 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
10686 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
10687 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
10688 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
10689 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
10690 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10691 ; GFX11-NEXT: v_readlane_b32 s0, v40, 10
10692 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10693 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10694 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10695 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10696 ; GFX11-NEXT: s_mov_b32 s33, s0
10697 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10698 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10700 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64_inreg:
10701 ; GFX10-SCRATCH: ; %bb.0:
10702 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10703 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10704 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10705 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10706 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10707 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10708 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10709 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10
10710 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
10711 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10712 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10713 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
10714 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
10715 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
10716 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
10717 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi
10718 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo
10719 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
10720 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
10721 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
10722 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
10723 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
10724 ; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3
10725 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
10726 ; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4
10727 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
10728 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
10729 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10730 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
10731 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
10732 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
10733 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
10734 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
10735 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
10736 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
10737 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
10738 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
10739 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10740 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 10
10741 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10742 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10743 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10744 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10745 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10746 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10747 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10748 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10749 %load = load <2 x i64>, ptr addrspace(4) null
10750 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10751 call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg %val)
; Passes the half constant 4.0 as an inreg argument to
; @external_void_func_f16_inreg. Every check prefix below expects the value
; to be materialized in s4 as 0x4400 (s_movk_i32). The previous values of s4,
; s30, s31 and s33 are expected in lanes 0-3 of v40 across the call and are
; read back afterwards.
10755 define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 {
10756 ; GFX9-LABEL: test_call_external_void_func_f16_imm_inreg:
10758 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10759 ; GFX9-NEXT: s_mov_b32 s34, s33
10760 ; GFX9-NEXT: s_mov_b32 s33, s32
10761 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10762 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10763 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10764 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
10765 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10766 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
10767 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f16_inreg@abs32@hi
10768 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f16_inreg@abs32@lo
10769 ; GFX9-NEXT: s_movk_i32 s4, 0x4400
10770 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10771 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
10772 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10773 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
10774 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
10775 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10776 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
10777 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10778 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10779 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10780 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10781 ; GFX9-NEXT: s_mov_b32 s33, s34
10782 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10783 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10785 ; GFX10-LABEL: test_call_external_void_func_f16_imm_inreg:
10787 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10788 ; GFX10-NEXT: s_mov_b32 s34, s33
10789 ; GFX10-NEXT: s_mov_b32 s33, s32
10790 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10791 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10792 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10793 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10794 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
10795 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16_inreg@abs32@hi
10796 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16_inreg@abs32@lo
10797 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10798 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10799 ; GFX10-NEXT: s_movk_i32 s4, 0x4400
10800 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
10801 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
10802 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10803 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
10804 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
10805 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10806 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
10807 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10808 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10809 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10810 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10811 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10812 ; GFX10-NEXT: s_mov_b32 s33, s34
10813 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10814 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10816 ; GFX11-LABEL: test_call_external_void_func_f16_imm_inreg:
10818 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10819 ; GFX11-NEXT: s_mov_b32 s0, s33
10820 ; GFX11-NEXT: s_mov_b32 s33, s32
10821 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10822 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10823 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10824 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
10825 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi
10826 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo
10827 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10828 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10829 ; GFX11-NEXT: s_movk_i32 s4, 0x4400
10830 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
10831 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
10832 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10833 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10834 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
10835 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
10836 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10837 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
10838 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10839 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10840 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10841 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10842 ; GFX11-NEXT: s_mov_b32 s33, s0
10843 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10844 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10846 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm_inreg:
10847 ; GFX10-SCRATCH: ; %bb.0:
10848 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10849 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10850 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10851 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10852 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10853 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10854 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10855 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
10856 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi
10857 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo
10858 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10859 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10860 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400
10861 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
10862 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
10863 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10864 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
10865 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
10866 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10867 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
10868 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10869 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10870 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10871 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10872 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10873 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10874 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10875 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10876 call amdgpu_gfx void @external_void_func_f16_inreg(half inreg 4.0)
; Passes the float constant 4.0 as an inreg argument to
; @external_void_func_f32_inreg. Every check prefix below expects the value
; to be written to s4 with `s_mov_b32 s4, 4.0`. The previous values of s4,
; s30, s31 and s33 are expected in lanes 0-3 of v40 across the call and are
; read back afterwards.
10880 define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 {
10881 ; GFX9-LABEL: test_call_external_void_func_f32_imm_inreg:
10883 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10884 ; GFX9-NEXT: s_mov_b32 s34, s33
10885 ; GFX9-NEXT: s_mov_b32 s33, s32
10886 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10887 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10888 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10889 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
10890 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10891 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
10892 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f32_inreg@abs32@hi
10893 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f32_inreg@abs32@lo
10894 ; GFX9-NEXT: s_mov_b32 s4, 4.0
10895 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10896 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
10897 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10898 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
10899 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
10900 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10901 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
10902 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10903 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10904 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10905 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10906 ; GFX9-NEXT: s_mov_b32 s33, s34
10907 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10908 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10910 ; GFX10-LABEL: test_call_external_void_func_f32_imm_inreg:
10912 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10913 ; GFX10-NEXT: s_mov_b32 s34, s33
10914 ; GFX10-NEXT: s_mov_b32 s33, s32
10915 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10916 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10917 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10918 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10919 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
10920 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f32_inreg@abs32@hi
10921 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32_inreg@abs32@lo
10922 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10923 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10924 ; GFX10-NEXT: s_mov_b32 s4, 4.0
10925 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
10926 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
10927 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10928 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
10929 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
10930 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10931 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
10932 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10933 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10934 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10935 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10936 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10937 ; GFX10-NEXT: s_mov_b32 s33, s34
10938 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10939 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10941 ; GFX11-LABEL: test_call_external_void_func_f32_imm_inreg:
10943 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10944 ; GFX11-NEXT: s_mov_b32 s0, s33
10945 ; GFX11-NEXT: s_mov_b32 s33, s32
10946 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10947 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10948 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10949 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
10950 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi
10951 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo
10952 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10953 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10954 ; GFX11-NEXT: s_mov_b32 s4, 4.0
10955 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
10956 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
10957 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10958 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10959 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
10960 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
10961 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10962 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
10963 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10964 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10965 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10966 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10967 ; GFX11-NEXT: s_mov_b32 s33, s0
10968 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10969 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10971 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm_inreg:
10972 ; GFX10-SCRATCH: ; %bb.0:
10973 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10974 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10975 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10976 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10977 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10978 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10979 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10980 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
10981 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi
10982 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo
10983 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10984 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10985 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0
10986 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
10987 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
10988 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10989 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
10990 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
10991 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10992 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
10993 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10994 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10995 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10996 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10997 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10998 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10999 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11000 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11001 call amdgpu_gfx void @external_void_func_f32_inreg(float inreg 4.0)
; Passes the constant vector <float 1.0, float 2.0> as an inreg argument to
; @external_void_func_v2f32_inreg. Every check prefix below expects element 0
; in s4 (1.0) and element 1 in s5 (2.0). The previous values of s4, s5, s30,
; s31 and s33 are expected in lanes 0-4 of v40 across the call and are read
; back afterwards.
11005 define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 {
11006 ; GFX9-LABEL: test_call_external_void_func_v2f32_imm_inreg:
11008 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11009 ; GFX9-NEXT: s_mov_b32 s34, s33
11010 ; GFX9-NEXT: s_mov_b32 s33, s32
11011 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11012 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11013 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11014 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
11015 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11016 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11017 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
11018 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f32_inreg@abs32@hi
11019 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f32_inreg@abs32@lo
11020 ; GFX9-NEXT: s_mov_b32 s4, 1.0
11021 ; GFX9-NEXT: s_mov_b32 s5, 2.0
11022 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11023 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
11024 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11025 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
11026 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
11027 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11028 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11029 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
11030 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11031 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11032 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11033 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11034 ; GFX9-NEXT: s_mov_b32 s33, s34
11035 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11036 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11038 ; GFX10-LABEL: test_call_external_void_func_v2f32_imm_inreg:
11040 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11041 ; GFX10-NEXT: s_mov_b32 s34, s33
11042 ; GFX10-NEXT: s_mov_b32 s33, s32
11043 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11044 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11045 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11046 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11047 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
11048 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32_inreg@abs32@hi
11049 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f32_inreg@abs32@lo
11050 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11051 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11052 ; GFX10-NEXT: s_mov_b32 s4, 1.0
11053 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11054 ; GFX10-NEXT: s_mov_b32 s5, 2.0
11055 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
11056 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
11057 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11058 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
11059 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
11060 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11061 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11062 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
11063 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11064 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11065 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11066 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11067 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11068 ; GFX10-NEXT: s_mov_b32 s33, s34
11069 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11070 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11072 ; GFX11-LABEL: test_call_external_void_func_v2f32_imm_inreg:
11074 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11075 ; GFX11-NEXT: s_mov_b32 s0, s33
11076 ; GFX11-NEXT: s_mov_b32 s33, s32
11077 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11078 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11079 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11080 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
11081 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi
11082 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo
11083 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11084 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11085 ; GFX11-NEXT: s_mov_b32 s4, 1.0
11086 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
11087 ; GFX11-NEXT: s_mov_b32 s5, 2.0
11088 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
11089 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
11090 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11091 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11092 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
11093 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
11094 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
11095 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11096 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
11097 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11098 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11099 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11100 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11101 ; GFX11-NEXT: s_mov_b32 s33, s0
11102 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11103 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11105 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm_inreg:
11106 ; GFX10-SCRATCH: ; %bb.0:
11107 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11108 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11109 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11110 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11111 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11112 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11113 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11114 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
11115 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi
11116 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo
11117 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11118 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11119 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
11120 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
11121 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
11122 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
11123 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
11124 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11125 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
11126 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
11127 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11128 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11129 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
11130 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11131 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11132 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11133 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11134 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11135 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11136 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11137 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11138 call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg <float 1.0, float 2.0>)
; Test: call @external_void_func_v3f32_inreg with the constant vector
; <3 x float> <1.0, 2.0, 4.0> passed as an `inreg` argument.
; For each check prefix (GFX9, GFX10, GFX11, GFX10-SCRATCH) the expected code:
;   - copies the incoming s33 to a temporary SGPR and saves it in lane 5 of v40,
;   - saves s4, s5, s6 in lanes 0-2 and s30/s31 in lanes 3-4 with v_writelane_b32,
;   - materializes 1.0, 2.0, 4.0 into s4, s5, s6 with s_mov_b32 before s_swappc_b64,
;   - restores every saved SGPR with v_readlane_b32 after the call.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
11142 define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 {
11143 ; GFX9-LABEL: test_call_external_void_func_v3f32_imm_inreg:
11145 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11146 ; GFX9-NEXT: s_mov_b32 s34, s33
11147 ; GFX9-NEXT: s_mov_b32 s33, s32
11148 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11149 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11150 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11151 ; GFX9-NEXT: v_writelane_b32 v40, s34, 5
11152 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11153 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11154 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
11155 ; GFX9-NEXT: v_writelane_b32 v40, s30, 3
11156 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f32_inreg@abs32@hi
11157 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f32_inreg@abs32@lo
11158 ; GFX9-NEXT: s_mov_b32 s4, 1.0
11159 ; GFX9-NEXT: s_mov_b32 s5, 2.0
11160 ; GFX9-NEXT: s_mov_b32 s6, 4.0
11161 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11162 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4
11163 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11164 ; GFX9-NEXT: v_readlane_b32 s31, v40, 4
11165 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3
11166 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
11167 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11168 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11169 ; GFX9-NEXT: v_readlane_b32 s34, v40, 5
11170 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11171 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11172 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11173 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11174 ; GFX9-NEXT: s_mov_b32 s33, s34
11175 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11176 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11178 ; GFX10-LABEL: test_call_external_void_func_v3f32_imm_inreg:
11180 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11181 ; GFX10-NEXT: s_mov_b32 s34, s33
11182 ; GFX10-NEXT: s_mov_b32 s33, s32
11183 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11184 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11185 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11186 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11187 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5
11188 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32_inreg@abs32@hi
11189 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f32_inreg@abs32@lo
11190 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11191 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11192 ; GFX10-NEXT: s_mov_b32 s4, 1.0
11193 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11194 ; GFX10-NEXT: s_mov_b32 s5, 2.0
11195 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
11196 ; GFX10-NEXT: s_mov_b32 s6, 4.0
11197 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3
11198 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4
11199 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11200 ; GFX10-NEXT: v_readlane_b32 s31, v40, 4
11201 ; GFX10-NEXT: v_readlane_b32 s30, v40, 3
11202 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
11203 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11204 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11205 ; GFX10-NEXT: v_readlane_b32 s34, v40, 5
11206 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11207 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11208 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11209 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11210 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11211 ; GFX10-NEXT: s_mov_b32 s33, s34
11212 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11213 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11215 ; GFX11-LABEL: test_call_external_void_func_v3f32_imm_inreg:
11217 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11218 ; GFX11-NEXT: s_mov_b32 s0, s33
11219 ; GFX11-NEXT: s_mov_b32 s33, s32
11220 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11221 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11222 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11223 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5
11224 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi
11225 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo
11226 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11227 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11228 ; GFX11-NEXT: s_mov_b32 s4, 1.0
11229 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
11230 ; GFX11-NEXT: s_mov_b32 s5, 2.0
11231 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
11232 ; GFX11-NEXT: s_mov_b32 s6, 4.0
11233 ; GFX11-NEXT: v_writelane_b32 v40, s30, 3
11234 ; GFX11-NEXT: v_writelane_b32 v40, s31, 4
11235 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11236 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11237 ; GFX11-NEXT: v_readlane_b32 s31, v40, 4
11238 ; GFX11-NEXT: v_readlane_b32 s30, v40, 3
11239 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
11240 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
11241 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11242 ; GFX11-NEXT: v_readlane_b32 s0, v40, 5
11243 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11244 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11245 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11246 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11247 ; GFX11-NEXT: s_mov_b32 s33, s0
11248 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11249 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11251 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm_inreg:
11252 ; GFX10-SCRATCH: ; %bb.0:
11253 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11254 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11255 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11256 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11257 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11258 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11259 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11260 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5
11261 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi
11262 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo
11263 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11264 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11265 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
11266 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
11267 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
11268 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
11269 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
11270 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3
11271 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4
11272 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11273 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4
11274 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
11275 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
11276 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11277 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11278 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 5
11279 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11280 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11281 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11282 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11283 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11284 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11285 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11286 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11287 call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg <float 1.0, float 2.0, float 4.0>)
; Test: call @external_void_func_v5f32_inreg with the constant vector
; <5 x float> <1.0, 2.0, 4.0, -1.0, 0.5> passed as an `inreg` argument.
; For each check prefix (GFX9, GFX10, GFX11, GFX10-SCRATCH) the expected code:
;   - copies the incoming s33 to a temporary SGPR and saves it in lane 7 of v40,
;   - saves s4..s8 in lanes 0-4 and s30/s31 in lanes 5-6 with v_writelane_b32,
;   - materializes the five elements into s4..s8 with s_mov_b32 before s_swappc_b64,
;   - restores every saved SGPR with v_readlane_b32 after the call.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
11291 define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 {
11292 ; GFX9-LABEL: test_call_external_void_func_v5f32_imm_inreg:
11294 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11295 ; GFX9-NEXT: s_mov_b32 s34, s33
11296 ; GFX9-NEXT: s_mov_b32 s33, s32
11297 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11298 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11299 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11300 ; GFX9-NEXT: v_writelane_b32 v40, s34, 7
11301 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11302 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11303 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
11304 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
11305 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
11306 ; GFX9-NEXT: v_writelane_b32 v40, s30, 5
11307 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5f32_inreg@abs32@hi
11308 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5f32_inreg@abs32@lo
11309 ; GFX9-NEXT: s_mov_b32 s4, 1.0
11310 ; GFX9-NEXT: s_mov_b32 s5, 2.0
11311 ; GFX9-NEXT: s_mov_b32 s6, 4.0
11312 ; GFX9-NEXT: s_mov_b32 s7, -1.0
11313 ; GFX9-NEXT: s_mov_b32 s8, 0.5
11314 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11315 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6
11316 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11317 ; GFX9-NEXT: v_readlane_b32 s31, v40, 6
11318 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5
11319 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
11320 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
11321 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
11322 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11323 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11324 ; GFX9-NEXT: v_readlane_b32 s34, v40, 7
11325 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11326 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11327 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11328 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11329 ; GFX9-NEXT: s_mov_b32 s33, s34
11330 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11331 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11333 ; GFX10-LABEL: test_call_external_void_func_v5f32_imm_inreg:
11335 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11336 ; GFX10-NEXT: s_mov_b32 s34, s33
11337 ; GFX10-NEXT: s_mov_b32 s33, s32
11338 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11339 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11340 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11341 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11342 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7
11343 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5f32_inreg@abs32@hi
11344 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32_inreg@abs32@lo
11345 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11346 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11347 ; GFX10-NEXT: s_mov_b32 s4, 1.0
11348 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11349 ; GFX10-NEXT: s_mov_b32 s5, 2.0
11350 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
11351 ; GFX10-NEXT: s_mov_b32 s6, 4.0
11352 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
11353 ; GFX10-NEXT: s_mov_b32 s7, -1.0
11354 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
11355 ; GFX10-NEXT: s_mov_b32 s8, 0.5
11356 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5
11357 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6
11358 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11359 ; GFX10-NEXT: v_readlane_b32 s31, v40, 6
11360 ; GFX10-NEXT: v_readlane_b32 s30, v40, 5
11361 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
11362 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
11363 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
11364 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11365 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11366 ; GFX10-NEXT: v_readlane_b32 s34, v40, 7
11367 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11368 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11369 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11370 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11371 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11372 ; GFX10-NEXT: s_mov_b32 s33, s34
11373 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11374 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11376 ; GFX11-LABEL: test_call_external_void_func_v5f32_imm_inreg:
11378 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11379 ; GFX11-NEXT: s_mov_b32 s0, s33
11380 ; GFX11-NEXT: s_mov_b32 s33, s32
11381 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11382 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11383 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11384 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7
11385 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi
11386 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo
11387 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11388 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11389 ; GFX11-NEXT: s_mov_b32 s4, 1.0
11390 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
11391 ; GFX11-NEXT: s_mov_b32 s5, 2.0
11392 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
11393 ; GFX11-NEXT: s_mov_b32 s6, 4.0
11394 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
11395 ; GFX11-NEXT: s_mov_b32 s7, -1.0
11396 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
11397 ; GFX11-NEXT: s_mov_b32 s8, 0.5
11398 ; GFX11-NEXT: v_writelane_b32 v40, s30, 5
11399 ; GFX11-NEXT: v_writelane_b32 v40, s31, 6
11400 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11401 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11402 ; GFX11-NEXT: v_readlane_b32 s31, v40, 6
11403 ; GFX11-NEXT: v_readlane_b32 s30, v40, 5
11404 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
11405 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
11406 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
11407 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
11408 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11409 ; GFX11-NEXT: v_readlane_b32 s0, v40, 7
11410 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11411 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11412 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11413 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11414 ; GFX11-NEXT: s_mov_b32 s33, s0
11415 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11416 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11418 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm_inreg:
11419 ; GFX10-SCRATCH: ; %bb.0:
11420 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11421 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11422 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11423 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11424 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11425 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11426 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11427 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7
11428 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi
11429 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo
11430 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11431 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11432 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
11433 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
11434 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
11435 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
11436 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
11437 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
11438 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0
11439 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
11440 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5
11441 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5
11442 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6
11443 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11444 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6
11445 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5
11446 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
11447 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
11448 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
11449 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11450 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11451 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 7
11452 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11453 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11454 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11455 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11456 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11457 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11458 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11459 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11460 call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
; Test: call @external_void_func_f64_inreg with the constant `double 4.0`
; passed as an `inreg` argument.
; For each check prefix (GFX9, GFX10, GFX11, GFX10-SCRATCH) the expected code:
;   - copies the incoming s33 to a temporary SGPR and saves it in lane 4 of v40,
;   - saves s4, s5 in lanes 0-1 and s30/s31 in lanes 2-3 with v_writelane_b32,
;   - loads the 64-bit constant as two dwords: s4 = 0 and s5 = 0x40100000,
;   - restores every saved SGPR with v_readlane_b32 after s_swappc_b64.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
11464 define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 {
11465 ; GFX9-LABEL: test_call_external_void_func_f64_imm_inreg:
11467 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11468 ; GFX9-NEXT: s_mov_b32 s34, s33
11469 ; GFX9-NEXT: s_mov_b32 s33, s32
11470 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11471 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11472 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11473 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
11474 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11475 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11476 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
11477 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f64_inreg@abs32@hi
11478 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f64_inreg@abs32@lo
11479 ; GFX9-NEXT: s_mov_b32 s4, 0
11480 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000
11481 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11482 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
11483 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11484 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
11485 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
11486 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11487 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11488 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
11489 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11490 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11491 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11492 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11493 ; GFX9-NEXT: s_mov_b32 s33, s34
11494 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11495 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11497 ; GFX10-LABEL: test_call_external_void_func_f64_imm_inreg:
11499 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11500 ; GFX10-NEXT: s_mov_b32 s34, s33
11501 ; GFX10-NEXT: s_mov_b32 s33, s32
11502 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11503 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11504 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11505 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11506 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
11507 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64_inreg@abs32@hi
11508 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64_inreg@abs32@lo
11509 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11510 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11511 ; GFX10-NEXT: s_mov_b32 s4, 0
11512 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11513 ; GFX10-NEXT: s_mov_b32 s5, 0x40100000
11514 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
11515 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
11516 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11517 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
11518 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
11519 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11520 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11521 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
11522 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11523 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11524 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11525 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11526 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11527 ; GFX10-NEXT: s_mov_b32 s33, s34
11528 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11529 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11531 ; GFX11-LABEL: test_call_external_void_func_f64_imm_inreg:
11533 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11534 ; GFX11-NEXT: s_mov_b32 s0, s33
11535 ; GFX11-NEXT: s_mov_b32 s33, s32
11536 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11537 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11538 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11539 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
11540 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi
11541 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo
11542 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11543 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11544 ; GFX11-NEXT: s_mov_b32 s4, 0
11545 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
11546 ; GFX11-NEXT: s_mov_b32 s5, 0x40100000
11547 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
11548 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
11549 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11550 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11551 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
11552 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
11553 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
11554 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11555 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
11556 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11557 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11558 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11559 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11560 ; GFX11-NEXT: s_mov_b32 s33, s0
11561 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11562 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11564 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm_inreg:
11565 ; GFX10-SCRATCH: ; %bb.0:
11566 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11567 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11568 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11569 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11570 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11571 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11572 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11573 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
11574 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi
11575 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo
11576 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11577 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11578 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
11579 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
11580 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000
11581 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
11582 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
11583 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11584 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
11585 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
11586 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11587 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11588 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
11589 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11590 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11591 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11592 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11593 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11594 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11595 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11596 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11597 call amdgpu_gfx void @external_void_func_f64_inreg(double inreg 4.0)
; Test: call @external_void_func_v2f64_inreg with the constant vector
; <2 x double> <2.0, 4.0> passed as an `inreg` argument.
; For each check prefix (GFX9, GFX10, GFX11, GFX10-SCRATCH) the expected code:
;   - copies the incoming s33 to a temporary SGPR and saves it in lane 6 of v40,
;   - saves s4..s7 in lanes 0-3 and s30/s31 in lanes 4-5 with v_writelane_b32,
;   - loads the two 64-bit elements as dword pairs: s4 = 0, s5 = 2.0 and
;     s6 = 0, s7 = 0x40100000 (the s5 operand is checked exactly as `2.0`;
;     presumably the high dword of double 2.0 printed as an inline float constant),
;   - restores every saved SGPR with v_readlane_b32 after s_swappc_b64.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
11601 define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 {
11602 ; GFX9-LABEL: test_call_external_void_func_v2f64_imm_inreg:
11604 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11605 ; GFX9-NEXT: s_mov_b32 s34, s33
11606 ; GFX9-NEXT: s_mov_b32 s33, s32
11607 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11608 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11609 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11610 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
11611 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11612 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11613 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
11614 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
11615 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
11616 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f64_inreg@abs32@hi
11617 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f64_inreg@abs32@lo
11618 ; GFX9-NEXT: s_mov_b32 s4, 0
11619 ; GFX9-NEXT: s_mov_b32 s5, 2.0
11620 ; GFX9-NEXT: s_mov_b32 s6, 0
11621 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000
11622 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11623 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
11624 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11625 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
11626 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
11627 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
11628 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
11629 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11630 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11631 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
11632 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11633 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11634 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11635 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11636 ; GFX9-NEXT: s_mov_b32 s33, s34
11637 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11638 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11640 ; GFX10-LABEL: test_call_external_void_func_v2f64_imm_inreg:
11642 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11643 ; GFX10-NEXT: s_mov_b32 s34, s33
11644 ; GFX10-NEXT: s_mov_b32 s33, s32
11645 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11646 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11647 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11648 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11649 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
11650 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64_inreg@abs32@hi
11651 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f64_inreg@abs32@lo
11652 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11653 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11654 ; GFX10-NEXT: s_mov_b32 s4, 0
11655 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11656 ; GFX10-NEXT: s_mov_b32 s5, 2.0
11657 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
11658 ; GFX10-NEXT: s_mov_b32 s6, 0
11659 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
11660 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000
11661 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
11662 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
11663 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11664 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
11665 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
11666 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
11667 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
11668 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11669 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11670 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
11671 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11672 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11673 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11674 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11675 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11676 ; GFX10-NEXT: s_mov_b32 s33, s34
11677 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11678 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11680 ; GFX11-LABEL: test_call_external_void_func_v2f64_imm_inreg:
11682 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11683 ; GFX11-NEXT: s_mov_b32 s0, s33
11684 ; GFX11-NEXT: s_mov_b32 s33, s32
11685 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11686 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11687 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11688 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
11689 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi
11690 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo
11691 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11692 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11693 ; GFX11-NEXT: s_mov_b32 s4, 0
11694 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
11695 ; GFX11-NEXT: s_mov_b32 s5, 2.0
11696 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
11697 ; GFX11-NEXT: s_mov_b32 s6, 0
11698 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
11699 ; GFX11-NEXT: s_mov_b32 s7, 0x40100000
11700 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
11701 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
11702 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11703 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11704 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
11705 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
11706 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
11707 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
11708 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
11709 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11710 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
11711 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11712 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11713 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11714 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11715 ; GFX11-NEXT: s_mov_b32 s33, s0
11716 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11717 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11719 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm_inreg:
11720 ; GFX10-SCRATCH: ; %bb.0:
11721 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11722 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11723 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11724 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11725 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11726 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11727 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11728 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
11729 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi
11730 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo
11731 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11732 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11733 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
11734 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
11735 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
11736 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
11737 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0
11738 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
11739 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000
11740 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
11741 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
11742 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11743 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
11744 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
11745 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
11746 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
11747 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11748 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11749 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
11750 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11751 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11752 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11753 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11754 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11755 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11756 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11757 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11758 call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg <double 2.0, double 4.0>)
; Passes the constant <3 x double> <2.0, 4.0, 8.0> as an inreg argument to
; external_void_func_v3f64_inreg from an amdgpu_gfx function.
; In every run the expected output writes s4-s9 and s30/s31 to lanes 0-7 of
; v40 and the copy of the incoming s33 to lane 8, materialises the six argument
; dwords in s4-s9 with s_mov_b32 (s4/s6/s8 = 0, s5 = 2.0, s7 = 0x40100000,
; s9 = 0x40200000), calls through s_swappc_b64, and then reads every saved
; SGPR back from v40 before returning.
11762 define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 {
11763 ; GFX9-LABEL: test_call_external_void_func_v3f64_imm_inreg:
11765 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11766 ; GFX9-NEXT: s_mov_b32 s34, s33
11767 ; GFX9-NEXT: s_mov_b32 s33, s32
11768 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11769 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11770 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11771 ; GFX9-NEXT: v_writelane_b32 v40, s34, 8
11772 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11773 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11774 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
11775 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
11776 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
11777 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
11778 ; GFX9-NEXT: v_writelane_b32 v40, s30, 6
11779 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f64_inreg@abs32@hi
11780 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f64_inreg@abs32@lo
11781 ; GFX9-NEXT: s_mov_b32 s4, 0
11782 ; GFX9-NEXT: s_mov_b32 s5, 2.0
11783 ; GFX9-NEXT: s_mov_b32 s6, 0
11784 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000
11785 ; GFX9-NEXT: s_mov_b32 s8, 0
11786 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000
11787 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11788 ; GFX9-NEXT: v_writelane_b32 v40, s31, 7
11789 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11790 ; GFX9-NEXT: v_readlane_b32 s31, v40, 7
11791 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6
11792 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
11793 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
11794 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
11795 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
11796 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11797 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11798 ; GFX9-NEXT: v_readlane_b32 s34, v40, 8
11799 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11800 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11801 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11802 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11803 ; GFX9-NEXT: s_mov_b32 s33, s34
11804 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11805 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11807 ; GFX10-LABEL: test_call_external_void_func_v3f64_imm_inreg:
11809 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11810 ; GFX10-NEXT: s_mov_b32 s34, s33
11811 ; GFX10-NEXT: s_mov_b32 s33, s32
11812 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11813 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11814 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11815 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11816 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8
11817 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64_inreg@abs32@hi
11818 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f64_inreg@abs32@lo
11819 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11820 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11821 ; GFX10-NEXT: s_mov_b32 s4, 0
11822 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11823 ; GFX10-NEXT: s_mov_b32 s5, 2.0
11824 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
11825 ; GFX10-NEXT: s_mov_b32 s6, 0
11826 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
11827 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000
11828 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
11829 ; GFX10-NEXT: s_mov_b32 s8, 0
11830 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
11831 ; GFX10-NEXT: s_mov_b32 s9, 0x40200000
11832 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6
11833 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7
11834 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11835 ; GFX10-NEXT: v_readlane_b32 s31, v40, 7
11836 ; GFX10-NEXT: v_readlane_b32 s30, v40, 6
11837 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
11838 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
11839 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
11840 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
11841 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11842 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11843 ; GFX10-NEXT: v_readlane_b32 s34, v40, 8
11844 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11845 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11846 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11847 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11848 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11849 ; GFX10-NEXT: s_mov_b32 s33, s34
11850 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11851 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11853 ; GFX11-LABEL: test_call_external_void_func_v3f64_imm_inreg:
11855 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11856 ; GFX11-NEXT: s_mov_b32 s0, s33
11857 ; GFX11-NEXT: s_mov_b32 s33, s32
11858 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11859 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11860 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11861 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8
11862 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi
11863 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo
11864 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11865 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11866 ; GFX11-NEXT: s_mov_b32 s4, 0
11867 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
11868 ; GFX11-NEXT: s_mov_b32 s5, 2.0
11869 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
11870 ; GFX11-NEXT: s_mov_b32 s6, 0
11871 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
11872 ; GFX11-NEXT: s_mov_b32 s7, 0x40100000
11873 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
11874 ; GFX11-NEXT: s_mov_b32 s8, 0
11875 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
11876 ; GFX11-NEXT: s_mov_b32 s9, 0x40200000
11877 ; GFX11-NEXT: v_writelane_b32 v40, s30, 6
11878 ; GFX11-NEXT: v_writelane_b32 v40, s31, 7
11879 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11880 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11881 ; GFX11-NEXT: v_readlane_b32 s31, v40, 7
11882 ; GFX11-NEXT: v_readlane_b32 s30, v40, 6
11883 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
11884 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
11885 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
11886 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
11887 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
11888 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11889 ; GFX11-NEXT: v_readlane_b32 s0, v40, 8
11890 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11891 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11892 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11893 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11894 ; GFX11-NEXT: s_mov_b32 s33, s0
11895 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11896 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11898 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm_inreg:
11899 ; GFX10-SCRATCH: ; %bb.0:
11900 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11901 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11902 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11903 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11904 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11905 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11906 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11907 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8
11908 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi
11909 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo
11910 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11911 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11912 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
11913 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
11914 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
11915 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
11916 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0
11917 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
11918 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000
11919 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
11920 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0
11921 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
11922 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 0x40200000
11923 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6
11924 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7
11925 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11926 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7
11927 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
11928 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
11929 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
11930 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
11931 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
11932 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11933 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11934 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 8
11935 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11936 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11937 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11938 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11939 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11940 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11941 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11942 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11943 call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg <double 2.0, double 4.0, double 8.0>)
; Passes a loaded <2 x i16> value as an inreg argument to
; external_void_func_v2i16_inreg from an amdgpu_gfx function.
; In every run the expected output writes s4 to lane 0 of v40, s30/s31 to
; lanes 1-2 and the copy of the incoming s33 to lane 3, loads one dword into
; s4 with a scalar load (s_load_dword, or s_load_b32 on the gfx1100 run), calls
; through s_swappc_b64, and then reads the saved SGPRs back from v40.
11947 define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 {
11948 ; GFX9-LABEL: test_call_external_void_func_v2i16_inreg:
11950 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11951 ; GFX9-NEXT: s_mov_b32 s34, s33
11952 ; GFX9-NEXT: s_mov_b32 s33, s32
11953 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11954 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11955 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11956 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
11957 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11958 ; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0
11959 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
11960 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i16_inreg@abs32@hi
11961 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16_inreg@abs32@lo
11962 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11963 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
11964 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11965 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
11966 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
11967 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11968 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
11969 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11970 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11971 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11972 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11973 ; GFX9-NEXT: s_mov_b32 s33, s34
11974 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11975 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11977 ; GFX10-LABEL: test_call_external_void_func_v2i16_inreg:
11979 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11980 ; GFX10-NEXT: s_mov_b32 s34, s33
11981 ; GFX10-NEXT: s_mov_b32 s33, s32
11982 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11983 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11984 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11985 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11986 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
11987 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11988 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11989 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0
11990 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16_inreg@abs32@hi
11991 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16_inreg@abs32@lo
11992 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
11993 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
11994 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11995 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
11996 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
11997 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11998 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
11999 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12000 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12001 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12002 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12003 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
12004 ; GFX10-NEXT: s_mov_b32 s33, s34
12005 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12006 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12008 ; GFX11-LABEL: test_call_external_void_func_v2i16_inreg:
12010 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12011 ; GFX11-NEXT: s_mov_b32 s0, s33
12012 ; GFX11-NEXT: s_mov_b32 s33, s32
12013 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12014 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
12015 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12016 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
12017 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12018 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12019 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0
12020 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16_inreg@abs32@hi
12021 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16_inreg@abs32@lo
12022 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
12023 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
12024 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12025 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12026 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
12027 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
12028 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12029 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
12030 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12031 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12032 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12033 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12034 ; GFX11-NEXT: s_mov_b32 s33, s0
12035 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12036 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12038 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16_inreg:
12039 ; GFX10-SCRATCH: ; %bb.0:
12040 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12041 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12042 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12043 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12044 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12045 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12046 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12047 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
12048 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12049 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12050 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0
12051 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16_inreg@abs32@hi
12052 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16_inreg@abs32@lo
12053 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
12054 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
12055 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12056 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
12057 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
12058 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12059 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
12060 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12061 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
12062 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12063 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12064 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
12065 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
12066 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
12067 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The argument value comes from a load through an undef addrspace(4) pointer.
12068 %val = load <2 x i16>, ptr addrspace(4) undef
12069 call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg %val)
; Passes a loaded <3 x i16> value as an inreg argument to
; external_void_func_v3i16_inreg from an amdgpu_gfx function.
; In every run the expected output writes s4/s5 to lanes 0-1 of v40, s30/s31
; to lanes 2-3 and the copy of the incoming s33 to lane 4, loads two dwords
; into s[4:5] with a scalar load (s_load_dwordx2, or s_load_b64 on the gfx1100
; run), calls through s_swappc_b64, and then reads the saved SGPRs back.
12073 define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 {
12074 ; GFX9-LABEL: test_call_external_void_func_v3i16_inreg:
12076 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12077 ; GFX9-NEXT: s_mov_b32 s34, s33
12078 ; GFX9-NEXT: s_mov_b32 s33, s32
12079 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12080 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12081 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12082 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
12083 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
12084 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
12085 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
12086 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
12087 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi
12088 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo
12089 ; GFX9-NEXT: s_addk_i32 s32, 0x400
12090 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
12091 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
12092 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
12093 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
12094 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
12095 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
12096 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
12097 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12098 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12099 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12100 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
12101 ; GFX9-NEXT: s_mov_b32 s33, s34
12102 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12103 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12105 ; GFX10-LABEL: test_call_external_void_func_v3i16_inreg:
12107 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12108 ; GFX10-NEXT: s_mov_b32 s34, s33
12109 ; GFX10-NEXT: s_mov_b32 s33, s32
12110 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12111 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12112 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12113 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12114 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
12115 ; GFX10-NEXT: s_addk_i32 s32, 0x200
12116 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
12117 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
12118 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
12119 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi
12120 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo
12121 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
12122 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
12123 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
12124 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
12125 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
12126 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
12127 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
12128 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
12129 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12130 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12131 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12132 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12133 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
12134 ; GFX10-NEXT: s_mov_b32 s33, s34
12135 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12136 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12138 ; GFX11-LABEL: test_call_external_void_func_v3i16_inreg:
12140 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12141 ; GFX11-NEXT: s_mov_b32 s0, s33
12142 ; GFX11-NEXT: s_mov_b32 s33, s32
12143 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12144 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
12145 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12146 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
12147 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12148 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12149 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
12150 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
12151 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi
12152 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo
12153 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
12154 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
12155 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12156 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12157 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
12158 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
12159 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
12160 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12161 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
12162 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12163 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12164 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12165 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12166 ; GFX11-NEXT: s_mov_b32 s33, s0
12167 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12168 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12170 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_inreg:
12171 ; GFX10-SCRATCH: ; %bb.0:
12172 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12173 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12174 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12175 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12176 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12177 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12178 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12179 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
12180 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12181 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12182 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
12183 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
12184 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi
12185 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo
12186 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
12187 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
12188 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12189 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
12190 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
12191 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
12192 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12193 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
12194 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12195 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
12196 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12197 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12198 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
12199 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
12200 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
12201 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The argument value comes from a load through an undef addrspace(4) pointer.
12202 %val = load <3 x i16>, ptr addrspace(4) undef
12203 call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg %val)
; Passes a loaded <3 x half> value as an inreg argument to
; external_void_func_v3f16_inreg from an amdgpu_gfx function.
; In every run the expected output writes s4/s5 to lanes 0-1 of v40, s30/s31
; to lanes 2-3 and the copy of the incoming s33 to lane 4, loads two dwords
; into s[4:5] with a scalar load (s_load_dwordx2, or s_load_b64 on the gfx1100
; run), calls through s_swappc_b64, and then reads the saved SGPRs back.
12207 define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 {
12208 ; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
12210 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12211 ; GFX9-NEXT: s_mov_b32 s34, s33
12212 ; GFX9-NEXT: s_mov_b32 s33, s32
12213 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12214 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12215 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12216 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
12217 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
12218 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
12219 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
12220 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
12221 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi
12222 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo
12223 ; GFX9-NEXT: s_addk_i32 s32, 0x400
12224 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
12225 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
12226 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
12227 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
12228 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
12229 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
12230 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
12231 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12232 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12233 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12234 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
12235 ; GFX9-NEXT: s_mov_b32 s33, s34
12236 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12237 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12239 ; GFX10-LABEL: test_call_external_void_func_v3f16_inreg:
12241 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12242 ; GFX10-NEXT: s_mov_b32 s34, s33
12243 ; GFX10-NEXT: s_mov_b32 s33, s32
12244 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12245 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12246 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12247 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12248 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
12249 ; GFX10-NEXT: s_addk_i32 s32, 0x200
12250 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
12251 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
12252 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
12253 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi
12254 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo
12255 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
12256 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
12257 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
12258 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
12259 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
12260 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
12261 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
12262 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
12263 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12264 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12265 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12266 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12267 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
12268 ; GFX10-NEXT: s_mov_b32 s33, s34
12269 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12270 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12272 ; GFX11-LABEL: test_call_external_void_func_v3f16_inreg:
12274 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12275 ; GFX11-NEXT: s_mov_b32 s0, s33
12276 ; GFX11-NEXT: s_mov_b32 s33, s32
12277 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12278 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
12279 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12280 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
12281 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12282 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12283 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
12284 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
12285 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi
12286 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo
12287 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
12288 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
12289 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12290 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12291 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
12292 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
12293 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
12294 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12295 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
12296 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12297 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12298 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12299 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12300 ; GFX11-NEXT: s_mov_b32 s33, s0
12301 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12302 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12304 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_inreg:
12305 ; GFX10-SCRATCH: ; %bb.0:
12306 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12307 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12308 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12309 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12310 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12311 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12312 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12313 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
12314 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12315 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12316 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
12317 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
12318 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi
12319 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo
12320 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
12321 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
12322 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12323 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
12324 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
12325 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
12326 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12327 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
12328 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12329 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
12330 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12331 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12332 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
12333 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
12334 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
12335 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The argument value comes from a load through an undef addrspace(4) pointer.
12336 %val = load <3 x half>, ptr addrspace(4) undef
12337 call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg %val)
; Passes the constant <3 x i16> <1, 2, 3> as an inreg argument. The callee is
; external_void_func_v3i16_inreg, the same symbol the non-immediate v3i16 test
; calls.
; In every run the expected output writes s4/s5 to lanes 0-1 of v40, s30/s31
; to lanes 2-3 and the copy of the incoming s33 to lane 4, materialises the
; argument with s_mov_b32 s4, 0x20001 and s_mov_b32 s5, 3, calls through
; s_swappc_b64, and then reads the saved SGPRs back from v40.
12341 define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 {
12342 ; GFX9-LABEL: test_call_external_void_func_v3i16_imm_inreg:
12344 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12345 ; GFX9-NEXT: s_mov_b32 s34, s33
12346 ; GFX9-NEXT: s_mov_b32 s33, s32
12347 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12348 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12349 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12350 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
12351 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
12352 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
12353 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
12354 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi
12355 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo
12356 ; GFX9-NEXT: s_mov_b32 s4, 0x20001
12357 ; GFX9-NEXT: s_mov_b32 s5, 3
12358 ; GFX9-NEXT: s_addk_i32 s32, 0x400
12359 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
12360 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
12361 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
12362 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
12363 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
12364 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
12365 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
12366 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12367 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12368 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12369 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
12370 ; GFX9-NEXT: s_mov_b32 s33, s34
12371 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12372 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12374 ; GFX10-LABEL: test_call_external_void_func_v3i16_imm_inreg:
12376 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12377 ; GFX10-NEXT: s_mov_b32 s34, s33
12378 ; GFX10-NEXT: s_mov_b32 s33, s32
12379 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12380 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12381 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12382 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12383 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
12384 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi
12385 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo
12386 ; GFX10-NEXT: s_addk_i32 s32, 0x200
12387 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
12388 ; GFX10-NEXT: s_mov_b32 s4, 0x20001
12389 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
12390 ; GFX10-NEXT: s_mov_b32 s5, 3
12391 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
12392 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
12393 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
12394 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
12395 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
12396 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
12397 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
12398 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
12399 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12400 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12401 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12402 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12403 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
12404 ; GFX10-NEXT: s_mov_b32 s33, s34
12405 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12406 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12408 ; GFX11-LABEL: test_call_external_void_func_v3i16_imm_inreg:
12410 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12411 ; GFX11-NEXT: s_mov_b32 s0, s33
12412 ; GFX11-NEXT: s_mov_b32 s33, s32
12413 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12414 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
12415 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12416 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
12417 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi
12418 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo
12419 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12420 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12421 ; GFX11-NEXT: s_mov_b32 s4, 0x20001
12422 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
12423 ; GFX11-NEXT: s_mov_b32 s5, 3
12424 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
12425 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
12426 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12427 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12428 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
12429 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
12430 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
12431 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12432 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
12433 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12434 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12435 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12436 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12437 ; GFX11-NEXT: s_mov_b32 s33, s0
12438 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12439 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12441 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm_inreg:
12442 ; GFX10-SCRATCH: ; %bb.0:
12443 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12444 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12445 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12446 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12447 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12448 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12449 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12450 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
12451 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi
12452 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo
12453 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12454 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12455 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001
12456 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
12457 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3
12458 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
12459 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
12460 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12461 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
12462 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
12463 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
12464 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12465 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
12466 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12467 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
12468 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12469 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12470 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
12471 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
12472 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
12473 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
12474 call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg <i16 1, i16 2, i16 3>)
; Calls @external_void_func_v3f16_inreg with the constant vector
; <3 x half> <1.0, 2.0, 4.0> passed as an inreg argument.
; In every run configuration the expected code places the argument in s4/s5:
;   s4 = 0x40003c00 (half 2.0 in the upper 16 bits, half 1.0 in the lower 16 bits)
;   s5 = 0x4400     (half 4.0)
; Before overwriting them, the old s4/s5, the return address s30/s31 and the
; previous s33 value are written to lanes 0-4 of v40, and read back after the
; call. v40 itself is spilled/reloaded around the body with all lanes enabled
; (s_or_saveexec ... -1).
; The gfx900 and plain gfx1010 checks use buffer_store/buffer_load with s[0:3];
; the gfx1100 and flat-scratch gfx1010 checks use scratch_store/scratch_load.
12478 define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 {
12479 ; GFX9-LABEL: test_call_external_void_func_v3f16_imm_inreg:
12481 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12482 ; GFX9-NEXT: s_mov_b32 s34, s33
12483 ; GFX9-NEXT: s_mov_b32 s33, s32
12484 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12485 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12486 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12487 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
12488 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
12489 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
12490 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
12491 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi
12492 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo
12493 ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00
12494 ; GFX9-NEXT: s_movk_i32 s5, 0x4400
12495 ; GFX9-NEXT: s_addk_i32 s32, 0x400
12496 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
12497 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
12498 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
12499 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
12500 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
12501 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
12502 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
12503 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12504 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12505 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12506 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
12507 ; GFX9-NEXT: s_mov_b32 s33, s34
12508 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12509 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12511 ; GFX10-LABEL: test_call_external_void_func_v3f16_imm_inreg:
12513 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12514 ; GFX10-NEXT: s_mov_b32 s34, s33
12515 ; GFX10-NEXT: s_mov_b32 s33, s32
12516 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12517 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12518 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12519 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12520 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
12521 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi
12522 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo
12523 ; GFX10-NEXT: s_addk_i32 s32, 0x200
12524 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
12525 ; GFX10-NEXT: s_mov_b32 s4, 0x40003c00
12526 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
12527 ; GFX10-NEXT: s_movk_i32 s5, 0x4400
12528 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
12529 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
12530 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
12531 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
12532 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
12533 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
12534 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
12535 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
12536 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12537 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12538 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12539 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12540 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
12541 ; GFX10-NEXT: s_mov_b32 s33, s34
12542 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12543 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12545 ; GFX11-LABEL: test_call_external_void_func_v3f16_imm_inreg:
12547 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12548 ; GFX11-NEXT: s_mov_b32 s0, s33
12549 ; GFX11-NEXT: s_mov_b32 s33, s32
12550 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12551 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
12552 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12553 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
12554 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi
12555 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo
12556 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12557 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12558 ; GFX11-NEXT: s_mov_b32 s4, 0x40003c00
12559 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
12560 ; GFX11-NEXT: s_movk_i32 s5, 0x4400
12561 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
12562 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
12563 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12564 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12565 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
12566 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
12567 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
12568 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12569 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
12570 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12571 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12572 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12573 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12574 ; GFX11-NEXT: s_mov_b32 s33, s0
12575 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12576 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12578 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm_inreg:
12579 ; GFX10-SCRATCH: ; %bb.0:
12580 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12581 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12582 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12583 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12584 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12585 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12586 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12587 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
12588 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi
12589 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo
12590 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12591 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12592 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00
12593 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
12594 ; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400
12595 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
12596 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
12597 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12598 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
12599 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
12600 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
12601 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12602 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
12603 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12604 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
12605 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12606 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12607 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
12608 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
12609 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
12610 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
12611 call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg <half 1.0, half 2.0, half 4.0>)
; Loads a <4 x i16> value from an undef addrspace(4) pointer and passes it to
; @external_void_func_v4i16_inreg as an inreg argument.
; In every run configuration the expected code loads the 64-bit value straight
; into s[4:5] with a scalar load (s_load_dwordx2, or s_load_b64 on gfx1100),
; after the old s4/s5 have been written to lanes 0/1 of v40. s30/s31 and the
; previous s33 value go to lanes 2-4, and all five are read back after the call.
; NOTE(review): the pointer operand is undef, so the base register pair shown
; in the s_load checks presumably carries no meaningful address - confirm
; before relying on it.
12615 define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 {
12616 ; GFX9-LABEL: test_call_external_void_func_v4i16_inreg:
12618 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12619 ; GFX9-NEXT: s_mov_b32 s34, s33
12620 ; GFX9-NEXT: s_mov_b32 s33, s32
12621 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12622 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12623 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12624 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
12625 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
12626 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
12627 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
12628 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
12629 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi
12630 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo
12631 ; GFX9-NEXT: s_addk_i32 s32, 0x400
12632 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
12633 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
12634 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
12635 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
12636 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
12637 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
12638 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
12639 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12640 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12641 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12642 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
12643 ; GFX9-NEXT: s_mov_b32 s33, s34
12644 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12645 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12647 ; GFX10-LABEL: test_call_external_void_func_v4i16_inreg:
12649 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12650 ; GFX10-NEXT: s_mov_b32 s34, s33
12651 ; GFX10-NEXT: s_mov_b32 s33, s32
12652 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12653 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12654 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12655 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12656 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
12657 ; GFX10-NEXT: s_addk_i32 s32, 0x200
12658 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
12659 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
12660 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
12661 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi
12662 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo
12663 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
12664 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
12665 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
12666 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
12667 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
12668 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
12669 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
12670 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
12671 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12672 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12673 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12674 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12675 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
12676 ; GFX10-NEXT: s_mov_b32 s33, s34
12677 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12678 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12680 ; GFX11-LABEL: test_call_external_void_func_v4i16_inreg:
12682 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12683 ; GFX11-NEXT: s_mov_b32 s0, s33
12684 ; GFX11-NEXT: s_mov_b32 s33, s32
12685 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12686 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
12687 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12688 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
12689 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12690 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12691 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
12692 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
12693 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi
12694 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo
12695 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
12696 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
12697 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12698 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12699 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
12700 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
12701 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
12702 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12703 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
12704 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12705 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12706 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12707 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12708 ; GFX11-NEXT: s_mov_b32 s33, s0
12709 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12710 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12712 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_inreg:
12713 ; GFX10-SCRATCH: ; %bb.0:
12714 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12715 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12716 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12717 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12718 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12719 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12720 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12721 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
12722 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12723 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12724 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
12725 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
12726 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi
12727 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo
12728 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
12729 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
12730 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12731 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
12732 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
12733 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
12734 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12735 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
12736 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12737 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
12738 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12739 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12740 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
12741 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
12742 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
12743 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
12744 %val = load <4 x i16>, ptr addrspace(4) undef
12745 call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg %val)
; Calls @external_void_func_v4i16_inreg with the constant vector
; <4 x i16> <1, 2, 3, 4> passed as an inreg argument.
; In every run configuration the expected code materializes the argument as two
; packed 32-bit immediates:
;   s4 = 0x20001 (element 1 = 2 in the upper 16 bits, element 0 = 1 in the lower)
;   s5 = 0x40003 (element 3 = 4 in the upper 16 bits, element 2 = 3 in the lower)
; The old s4/s5, the return address s30/s31 and the previous s33 value are
; written to lanes 0-4 of v40 before the call and read back afterwards.
12749 define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 {
12750 ; GFX9-LABEL: test_call_external_void_func_v4i16_imm_inreg:
12752 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12753 ; GFX9-NEXT: s_mov_b32 s34, s33
12754 ; GFX9-NEXT: s_mov_b32 s33, s32
12755 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12756 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12757 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12758 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
12759 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
12760 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
12761 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
12762 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi
12763 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo
12764 ; GFX9-NEXT: s_mov_b32 s4, 0x20001
12765 ; GFX9-NEXT: s_mov_b32 s5, 0x40003
12766 ; GFX9-NEXT: s_addk_i32 s32, 0x400
12767 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
12768 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
12769 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
12770 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
12771 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
12772 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
12773 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
12774 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12775 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12776 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12777 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
12778 ; GFX9-NEXT: s_mov_b32 s33, s34
12779 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12780 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12782 ; GFX10-LABEL: test_call_external_void_func_v4i16_imm_inreg:
12784 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12785 ; GFX10-NEXT: s_mov_b32 s34, s33
12786 ; GFX10-NEXT: s_mov_b32 s33, s32
12787 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12788 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12789 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12790 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12791 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
12792 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi
12793 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo
12794 ; GFX10-NEXT: s_addk_i32 s32, 0x200
12795 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
12796 ; GFX10-NEXT: s_mov_b32 s4, 0x20001
12797 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
12798 ; GFX10-NEXT: s_mov_b32 s5, 0x40003
12799 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
12800 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
12801 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
12802 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
12803 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
12804 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
12805 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
12806 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
12807 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12808 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12809 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12810 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12811 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
12812 ; GFX10-NEXT: s_mov_b32 s33, s34
12813 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12814 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12816 ; GFX11-LABEL: test_call_external_void_func_v4i16_imm_inreg:
12818 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12819 ; GFX11-NEXT: s_mov_b32 s0, s33
12820 ; GFX11-NEXT: s_mov_b32 s33, s32
12821 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12822 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
12823 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12824 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
12825 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi
12826 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo
12827 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12828 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12829 ; GFX11-NEXT: s_mov_b32 s4, 0x20001
12830 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
12831 ; GFX11-NEXT: s_mov_b32 s5, 0x40003
12832 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
12833 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
12834 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12835 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12836 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
12837 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
12838 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
12839 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12840 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
12841 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12842 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12843 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12844 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12845 ; GFX11-NEXT: s_mov_b32 s33, s0
12846 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12847 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12849 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm_inreg:
12850 ; GFX10-SCRATCH: ; %bb.0:
12851 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12852 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12853 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12854 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12855 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12856 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12857 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12858 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
12859 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi
12860 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo
12861 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12862 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12863 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001
12864 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
12865 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003
12866 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
12867 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
12868 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12869 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
12870 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
12871 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
12872 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12873 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
12874 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12875 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
12876 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12877 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12878 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
12879 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
12880 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
12881 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
12882 call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg <i16 1, i16 2, i16 3, i16 4>)
; Loads a <2 x half> value from an undef addrspace(4) pointer and passes it to
; @external_void_func_v2f16_inreg as an inreg argument.
; In every run configuration the expected code loads the 32-bit value straight
; into s4 with a scalar load (s_load_dword, or s_load_b32 on gfx1100). Only one
; argument register is overwritten, so v40 holds four saved values here:
; old s4 in lane 0, s30/s31 in lanes 1/2 and the previous s33 value in lane 3.
; NOTE(review): the pointer operand is undef, so the base register pair shown
; in the s_load checks presumably carries no meaningful address - confirm
; before relying on it.
12886 define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 {
12887 ; GFX9-LABEL: test_call_external_void_func_v2f16_inreg:
12889 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12890 ; GFX9-NEXT: s_mov_b32 s34, s33
12891 ; GFX9-NEXT: s_mov_b32 s33, s32
12892 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12893 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12894 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12895 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
12896 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
12897 ; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0
12898 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
12899 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16_inreg@abs32@hi
12900 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16_inreg@abs32@lo
12901 ; GFX9-NEXT: s_addk_i32 s32, 0x400
12902 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
12903 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
12904 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
12905 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
12906 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
12907 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
12908 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12909 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12910 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12911 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
12912 ; GFX9-NEXT: s_mov_b32 s33, s34
12913 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12914 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12916 ; GFX10-LABEL: test_call_external_void_func_v2f16_inreg:
12918 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12919 ; GFX10-NEXT: s_mov_b32 s34, s33
12920 ; GFX10-NEXT: s_mov_b32 s33, s32
12921 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12922 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12923 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12924 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12925 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
12926 ; GFX10-NEXT: s_addk_i32 s32, 0x200
12927 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
12928 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0
12929 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16_inreg@abs32@hi
12930 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16_inreg@abs32@lo
12931 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
12932 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
12933 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
12934 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
12935 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
12936 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
12937 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
12938 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12939 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12940 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12941 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12942 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
12943 ; GFX10-NEXT: s_mov_b32 s33, s34
12944 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12945 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12947 ; GFX11-LABEL: test_call_external_void_func_v2f16_inreg:
12949 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12950 ; GFX11-NEXT: s_mov_b32 s0, s33
12951 ; GFX11-NEXT: s_mov_b32 s33, s32
12952 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12953 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
12954 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12955 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
12956 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12957 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12958 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0
12959 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16_inreg@abs32@hi
12960 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16_inreg@abs32@lo
12961 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
12962 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
12963 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12964 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12965 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
12966 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
12967 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12968 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
12969 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12970 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12971 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12972 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12973 ; GFX11-NEXT: s_mov_b32 s33, s0
12974 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12975 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12977 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16_inreg:
12978 ; GFX10-SCRATCH: ; %bb.0:
12979 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12980 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12981 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12982 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12983 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12984 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12985 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12986 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
12987 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12988 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12989 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0
12990 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16_inreg@abs32@hi
12991 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16_inreg@abs32@lo
12992 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
12993 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
12994 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12995 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
12996 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
12997 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12998 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
12999 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13000 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13001 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13002 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13003 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13004 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13005 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13006 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13007 %val = load <2 x half>, ptr addrspace(4) undef
13008 call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg %val)
13012 define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 {
13013 ; GFX9-LABEL: test_call_external_void_func_v2i32_inreg:
13015 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13016 ; GFX9-NEXT: s_mov_b32 s34, s33
13017 ; GFX9-NEXT: s_mov_b32 s33, s32
13018 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13019 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13020 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13021 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
13022 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13023 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13024 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
13025 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
13026 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi
13027 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo
13028 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13029 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
13030 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13031 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
13032 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
13033 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13034 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13035 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
13036 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13037 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13038 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13039 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13040 ; GFX9-NEXT: s_mov_b32 s33, s34
13041 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13042 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13044 ; GFX10-LABEL: test_call_external_void_func_v2i32_inreg:
13046 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13047 ; GFX10-NEXT: s_mov_b32 s34, s33
13048 ; GFX10-NEXT: s_mov_b32 s33, s32
13049 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13050 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13051 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13052 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13053 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
13054 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13055 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13056 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13057 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
13058 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi
13059 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo
13060 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
13061 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
13062 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13063 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
13064 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
13065 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13066 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13067 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
13068 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13069 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13070 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13071 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13072 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13073 ; GFX10-NEXT: s_mov_b32 s33, s34
13074 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13075 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13077 ; GFX11-LABEL: test_call_external_void_func_v2i32_inreg:
13079 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13080 ; GFX11-NEXT: s_mov_b32 s0, s33
13081 ; GFX11-NEXT: s_mov_b32 s33, s32
13082 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13083 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13084 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13085 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
13086 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13087 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13088 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13089 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
13090 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi
13091 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo
13092 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
13093 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
13094 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13095 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13096 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
13097 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
13098 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13099 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13100 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
13101 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13102 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13103 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13104 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13105 ; GFX11-NEXT: s_mov_b32 s33, s0
13106 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13107 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13109 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_inreg:
13110 ; GFX10-SCRATCH: ; %bb.0:
13111 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13112 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13113 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13114 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13115 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13116 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13117 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13118 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
13119 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13120 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13121 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13122 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
13123 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi
13124 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo
13125 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
13126 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
13127 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13128 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
13129 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
13130 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13131 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13132 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
13133 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13134 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13135 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13136 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13137 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13138 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13139 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13140 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13141 %val = load <2 x i32>, ptr addrspace(4) undef
13142 call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg %val)
; Calls @external_void_func_v2i32_inreg with the constant vector <i32 1, i32 2>
; passed as an inreg argument. In the expected output of every run line the two
; elements are materialized with s_mov_b32 into s4 and s5 before s_swappc_b64.
; The incoming s4/s5 are first written to lanes 0-1 of v40, s30/s31 to lanes
; 2-3, and the saved copy of s33 to lane 4; all are read back after the call.
13146 define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 {
13147 ; GFX9-LABEL: test_call_external_void_func_v2i32_imm_inreg:
13149 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13150 ; GFX9-NEXT: s_mov_b32 s34, s33
13151 ; GFX9-NEXT: s_mov_b32 s33, s32
13152 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13153 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13154 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13155 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
13156 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13157 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13158 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
13159 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi
13160 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo
13161 ; GFX9-NEXT: s_mov_b32 s4, 1
13162 ; GFX9-NEXT: s_mov_b32 s5, 2
13163 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13164 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
13165 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13166 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
13167 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
13168 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13169 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13170 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
13171 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13172 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13173 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13174 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13175 ; GFX9-NEXT: s_mov_b32 s33, s34
13176 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13177 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13179 ; GFX10-LABEL: test_call_external_void_func_v2i32_imm_inreg:
13181 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13182 ; GFX10-NEXT: s_mov_b32 s34, s33
13183 ; GFX10-NEXT: s_mov_b32 s33, s32
13184 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13185 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13186 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13187 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13188 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
13189 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi
13190 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo
13191 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13192 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13193 ; GFX10-NEXT: s_mov_b32 s4, 1
13194 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13195 ; GFX10-NEXT: s_mov_b32 s5, 2
13196 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
13197 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
13198 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13199 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
13200 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
13201 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13202 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13203 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
13204 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13205 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13206 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13207 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13208 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13209 ; GFX10-NEXT: s_mov_b32 s33, s34
13210 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13211 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13213 ; GFX11-LABEL: test_call_external_void_func_v2i32_imm_inreg:
13215 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13216 ; GFX11-NEXT: s_mov_b32 s0, s33
13217 ; GFX11-NEXT: s_mov_b32 s33, s32
13218 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13219 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13220 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13221 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
13222 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi
13223 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo
13224 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13225 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13226 ; GFX11-NEXT: s_mov_b32 s4, 1
13227 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13228 ; GFX11-NEXT: s_mov_b32 s5, 2
13229 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
13230 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
13231 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13232 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13233 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
13234 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
13235 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13236 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13237 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
13238 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13239 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13240 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13241 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13242 ; GFX11-NEXT: s_mov_b32 s33, s0
13243 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13244 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13246 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm_inreg:
13247 ; GFX10-SCRATCH: ; %bb.0:
13248 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13249 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13250 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13251 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13252 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13253 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13254 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13255 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
13256 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi
13257 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo
13258 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13259 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13260 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
13261 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13262 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
13263 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
13264 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
13265 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13266 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
13267 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
13268 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13269 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13270 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
13271 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13272 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13273 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13274 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13275 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13276 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13277 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13278 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13279 call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg <i32 1, i32 2>)
; Calls @external_void_func_v3i32_inreg with the constant vector
; <i32 3, i32 4, i32 5> passed as an inreg argument. In the expected output of
; every run line the three elements are materialized with s_mov_b32 into s4, s5
; and s6 before s_swappc_b64. The incoming s4-s6 are written to lanes 0-2 of
; v40, s30/s31 to lanes 3-4, and the saved copy of s33 to lane 5; all are read
; back after the call. The unnamed i32 parameter of this function is not
; referenced by the call below.
13283 define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 {
13284 ; GFX9-LABEL: test_call_external_void_func_v3i32_imm_inreg:
13286 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13287 ; GFX9-NEXT: s_mov_b32 s34, s33
13288 ; GFX9-NEXT: s_mov_b32 s33, s32
13289 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13290 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13291 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13292 ; GFX9-NEXT: v_writelane_b32 v40, s34, 5
13293 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13294 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13295 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
13296 ; GFX9-NEXT: v_writelane_b32 v40, s30, 3
13297 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_inreg@abs32@hi
13298 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_inreg@abs32@lo
13299 ; GFX9-NEXT: s_mov_b32 s4, 3
13300 ; GFX9-NEXT: s_mov_b32 s5, 4
13301 ; GFX9-NEXT: s_mov_b32 s6, 5
13302 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13303 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4
13304 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13305 ; GFX9-NEXT: v_readlane_b32 s31, v40, 4
13306 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3
13307 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
13308 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13309 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13310 ; GFX9-NEXT: v_readlane_b32 s34, v40, 5
13311 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13312 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13313 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13314 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13315 ; GFX9-NEXT: s_mov_b32 s33, s34
13316 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13317 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13319 ; GFX10-LABEL: test_call_external_void_func_v3i32_imm_inreg:
13321 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13322 ; GFX10-NEXT: s_mov_b32 s34, s33
13323 ; GFX10-NEXT: s_mov_b32 s33, s32
13324 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13325 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13326 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13327 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13328 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5
13329 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_inreg@abs32@hi
13330 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_inreg@abs32@lo
13331 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13332 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13333 ; GFX10-NEXT: s_mov_b32 s4, 3
13334 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13335 ; GFX10-NEXT: s_mov_b32 s5, 4
13336 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
13337 ; GFX10-NEXT: s_mov_b32 s6, 5
13338 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3
13339 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4
13340 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13341 ; GFX10-NEXT: v_readlane_b32 s31, v40, 4
13342 ; GFX10-NEXT: v_readlane_b32 s30, v40, 3
13343 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
13344 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13345 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13346 ; GFX10-NEXT: v_readlane_b32 s34, v40, 5
13347 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13348 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13349 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13350 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13351 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13352 ; GFX10-NEXT: s_mov_b32 s33, s34
13353 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13354 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13356 ; GFX11-LABEL: test_call_external_void_func_v3i32_imm_inreg:
13358 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13359 ; GFX11-NEXT: s_mov_b32 s0, s33
13360 ; GFX11-NEXT: s_mov_b32 s33, s32
13361 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13362 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13363 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13364 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5
13365 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi
13366 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo
13367 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13368 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13369 ; GFX11-NEXT: s_mov_b32 s4, 3
13370 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13371 ; GFX11-NEXT: s_mov_b32 s5, 4
13372 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
13373 ; GFX11-NEXT: s_mov_b32 s6, 5
13374 ; GFX11-NEXT: v_writelane_b32 v40, s30, 3
13375 ; GFX11-NEXT: v_writelane_b32 v40, s31, 4
13376 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13377 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13378 ; GFX11-NEXT: v_readlane_b32 s31, v40, 4
13379 ; GFX11-NEXT: v_readlane_b32 s30, v40, 3
13380 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
13381 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13382 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13383 ; GFX11-NEXT: v_readlane_b32 s0, v40, 5
13384 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13385 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13386 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13387 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13388 ; GFX11-NEXT: s_mov_b32 s33, s0
13389 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13390 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13392 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm_inreg:
13393 ; GFX10-SCRATCH: ; %bb.0:
13394 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13395 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13396 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13397 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13398 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13399 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13400 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13401 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5
13402 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi
13403 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo
13404 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13405 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13406 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3
13407 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13408 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4
13409 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
13410 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5
13411 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3
13412 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4
13413 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13414 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4
13415 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
13416 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
13417 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13418 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13419 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 5
13420 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13421 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13422 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13423 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13424 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13425 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13426 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13427 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13428 call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>)
; Calls @external_void_func_v3i32_i32_inreg with two inreg arguments: the
; constant vector <i32 3, i32 4, i32 5> and the scalar i32 6. In the expected
; output of every run line the four values are materialized with s_mov_b32 into
; s4, s5, s6 and s7 before s_swappc_b64. The incoming s4-s7 are written to
; lanes 0-3 of v40, s30/s31 to lanes 4-5, and the saved copy of s33 to lane 6;
; all are read back after the call. The unnamed i32 parameter of this function
; is not referenced by the call below.
13432 define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 {
13433 ; GFX9-LABEL: test_call_external_void_func_v3i32_i32_inreg:
13435 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13436 ; GFX9-NEXT: s_mov_b32 s34, s33
13437 ; GFX9-NEXT: s_mov_b32 s33, s32
13438 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13439 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13440 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13441 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
13442 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13443 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13444 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
13445 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
13446 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
13447 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32_inreg@abs32@hi
13448 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32_inreg@abs32@lo
13449 ; GFX9-NEXT: s_mov_b32 s4, 3
13450 ; GFX9-NEXT: s_mov_b32 s5, 4
13451 ; GFX9-NEXT: s_mov_b32 s6, 5
13452 ; GFX9-NEXT: s_mov_b32 s7, 6
13453 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13454 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
13455 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13456 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
13457 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
13458 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
13459 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
13460 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13461 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13462 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
13463 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13464 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13465 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13466 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13467 ; GFX9-NEXT: s_mov_b32 s33, s34
13468 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13469 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13471 ; GFX10-LABEL: test_call_external_void_func_v3i32_i32_inreg:
13473 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13474 ; GFX10-NEXT: s_mov_b32 s34, s33
13475 ; GFX10-NEXT: s_mov_b32 s33, s32
13476 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13477 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13478 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13479 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13480 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
13481 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32_inreg@abs32@hi
13482 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32_inreg@abs32@lo
13483 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13484 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13485 ; GFX10-NEXT: s_mov_b32 s4, 3
13486 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13487 ; GFX10-NEXT: s_mov_b32 s5, 4
13488 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
13489 ; GFX10-NEXT: s_mov_b32 s6, 5
13490 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
13491 ; GFX10-NEXT: s_mov_b32 s7, 6
13492 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
13493 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
13494 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13495 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
13496 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
13497 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
13498 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
13499 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13500 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13501 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
13502 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13503 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13504 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13505 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13506 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13507 ; GFX10-NEXT: s_mov_b32 s33, s34
13508 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13509 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13511 ; GFX11-LABEL: test_call_external_void_func_v3i32_i32_inreg:
13513 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13514 ; GFX11-NEXT: s_mov_b32 s0, s33
13515 ; GFX11-NEXT: s_mov_b32 s33, s32
13516 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13517 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13518 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13519 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
13520 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi
13521 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo
13522 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13523 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13524 ; GFX11-NEXT: s_mov_b32 s4, 3
13525 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13526 ; GFX11-NEXT: s_mov_b32 s5, 4
13527 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
13528 ; GFX11-NEXT: s_mov_b32 s6, 5
13529 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
13530 ; GFX11-NEXT: s_mov_b32 s7, 6
13531 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
13532 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
13533 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13534 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13535 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
13536 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
13537 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
13538 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
13539 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13540 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13541 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
13542 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13543 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13544 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13545 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13546 ; GFX11-NEXT: s_mov_b32 s33, s0
13547 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13548 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13550 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32_inreg:
13551 ; GFX10-SCRATCH: ; %bb.0:
13552 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13553 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13554 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13555 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13556 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13557 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13558 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13559 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
13560 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi
13561 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo
13562 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13563 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13564 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3
13565 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13566 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4
13567 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
13568 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5
13569 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
13570 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6
13571 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
13572 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
13573 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13574 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
13575 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
13576 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
13577 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
13578 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13579 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13580 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
13581 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13582 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13583 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13584 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13585 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13586 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13587 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13588 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13589 call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>, i32 inreg 6)
; Loads a <4 x i32> from a constant-address-space (addrspace(4)) pointer and
; passes it as an inreg argument to @external_void_func_v4i32_inreg. In the
; expected output of every run line the value is fetched by one scalar load
; (s_load_dwordx4, or s_load_b128 on gfx1100) into s[4:7] before s_swappc_b64.
; The incoming s4-s7 are written to lanes 0-3 of v40, s30/s31 to lanes 4-5, and
; the saved copy of s33 to lane 6; all are read back after the call.
; NOTE(review): the load address is undef, so the base register pair shown in
; the scalar load is presumably arbitrary - confirm before relying on it.
13593 define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 {
13594 ; GFX9-LABEL: test_call_external_void_func_v4i32_inreg:
13596 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13597 ; GFX9-NEXT: s_mov_b32 s34, s33
13598 ; GFX9-NEXT: s_mov_b32 s33, s32
13599 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13600 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13601 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13602 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
13603 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13604 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13605 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
13606 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
13607 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
13608 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
13609 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi
13610 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo
13611 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13612 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
13613 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13614 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
13615 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
13616 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
13617 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
13618 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13619 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13620 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
13621 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13622 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13623 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13624 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13625 ; GFX9-NEXT: s_mov_b32 s33, s34
13626 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13627 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13629 ; GFX10-LABEL: test_call_external_void_func_v4i32_inreg:
13631 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13632 ; GFX10-NEXT: s_mov_b32 s34, s33
13633 ; GFX10-NEXT: s_mov_b32 s33, s32
13634 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13635 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13636 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13637 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13638 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
13639 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13640 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13641 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13642 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
13643 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
13644 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
13645 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi
13646 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo
13647 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
13648 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
13649 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13650 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
13651 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
13652 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
13653 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
13654 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13655 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13656 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
13657 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13658 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13659 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13660 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13661 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13662 ; GFX10-NEXT: s_mov_b32 s33, s34
13663 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13664 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13666 ; GFX11-LABEL: test_call_external_void_func_v4i32_inreg:
13668 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13669 ; GFX11-NEXT: s_mov_b32 s0, s33
13670 ; GFX11-NEXT: s_mov_b32 s33, s32
13671 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13672 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13673 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13674 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
13675 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13676 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13677 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13678 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
13679 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
13680 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
13681 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi
13682 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo
13683 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
13684 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
13685 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13686 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13687 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
13688 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
13689 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
13690 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
13691 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13692 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13693 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
13694 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13695 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13696 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13697 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13698 ; GFX11-NEXT: s_mov_b32 s33, s0
13699 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13700 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13702 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_inreg:
13703 ; GFX10-SCRATCH: ; %bb.0:
13704 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13705 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13706 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13707 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13708 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13709 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13710 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13711 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
13712 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13713 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13714 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13715 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
13716 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
13717 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
13718 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi
13719 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo
13720 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
13721 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
13722 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13723 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
13724 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
13725 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
13726 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
13727 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13728 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13729 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
13730 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13731 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13732 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13733 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13734 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13735 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13736 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13737 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13738 %val = load <4 x i32>, ptr addrspace(4) undef
13739 call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg %val)
; Calls @external_void_func_v4i32_inreg with the constant vector <1, 2, 3, 4>
; passed as an `inreg` argument. In every run configuration the expected code
; materializes the four elements into s4-s7 with s_mov_b32. Before doing so it
; saves the incoming s4-s7 (lanes 0-3), s30/s31 (lanes 4-5) and the previous
; s33 value (lane 6) into v40 with v_writelane_b32, and restores them with
; v_readlane_b32 after the s_swappc_b64 call.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
13743 define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 {
13744 ; GFX9-LABEL: test_call_external_void_func_v4i32_imm_inreg:
13746 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13747 ; GFX9-NEXT: s_mov_b32 s34, s33
13748 ; GFX9-NEXT: s_mov_b32 s33, s32
13749 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13750 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13751 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13752 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
13753 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13754 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13755 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
13756 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
13757 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
13758 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi
13759 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo
13760 ; GFX9-NEXT: s_mov_b32 s4, 1
13761 ; GFX9-NEXT: s_mov_b32 s5, 2
13762 ; GFX9-NEXT: s_mov_b32 s6, 3
13763 ; GFX9-NEXT: s_mov_b32 s7, 4
13764 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13765 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
13766 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13767 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
13768 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
13769 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
13770 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
13771 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13772 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13773 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
13774 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13775 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13776 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13777 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13778 ; GFX9-NEXT: s_mov_b32 s33, s34
13779 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13780 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13782 ; GFX10-LABEL: test_call_external_void_func_v4i32_imm_inreg:
13784 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13785 ; GFX10-NEXT: s_mov_b32 s34, s33
13786 ; GFX10-NEXT: s_mov_b32 s33, s32
13787 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13788 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13789 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13790 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13791 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
13792 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi
13793 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo
13794 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13795 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13796 ; GFX10-NEXT: s_mov_b32 s4, 1
13797 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13798 ; GFX10-NEXT: s_mov_b32 s5, 2
13799 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
13800 ; GFX10-NEXT: s_mov_b32 s6, 3
13801 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
13802 ; GFX10-NEXT: s_mov_b32 s7, 4
13803 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
13804 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
13805 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13806 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
13807 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
13808 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
13809 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
13810 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13811 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13812 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
13813 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13814 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13815 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13816 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13817 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13818 ; GFX10-NEXT: s_mov_b32 s33, s34
13819 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13820 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13822 ; GFX11-LABEL: test_call_external_void_func_v4i32_imm_inreg:
13824 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13825 ; GFX11-NEXT: s_mov_b32 s0, s33
13826 ; GFX11-NEXT: s_mov_b32 s33, s32
13827 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13828 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13829 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13830 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
13831 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi
13832 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo
13833 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13834 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13835 ; GFX11-NEXT: s_mov_b32 s4, 1
13836 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13837 ; GFX11-NEXT: s_mov_b32 s5, 2
13838 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
13839 ; GFX11-NEXT: s_mov_b32 s6, 3
13840 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
13841 ; GFX11-NEXT: s_mov_b32 s7, 4
13842 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
13843 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
13844 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13845 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13846 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
13847 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
13848 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
13849 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
13850 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13851 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13852 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
13853 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13854 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13855 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13856 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13857 ; GFX11-NEXT: s_mov_b32 s33, s0
13858 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13859 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13861 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm_inreg:
13862 ; GFX10-SCRATCH: ; %bb.0:
13863 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13864 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13865 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13866 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13867 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13868 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13869 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13870 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
13871 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi
13872 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo
13873 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13874 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13875 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
13876 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13877 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
13878 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
13879 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
13880 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
13881 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
13882 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
13883 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
13884 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13885 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
13886 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
13887 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
13888 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
13889 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13890 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13891 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
13892 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13893 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13894 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13895 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13896 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13897 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13898 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13899 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: a single call whose only operand is an all-constant inreg vector.
13900 call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg <i32 1, i32 2, i32 3, i32 4>)
; Calls @external_void_func_v5i32_inreg with the constant vector
; <1, 2, 3, 4, 5> passed as an `inreg` argument (a five-element vector).
; In every run configuration the expected code materializes the elements into
; s4-s8 with s_mov_b32. The incoming s4-s8 (lanes 0-4), s30/s31 (lanes 5-6)
; and the previous s33 value (lane 7) are saved into v40 with v_writelane_b32
; and restored with v_readlane_b32 after the s_swappc_b64 call.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
13904 define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 {
13905 ; GFX9-LABEL: test_call_external_void_func_v5i32_imm_inreg:
13907 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13908 ; GFX9-NEXT: s_mov_b32 s34, s33
13909 ; GFX9-NEXT: s_mov_b32 s33, s32
13910 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13911 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13912 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13913 ; GFX9-NEXT: v_writelane_b32 v40, s34, 7
13914 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13915 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13916 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
13917 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
13918 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
13919 ; GFX9-NEXT: v_writelane_b32 v40, s30, 5
13920 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i32_inreg@abs32@hi
13921 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i32_inreg@abs32@lo
13922 ; GFX9-NEXT: s_mov_b32 s4, 1
13923 ; GFX9-NEXT: s_mov_b32 s5, 2
13924 ; GFX9-NEXT: s_mov_b32 s6, 3
13925 ; GFX9-NEXT: s_mov_b32 s7, 4
13926 ; GFX9-NEXT: s_mov_b32 s8, 5
13927 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13928 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6
13929 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13930 ; GFX9-NEXT: v_readlane_b32 s31, v40, 6
13931 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5
13932 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
13933 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
13934 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
13935 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13936 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13937 ; GFX9-NEXT: v_readlane_b32 s34, v40, 7
13938 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13939 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13940 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13941 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13942 ; GFX9-NEXT: s_mov_b32 s33, s34
13943 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13944 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13946 ; GFX10-LABEL: test_call_external_void_func_v5i32_imm_inreg:
13948 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13949 ; GFX10-NEXT: s_mov_b32 s34, s33
13950 ; GFX10-NEXT: s_mov_b32 s33, s32
13951 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13952 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13953 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13954 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13955 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7
13956 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i32_inreg@abs32@hi
13957 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32_inreg@abs32@lo
13958 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13959 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13960 ; GFX10-NEXT: s_mov_b32 s4, 1
13961 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13962 ; GFX10-NEXT: s_mov_b32 s5, 2
13963 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
13964 ; GFX10-NEXT: s_mov_b32 s6, 3
13965 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
13966 ; GFX10-NEXT: s_mov_b32 s7, 4
13967 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
13968 ; GFX10-NEXT: s_mov_b32 s8, 5
13969 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5
13970 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6
13971 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13972 ; GFX10-NEXT: v_readlane_b32 s31, v40, 6
13973 ; GFX10-NEXT: v_readlane_b32 s30, v40, 5
13974 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
13975 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
13976 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
13977 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13978 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13979 ; GFX10-NEXT: v_readlane_b32 s34, v40, 7
13980 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13981 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13982 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13983 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13984 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13985 ; GFX10-NEXT: s_mov_b32 s33, s34
13986 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13987 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13989 ; GFX11-LABEL: test_call_external_void_func_v5i32_imm_inreg:
13991 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13992 ; GFX11-NEXT: s_mov_b32 s0, s33
13993 ; GFX11-NEXT: s_mov_b32 s33, s32
13994 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13995 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13996 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13997 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7
13998 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi
13999 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo
14000 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14001 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
14002 ; GFX11-NEXT: s_mov_b32 s4, 1
14003 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
14004 ; GFX11-NEXT: s_mov_b32 s5, 2
14005 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
14006 ; GFX11-NEXT: s_mov_b32 s6, 3
14007 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
14008 ; GFX11-NEXT: s_mov_b32 s7, 4
14009 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
14010 ; GFX11-NEXT: s_mov_b32 s8, 5
14011 ; GFX11-NEXT: v_writelane_b32 v40, s30, 5
14012 ; GFX11-NEXT: v_writelane_b32 v40, s31, 6
14013 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
14014 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
14015 ; GFX11-NEXT: v_readlane_b32 s31, v40, 6
14016 ; GFX11-NEXT: v_readlane_b32 s30, v40, 5
14017 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
14018 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
14019 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
14020 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
14021 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
14022 ; GFX11-NEXT: v_readlane_b32 s0, v40, 7
14023 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14024 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
14025 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14026 ; GFX11-NEXT: s_add_i32 s32, s32, -16
14027 ; GFX11-NEXT: s_mov_b32 s33, s0
14028 ; GFX11-NEXT: s_waitcnt vmcnt(0)
14029 ; GFX11-NEXT: s_setpc_b64 s[30:31]
14031 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm_inreg:
14032 ; GFX10-SCRATCH: ; %bb.0:
14033 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14034 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
14035 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
14036 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14037 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
14038 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14039 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14040 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7
14041 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi
14042 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo
14043 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
14044 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
14045 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
14046 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
14047 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
14048 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
14049 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
14050 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
14051 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
14052 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
14053 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5
14054 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5
14055 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6
14056 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
14057 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6
14058 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5
14059 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
14060 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
14061 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
14062 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
14063 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
14064 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 7
14065 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14066 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
14067 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14068 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14069 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
14070 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
14071 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
14072 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: a single call whose only operand is an all-constant inreg vector.
14073 call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5>)
; Calls @external_void_func_v8i32_inreg with an `inreg` <8 x i32> argument
; whose value is loaded from memory rather than being a constant: the IR
; first loads a pointer through an undef addrspace(4) pointer, then loads the
; vector through that pointer.
; The expected code uses one 64-bit scalar load for the pointer, waits on
; lgkmcnt(0), and then issues a single 8-dword scalar load (s_load_dwordx8,
; spelled s_load_b256 in the gfx1100 run) directly into s[4:11].
; The incoming s4-s11 (lanes 0-7), s30/s31 (lanes 8-9) and the previous s33
; value (lane 10) are saved into v40 and restored after the s_swappc_b64 call.
; NOTE(review): because the first load's address is undef, the address
; registers shown for that load are presumably just whatever the register
; allocator picked and carry no meaning - confirm before relying on them.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
14077 define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 {
14078 ; GFX9-LABEL: test_call_external_void_func_v8i32_inreg:
14080 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14081 ; GFX9-NEXT: s_mov_b32 s34, s33
14082 ; GFX9-NEXT: s_mov_b32 s33, s32
14083 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14084 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14085 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14086 ; GFX9-NEXT: v_writelane_b32 v40, s34, 10
14087 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
14088 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
14089 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
14090 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
14091 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
14092 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
14093 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
14094 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
14095 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
14096 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
14097 ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0
14098 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8
14099 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi
14100 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo
14101 ; GFX9-NEXT: s_addk_i32 s32, 0x400
14102 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9
14103 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
14104 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
14105 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8
14106 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
14107 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
14108 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
14109 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
14110 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
14111 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
14112 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
14113 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
14114 ; GFX9-NEXT: v_readlane_b32 s34, v40, 10
14115 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14116 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14117 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14118 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
14119 ; GFX9-NEXT: s_mov_b32 s33, s34
14120 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14121 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14123 ; GFX10-LABEL: test_call_external_void_func_v8i32_inreg:
14125 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14126 ; GFX10-NEXT: s_mov_b32 s34, s33
14127 ; GFX10-NEXT: s_mov_b32 s33, s32
14128 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14129 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14130 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14131 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14132 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10
14133 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
14134 ; GFX10-NEXT: s_addk_i32 s32, 0x200
14135 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
14136 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
14137 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
14138 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
14139 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
14140 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
14141 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
14142 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
14143 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
14144 ; GFX10-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0
14145 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi
14146 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo
14147 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
14148 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
14149 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
14150 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
14151 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8
14152 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
14153 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
14154 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
14155 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
14156 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
14157 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
14158 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
14159 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
14160 ; GFX10-NEXT: v_readlane_b32 s34, v40, 10
14161 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14162 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14163 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14164 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14165 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
14166 ; GFX10-NEXT: s_mov_b32 s33, s34
14167 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14168 ; GFX10-NEXT: s_setpc_b64 s[30:31]
14170 ; GFX11-LABEL: test_call_external_void_func_v8i32_inreg:
14172 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14173 ; GFX11-NEXT: s_mov_b32 s0, s33
14174 ; GFX11-NEXT: s_mov_b32 s33, s32
14175 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14176 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
14177 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14178 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10
14179 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
14180 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14181 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
14182 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
14183 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
14184 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
14185 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
14186 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
14187 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
14188 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
14189 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
14190 ; GFX11-NEXT: s_load_b256 s[4:11], s[0:1], 0x0
14191 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi
14192 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo
14193 ; GFX11-NEXT: v_writelane_b32 v40, s30, 8
14194 ; GFX11-NEXT: v_writelane_b32 v40, s31, 9
14195 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
14196 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
14197 ; GFX11-NEXT: v_readlane_b32 s31, v40, 9
14198 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8
14199 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
14200 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
14201 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
14202 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
14203 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
14204 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
14205 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
14206 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
14207 ; GFX11-NEXT: v_readlane_b32 s0, v40, 10
14208 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14209 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
14210 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14211 ; GFX11-NEXT: s_add_i32 s32, s32, -16
14212 ; GFX11-NEXT: s_mov_b32 s33, s0
14213 ; GFX11-NEXT: s_waitcnt vmcnt(0)
14214 ; GFX11-NEXT: s_setpc_b64 s[30:31]
14216 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_inreg:
14217 ; GFX10-SCRATCH: ; %bb.0:
14218 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14219 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
14220 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
14221 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14222 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
14223 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14224 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14225 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10
14226 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
14227 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
14228 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
14229 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
14230 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
14231 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
14232 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
14233 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
14234 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
14235 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
14236 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
14237 ; GFX10-SCRATCH-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
14238 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi
14239 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo
14240 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
14241 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
14242 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
14243 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
14244 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
14245 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
14246 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
14247 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
14248 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
14249 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
14250 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
14251 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
14252 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
14253 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 10
14254 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14255 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
14256 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14257 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14258 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
14259 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
14260 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
14261 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: two dependent loads (pointer, then vector) feeding the inreg operand.
14262 %ptr = load ptr addrspace(4), ptr addrspace(4) undef
14263 %val = load <8 x i32>, ptr addrspace(4) %ptr
14264 call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg %val)
14268 define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 {
14269 ; GFX9-LABEL: test_call_external_void_func_v8i32_imm_inreg:
14271 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14272 ; GFX9-NEXT: s_mov_b32 s34, s33
14273 ; GFX9-NEXT: s_mov_b32 s33, s32
14274 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14275 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14276 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14277 ; GFX9-NEXT: v_writelane_b32 v40, s34, 10
14278 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
14279 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
14280 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
14281 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
14282 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
14283 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
14284 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
14285 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
14286 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8
14287 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi
14288 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo
14289 ; GFX9-NEXT: s_mov_b32 s4, 1
14290 ; GFX9-NEXT: s_mov_b32 s5, 2
14291 ; GFX9-NEXT: s_mov_b32 s6, 3
14292 ; GFX9-NEXT: s_mov_b32 s7, 4
14293 ; GFX9-NEXT: s_mov_b32 s8, 5
14294 ; GFX9-NEXT: s_mov_b32 s9, 6
14295 ; GFX9-NEXT: s_mov_b32 s10, 7
14296 ; GFX9-NEXT: s_mov_b32 s11, 8
14297 ; GFX9-NEXT: s_addk_i32 s32, 0x400
14298 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9
14299 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
14300 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
14301 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8
14302 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
14303 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
14304 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
14305 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
14306 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
14307 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
14308 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
14309 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
14310 ; GFX9-NEXT: v_readlane_b32 s34, v40, 10
14311 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14312 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14313 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14314 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
14315 ; GFX9-NEXT: s_mov_b32 s33, s34
14316 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14317 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14319 ; GFX10-LABEL: test_call_external_void_func_v8i32_imm_inreg:
14321 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14322 ; GFX10-NEXT: s_mov_b32 s34, s33
14323 ; GFX10-NEXT: s_mov_b32 s33, s32
14324 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14325 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14326 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14327 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14328 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10
14329 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi
14330 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo
14331 ; GFX10-NEXT: s_addk_i32 s32, 0x200
14332 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
14333 ; GFX10-NEXT: s_mov_b32 s4, 1
14334 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
14335 ; GFX10-NEXT: s_mov_b32 s5, 2
14336 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
14337 ; GFX10-NEXT: s_mov_b32 s6, 3
14338 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
14339 ; GFX10-NEXT: s_mov_b32 s7, 4
14340 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
14341 ; GFX10-NEXT: s_mov_b32 s8, 5
14342 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
14343 ; GFX10-NEXT: s_mov_b32 s9, 6
14344 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
14345 ; GFX10-NEXT: s_mov_b32 s10, 7
14346 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
14347 ; GFX10-NEXT: s_mov_b32 s11, 8
14348 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
14349 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
14350 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
14351 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
14352 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8
14353 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
14354 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
14355 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
14356 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
14357 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
14358 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
14359 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
14360 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
14361 ; GFX10-NEXT: v_readlane_b32 s34, v40, 10
14362 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14363 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14364 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14365 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14366 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
14367 ; GFX10-NEXT: s_mov_b32 s33, s34
14368 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14369 ; GFX10-NEXT: s_setpc_b64 s[30:31]
14371 ; GFX11-LABEL: test_call_external_void_func_v8i32_imm_inreg:
14373 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14374 ; GFX11-NEXT: s_mov_b32 s0, s33
14375 ; GFX11-NEXT: s_mov_b32 s33, s32
14376 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14377 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
14378 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14379 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10
14380 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi
14381 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo
14382 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14383 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
14384 ; GFX11-NEXT: s_mov_b32 s4, 1
14385 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
14386 ; GFX11-NEXT: s_mov_b32 s5, 2
14387 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
14388 ; GFX11-NEXT: s_mov_b32 s6, 3
14389 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
14390 ; GFX11-NEXT: s_mov_b32 s7, 4
14391 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
14392 ; GFX11-NEXT: s_mov_b32 s8, 5
14393 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
14394 ; GFX11-NEXT: s_mov_b32 s9, 6
14395 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
14396 ; GFX11-NEXT: s_mov_b32 s10, 7
14397 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
14398 ; GFX11-NEXT: s_mov_b32 s11, 8
14399 ; GFX11-NEXT: v_writelane_b32 v40, s30, 8
14400 ; GFX11-NEXT: v_writelane_b32 v40, s31, 9
14401 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
14402 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
14403 ; GFX11-NEXT: v_readlane_b32 s31, v40, 9
14404 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8
14405 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
14406 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
14407 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
14408 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
14409 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
14410 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
14411 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
14412 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
14413 ; GFX11-NEXT: v_readlane_b32 s0, v40, 10
14414 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14415 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
14416 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14417 ; GFX11-NEXT: s_add_i32 s32, s32, -16
14418 ; GFX11-NEXT: s_mov_b32 s33, s0
14419 ; GFX11-NEXT: s_waitcnt vmcnt(0)
14420 ; GFX11-NEXT: s_setpc_b64 s[30:31]
14422 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm_inreg:
14423 ; GFX10-SCRATCH: ; %bb.0:
14424 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14425 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
14426 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
14427 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14428 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
14429 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14430 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14431 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10
14432 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi
14433 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo
14434 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
14435 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
14436 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
14437 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
14438 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
14439 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
14440 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
14441 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
14442 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
14443 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
14444 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5
14445 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
14446 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 6
14447 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
14448 ; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 7
14449 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
14450 ; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 8
14451 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
14452 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
14453 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
14454 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
14455 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
14456 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
14457 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
14458 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
14459 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
14460 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
14461 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
14462 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
14463 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
14464 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 10
14465 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14466 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
14467 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14468 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14469 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
14470 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
14471 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
14472 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
14473 call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
; Calls @external_void_func_v16i32_inreg with a <16 x i32> argument marked
; inreg. The vector is loaded from addrspace(4) through a pointer that is itself
; loaded from an undef address.
; The autogenerated checks below show, for every run line:
;   - s4-s19 saved to lanes 0-15 of v40, then the sixteen dwords loaded directly
;     into s[4:19] by one scalar load (s_load_dwordx16 / s_load_b512);
;   - s30/s31 (the pair used by s_swappc_b64 / s_setpc_b64) saved to lanes 16-17;
;   - the incoming s33 value saved to lane 18 and restored before returning.
; The runs differ in how v40 itself is spilled (buffer_* vs. scratch_*
; instructions) and in the amount added to s32 around the call.
14477 define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 {
14478 ; GFX9-LABEL: test_call_external_void_func_v16i32_inreg:
14480 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14481 ; GFX9-NEXT: s_mov_b32 s34, s33
14482 ; GFX9-NEXT: s_mov_b32 s33, s32
14483 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14484 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14485 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14486 ; GFX9-NEXT: v_writelane_b32 v40, s34, 18
14487 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
14488 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
14489 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
14490 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
14491 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
14492 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
14493 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
14494 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
14495 ; GFX9-NEXT: v_writelane_b32 v40, s12, 8
14496 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9
14497 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10
14498 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
14499 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11
14500 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12
14501 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13
14502 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14
14503 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15
14504 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
14505 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
14506 ; GFX9-NEXT: v_writelane_b32 v40, s30, 16
14507 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i32_inreg@abs32@hi
14508 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i32_inreg@abs32@lo
14509 ; GFX9-NEXT: s_addk_i32 s32, 0x400
14510 ; GFX9-NEXT: v_writelane_b32 v40, s31, 17
14511 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
14512 ; GFX9-NEXT: v_readlane_b32 s31, v40, 17
14513 ; GFX9-NEXT: v_readlane_b32 s30, v40, 16
14514 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15
14515 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14
14516 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13
14517 ; GFX9-NEXT: v_readlane_b32 s16, v40, 12
14518 ; GFX9-NEXT: v_readlane_b32 s15, v40, 11
14519 ; GFX9-NEXT: v_readlane_b32 s14, v40, 10
14520 ; GFX9-NEXT: v_readlane_b32 s13, v40, 9
14521 ; GFX9-NEXT: v_readlane_b32 s12, v40, 8
14522 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
14523 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
14524 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
14525 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
14526 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
14527 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
14528 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
14529 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
14530 ; GFX9-NEXT: v_readlane_b32 s34, v40, 18
14531 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14532 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14533 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14534 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
14535 ; GFX9-NEXT: s_mov_b32 s33, s34
14536 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14537 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14539 ; GFX10-LABEL: test_call_external_void_func_v16i32_inreg:
14541 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14542 ; GFX10-NEXT: s_mov_b32 s34, s33
14543 ; GFX10-NEXT: s_mov_b32 s33, s32
14544 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14545 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14546 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14547 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14548 ; GFX10-NEXT: v_writelane_b32 v40, s34, 18
14549 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
14550 ; GFX10-NEXT: s_addk_i32 s32, 0x200
14551 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
14552 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
14553 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
14554 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
14555 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
14556 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
14557 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
14558 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
14559 ; GFX10-NEXT: v_writelane_b32 v40, s12, 8
14560 ; GFX10-NEXT: v_writelane_b32 v40, s13, 9
14561 ; GFX10-NEXT: v_writelane_b32 v40, s14, 10
14562 ; GFX10-NEXT: v_writelane_b32 v40, s15, 11
14563 ; GFX10-NEXT: v_writelane_b32 v40, s16, 12
14564 ; GFX10-NEXT: v_writelane_b32 v40, s17, 13
14565 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14
14566 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15
14567 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
14568 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
14569 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i32_inreg@abs32@hi
14570 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i32_inreg@abs32@lo
14571 ; GFX10-NEXT: v_writelane_b32 v40, s30, 16
14572 ; GFX10-NEXT: v_writelane_b32 v40, s31, 17
14573 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
14574 ; GFX10-NEXT: v_readlane_b32 s31, v40, 17
14575 ; GFX10-NEXT: v_readlane_b32 s30, v40, 16
14576 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15
14577 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14
14578 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13
14579 ; GFX10-NEXT: v_readlane_b32 s16, v40, 12
14580 ; GFX10-NEXT: v_readlane_b32 s15, v40, 11
14581 ; GFX10-NEXT: v_readlane_b32 s14, v40, 10
14582 ; GFX10-NEXT: v_readlane_b32 s13, v40, 9
14583 ; GFX10-NEXT: v_readlane_b32 s12, v40, 8
14584 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
14585 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
14586 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
14587 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
14588 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
14589 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
14590 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
14591 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
14592 ; GFX10-NEXT: v_readlane_b32 s34, v40, 18
14593 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14594 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14595 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14596 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14597 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
14598 ; GFX10-NEXT: s_mov_b32 s33, s34
14599 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14600 ; GFX10-NEXT: s_setpc_b64 s[30:31]
14602 ; GFX11-LABEL: test_call_external_void_func_v16i32_inreg:
14604 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14605 ; GFX11-NEXT: s_mov_b32 s0, s33
14606 ; GFX11-NEXT: s_mov_b32 s33, s32
14607 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14608 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
14609 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14610 ; GFX11-NEXT: v_writelane_b32 v40, s0, 18
14611 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
14612 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14613 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
14614 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
14615 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
14616 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
14617 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
14618 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
14619 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
14620 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
14621 ; GFX11-NEXT: v_writelane_b32 v40, s12, 8
14622 ; GFX11-NEXT: v_writelane_b32 v40, s13, 9
14623 ; GFX11-NEXT: v_writelane_b32 v40, s14, 10
14624 ; GFX11-NEXT: v_writelane_b32 v40, s15, 11
14625 ; GFX11-NEXT: v_writelane_b32 v40, s16, 12
14626 ; GFX11-NEXT: v_writelane_b32 v40, s17, 13
14627 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14
14628 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15
14629 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
14630 ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0
14631 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i32_inreg@abs32@hi
14632 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i32_inreg@abs32@lo
14633 ; GFX11-NEXT: v_writelane_b32 v40, s30, 16
14634 ; GFX11-NEXT: v_writelane_b32 v40, s31, 17
14635 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
14636 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
14637 ; GFX11-NEXT: v_readlane_b32 s31, v40, 17
14638 ; GFX11-NEXT: v_readlane_b32 s30, v40, 16
14639 ; GFX11-NEXT: v_readlane_b32 s19, v40, 15
14640 ; GFX11-NEXT: v_readlane_b32 s18, v40, 14
14641 ; GFX11-NEXT: v_readlane_b32 s17, v40, 13
14642 ; GFX11-NEXT: v_readlane_b32 s16, v40, 12
14643 ; GFX11-NEXT: v_readlane_b32 s15, v40, 11
14644 ; GFX11-NEXT: v_readlane_b32 s14, v40, 10
14645 ; GFX11-NEXT: v_readlane_b32 s13, v40, 9
14646 ; GFX11-NEXT: v_readlane_b32 s12, v40, 8
14647 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
14648 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
14649 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
14650 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
14651 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
14652 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
14653 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
14654 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
14655 ; GFX11-NEXT: v_readlane_b32 s0, v40, 18
14656 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14657 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
14658 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14659 ; GFX11-NEXT: s_add_i32 s32, s32, -16
14660 ; GFX11-NEXT: s_mov_b32 s33, s0
14661 ; GFX11-NEXT: s_waitcnt vmcnt(0)
14662 ; GFX11-NEXT: s_setpc_b64 s[30:31]
14664 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32_inreg:
14665 ; GFX10-SCRATCH: ; %bb.0:
14666 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14667 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
14668 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
14669 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14670 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
14671 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14672 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14673 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 18
14674 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
14675 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
14676 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
14677 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
14678 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
14679 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
14680 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
14681 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
14682 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
14683 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
14684 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8
14685 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9
14686 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10
14687 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11
14688 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12
14689 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13
14690 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14
14691 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15
14692 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
14693 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
14694 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16i32_inreg@abs32@hi
14695 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i32_inreg@abs32@lo
14696 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16
14697 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17
14698 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
14699 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17
14700 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16
14701 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15
14702 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14
14703 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13
14704 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12
14705 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11
14706 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10
14707 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9
14708 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8
14709 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
14710 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
14711 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
14712 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
14713 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
14714 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
14715 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
14716 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
14717 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 18
14718 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14719 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
14720 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14721 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14722 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
14723 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
14724 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
14725 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The source address is undef. NOTE(review): presumably the test only needs
; some non-constant <16 x i32> value and does not care where it comes from.
14726 %ptr = load ptr addrspace(4), ptr addrspace(4) undef
; One 16-dword load; the checks above show it landing directly in s[4:19].
14727 %val = load <16 x i32>, ptr addrspace(4) %ptr
; The whole vector is passed inreg; the callee's declaration is outside this
; function and is not repeated here.
14728 call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg %val)
14732 define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 {
14733 ; GFX9-LABEL: test_call_external_void_func_v32i32_inreg:
14735 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14736 ; GFX9-NEXT: s_mov_b32 s34, s33
14737 ; GFX9-NEXT: s_mov_b32 s33, s32
14738 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14739 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14740 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14741 ; GFX9-NEXT: v_writelane_b32 v40, s34, 28
14742 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
14743 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
14744 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
14745 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
14746 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
14747 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
14748 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
14749 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
14750 ; GFX9-NEXT: v_writelane_b32 v40, s12, 8
14751 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9
14752 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10
14753 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11
14754 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12
14755 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13
14756 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14
14757 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15
14758 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
14759 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16
14760 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17
14761 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18
14762 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19
14763 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20
14764 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
14765 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
14766 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
14767 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21
14768 ; GFX9-NEXT: v_writelane_b32 v40, s26, 22
14769 ; GFX9-NEXT: v_writelane_b32 v40, s27, 23
14770 ; GFX9-NEXT: s_addk_i32 s32, 0x400
14771 ; GFX9-NEXT: v_writelane_b32 v40, s28, 24
14772 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
14773 ; GFX9-NEXT: v_mov_b32_e32 v0, s46
14774 ; GFX9-NEXT: v_writelane_b32 v40, s29, 25
14775 ; GFX9-NEXT: v_mov_b32_e32 v1, s47
14776 ; GFX9-NEXT: v_mov_b32_e32 v2, s48
14777 ; GFX9-NEXT: v_mov_b32_e32 v3, s49
14778 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
14779 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
14780 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
14781 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
14782 ; GFX9-NEXT: v_mov_b32_e32 v0, s50
14783 ; GFX9-NEXT: v_writelane_b32 v40, s30, 26
14784 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
14785 ; GFX9-NEXT: v_mov_b32_e32 v0, s51
14786 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_inreg@abs32@hi
14787 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_inreg@abs32@lo
14788 ; GFX9-NEXT: s_mov_b32 s20, s36
14789 ; GFX9-NEXT: s_mov_b32 s21, s37
14790 ; GFX9-NEXT: s_mov_b32 s22, s38
14791 ; GFX9-NEXT: s_mov_b32 s23, s39
14792 ; GFX9-NEXT: s_mov_b32 s24, s40
14793 ; GFX9-NEXT: s_mov_b32 s25, s41
14794 ; GFX9-NEXT: s_mov_b32 s26, s42
14795 ; GFX9-NEXT: s_mov_b32 s27, s43
14796 ; GFX9-NEXT: s_mov_b32 s28, s44
14797 ; GFX9-NEXT: s_mov_b32 s29, s45
14798 ; GFX9-NEXT: v_writelane_b32 v40, s31, 27
14799 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
14800 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
14801 ; GFX9-NEXT: v_readlane_b32 s31, v40, 27
14802 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26
14803 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25
14804 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24
14805 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23
14806 ; GFX9-NEXT: v_readlane_b32 s26, v40, 22
14807 ; GFX9-NEXT: v_readlane_b32 s25, v40, 21
14808 ; GFX9-NEXT: v_readlane_b32 s24, v40, 20
14809 ; GFX9-NEXT: v_readlane_b32 s23, v40, 19
14810 ; GFX9-NEXT: v_readlane_b32 s22, v40, 18
14811 ; GFX9-NEXT: v_readlane_b32 s21, v40, 17
14812 ; GFX9-NEXT: v_readlane_b32 s20, v40, 16
14813 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15
14814 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14
14815 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13
14816 ; GFX9-NEXT: v_readlane_b32 s16, v40, 12
14817 ; GFX9-NEXT: v_readlane_b32 s15, v40, 11
14818 ; GFX9-NEXT: v_readlane_b32 s14, v40, 10
14819 ; GFX9-NEXT: v_readlane_b32 s13, v40, 9
14820 ; GFX9-NEXT: v_readlane_b32 s12, v40, 8
14821 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
14822 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
14823 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
14824 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
14825 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
14826 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
14827 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
14828 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
14829 ; GFX9-NEXT: v_readlane_b32 s34, v40, 28
14830 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14831 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14832 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14833 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
14834 ; GFX9-NEXT: s_mov_b32 s33, s34
14835 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14836 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14838 ; GFX10-LABEL: test_call_external_void_func_v32i32_inreg:
14840 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14841 ; GFX10-NEXT: s_mov_b32 s34, s33
14842 ; GFX10-NEXT: s_mov_b32 s33, s32
14843 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14844 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14845 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14846 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14847 ; GFX10-NEXT: v_writelane_b32 v40, s34, 28
14848 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
14849 ; GFX10-NEXT: s_addk_i32 s32, 0x200
14850 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
14851 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
14852 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
14853 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
14854 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
14855 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
14856 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
14857 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
14858 ; GFX10-NEXT: v_writelane_b32 v40, s12, 8
14859 ; GFX10-NEXT: v_writelane_b32 v40, s13, 9
14860 ; GFX10-NEXT: v_writelane_b32 v40, s14, 10
14861 ; GFX10-NEXT: v_writelane_b32 v40, s15, 11
14862 ; GFX10-NEXT: v_writelane_b32 v40, s16, 12
14863 ; GFX10-NEXT: v_writelane_b32 v40, s17, 13
14864 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14
14865 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15
14866 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
14867 ; GFX10-NEXT: s_clause 0x1
14868 ; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
14869 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
14870 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_inreg@abs32@hi
14871 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_inreg@abs32@lo
14872 ; GFX10-NEXT: v_writelane_b32 v40, s20, 16
14873 ; GFX10-NEXT: v_writelane_b32 v40, s21, 17
14874 ; GFX10-NEXT: v_writelane_b32 v40, s22, 18
14875 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
14876 ; GFX10-NEXT: v_mov_b32_e32 v0, s46
14877 ; GFX10-NEXT: v_writelane_b32 v40, s23, 19
14878 ; GFX10-NEXT: v_mov_b32_e32 v1, s47
14879 ; GFX10-NEXT: v_mov_b32_e32 v2, s48
14880 ; GFX10-NEXT: v_mov_b32_e32 v3, s49
14881 ; GFX10-NEXT: s_mov_b32 s20, s36
14882 ; GFX10-NEXT: v_writelane_b32 v40, s24, 20
14883 ; GFX10-NEXT: s_mov_b32 s21, s37
14884 ; GFX10-NEXT: s_mov_b32 s22, s38
14885 ; GFX10-NEXT: s_mov_b32 s23, s39
14886 ; GFX10-NEXT: s_mov_b32 s24, s40
14887 ; GFX10-NEXT: v_writelane_b32 v40, s25, 21
14888 ; GFX10-NEXT: s_mov_b32 s25, s41
14889 ; GFX10-NEXT: v_mov_b32_e32 v4, s50
14890 ; GFX10-NEXT: v_mov_b32_e32 v5, s51
14891 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
14892 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
14893 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
14894 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
14895 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16
14896 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20
14897 ; GFX10-NEXT: v_writelane_b32 v40, s26, 22
14898 ; GFX10-NEXT: s_mov_b32 s26, s42
14899 ; GFX10-NEXT: v_writelane_b32 v40, s27, 23
14900 ; GFX10-NEXT: s_mov_b32 s27, s43
14901 ; GFX10-NEXT: v_writelane_b32 v40, s28, 24
14902 ; GFX10-NEXT: s_mov_b32 s28, s44
14903 ; GFX10-NEXT: v_writelane_b32 v40, s29, 25
14904 ; GFX10-NEXT: s_mov_b32 s29, s45
14905 ; GFX10-NEXT: v_writelane_b32 v40, s30, 26
14906 ; GFX10-NEXT: v_writelane_b32 v40, s31, 27
14907 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
14908 ; GFX10-NEXT: v_readlane_b32 s31, v40, 27
14909 ; GFX10-NEXT: v_readlane_b32 s30, v40, 26
14910 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25
14911 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24
14912 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23
14913 ; GFX10-NEXT: v_readlane_b32 s26, v40, 22
14914 ; GFX10-NEXT: v_readlane_b32 s25, v40, 21
14915 ; GFX10-NEXT: v_readlane_b32 s24, v40, 20
14916 ; GFX10-NEXT: v_readlane_b32 s23, v40, 19
14917 ; GFX10-NEXT: v_readlane_b32 s22, v40, 18
14918 ; GFX10-NEXT: v_readlane_b32 s21, v40, 17
14919 ; GFX10-NEXT: v_readlane_b32 s20, v40, 16
14920 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15
14921 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14
14922 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13
14923 ; GFX10-NEXT: v_readlane_b32 s16, v40, 12
14924 ; GFX10-NEXT: v_readlane_b32 s15, v40, 11
14925 ; GFX10-NEXT: v_readlane_b32 s14, v40, 10
14926 ; GFX10-NEXT: v_readlane_b32 s13, v40, 9
14927 ; GFX10-NEXT: v_readlane_b32 s12, v40, 8
14928 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
14929 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
14930 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
14931 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
14932 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
14933 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
14934 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
14935 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
14936 ; GFX10-NEXT: v_readlane_b32 s34, v40, 28
14937 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14938 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14939 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14940 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14941 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
14942 ; GFX10-NEXT: s_mov_b32 s33, s34
14943 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14944 ; GFX10-NEXT: s_setpc_b64 s[30:31]
14946 ; GFX11-LABEL: test_call_external_void_func_v32i32_inreg:
14948 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14949 ; GFX11-NEXT: s_mov_b32 s0, s33
14950 ; GFX11-NEXT: s_mov_b32 s33, s32
14951 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14952 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
14953 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14954 ; GFX11-NEXT: v_writelane_b32 v40, s0, 28
14955 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
14956 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14957 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
14958 ; GFX11-NEXT: s_add_i32 s2, s32, 16
14959 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
14960 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
14961 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
14962 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
14963 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
14964 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
14965 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
14966 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
14967 ; GFX11-NEXT: v_writelane_b32 v40, s12, 8
14968 ; GFX11-NEXT: v_writelane_b32 v40, s13, 9
14969 ; GFX11-NEXT: v_writelane_b32 v40, s14, 10
14970 ; GFX11-NEXT: v_writelane_b32 v40, s15, 11
14971 ; GFX11-NEXT: v_writelane_b32 v40, s16, 12
14972 ; GFX11-NEXT: v_writelane_b32 v40, s17, 13
14973 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14
14974 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15
14975 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
14976 ; GFX11-NEXT: s_clause 0x1
14977 ; GFX11-NEXT: s_load_b512 s[36:51], s[0:1], 0x40
14978 ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0
14979 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_inreg@abs32@hi
14980 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_inreg@abs32@lo
14981 ; GFX11-NEXT: v_writelane_b32 v40, s20, 16
14982 ; GFX11-NEXT: v_writelane_b32 v40, s21, 17
14983 ; GFX11-NEXT: v_writelane_b32 v40, s22, 18
14984 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
14985 ; GFX11-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v5, s51
14986 ; GFX11-NEXT: v_writelane_b32 v40, s23, 19
14987 ; GFX11-NEXT: v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v1, s47
14988 ; GFX11-NEXT: v_dual_mov_b32 v2, s48 :: v_dual_mov_b32 v3, s49
14989 ; GFX11-NEXT: v_writelane_b32 v40, s24, 20
14990 ; GFX11-NEXT: s_mov_b32 s20, s36
14991 ; GFX11-NEXT: s_mov_b32 s21, s37
14992 ; GFX11-NEXT: s_mov_b32 s22, s38
14993 ; GFX11-NEXT: s_mov_b32 s23, s39
14994 ; GFX11-NEXT: v_writelane_b32 v40, s25, 21
14995 ; GFX11-NEXT: s_mov_b32 s24, s40
14996 ; GFX11-NEXT: s_mov_b32 s25, s41
14997 ; GFX11-NEXT: scratch_store_b64 off, v[4:5], s2
14998 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
14999 ; GFX11-NEXT: v_writelane_b32 v40, s26, 22
15000 ; GFX11-NEXT: s_mov_b32 s26, s42
15001 ; GFX11-NEXT: v_writelane_b32 v40, s27, 23
15002 ; GFX11-NEXT: s_mov_b32 s27, s43
15003 ; GFX11-NEXT: v_writelane_b32 v40, s28, 24
15004 ; GFX11-NEXT: s_mov_b32 s28, s44
15005 ; GFX11-NEXT: v_writelane_b32 v40, s29, 25
15006 ; GFX11-NEXT: s_mov_b32 s29, s45
15007 ; GFX11-NEXT: v_writelane_b32 v40, s30, 26
15008 ; GFX11-NEXT: v_writelane_b32 v40, s31, 27
15009 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
15010 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
15011 ; GFX11-NEXT: v_readlane_b32 s31, v40, 27
15012 ; GFX11-NEXT: v_readlane_b32 s30, v40, 26
15013 ; GFX11-NEXT: v_readlane_b32 s29, v40, 25
15014 ; GFX11-NEXT: v_readlane_b32 s28, v40, 24
15015 ; GFX11-NEXT: v_readlane_b32 s27, v40, 23
15016 ; GFX11-NEXT: v_readlane_b32 s26, v40, 22
15017 ; GFX11-NEXT: v_readlane_b32 s25, v40, 21
15018 ; GFX11-NEXT: v_readlane_b32 s24, v40, 20
15019 ; GFX11-NEXT: v_readlane_b32 s23, v40, 19
15020 ; GFX11-NEXT: v_readlane_b32 s22, v40, 18
15021 ; GFX11-NEXT: v_readlane_b32 s21, v40, 17
15022 ; GFX11-NEXT: v_readlane_b32 s20, v40, 16
15023 ; GFX11-NEXT: v_readlane_b32 s19, v40, 15
15024 ; GFX11-NEXT: v_readlane_b32 s18, v40, 14
15025 ; GFX11-NEXT: v_readlane_b32 s17, v40, 13
15026 ; GFX11-NEXT: v_readlane_b32 s16, v40, 12
15027 ; GFX11-NEXT: v_readlane_b32 s15, v40, 11
15028 ; GFX11-NEXT: v_readlane_b32 s14, v40, 10
15029 ; GFX11-NEXT: v_readlane_b32 s13, v40, 9
15030 ; GFX11-NEXT: v_readlane_b32 s12, v40, 8
15031 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
15032 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
15033 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
15034 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
15035 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
15036 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
15037 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
15038 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
15039 ; GFX11-NEXT: v_readlane_b32 s0, v40, 28
15040 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15041 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
15042 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15043 ; GFX11-NEXT: s_add_i32 s32, s32, -16
15044 ; GFX11-NEXT: s_mov_b32 s33, s0
15045 ; GFX11-NEXT: s_waitcnt vmcnt(0)
15046 ; GFX11-NEXT: s_setpc_b64 s[30:31]
15048 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_inreg:
15049 ; GFX10-SCRATCH: ; %bb.0:
15050 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15051 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
15052 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
15053 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15054 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
15055 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15056 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15057 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28
15058 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
15059 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
15060 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 16
15061 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
15062 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
15063 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
15064 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
15065 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
15066 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
15067 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
15068 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
15069 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8
15070 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9
15071 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10
15072 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11
15073 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12
15074 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13
15075 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14
15076 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15
15077 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
15078 ; GFX10-SCRATCH-NEXT: s_clause 0x1
15079 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
15080 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
15081 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_inreg@abs32@hi
15082 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_inreg@abs32@lo
15083 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16
15084 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17
15085 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18
15086 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
15087 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50
15088 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19
15089 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51
15090 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46
15091 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47
15092 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48
15093 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20
15094 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49
15095 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36
15096 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37
15097 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38
15098 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21
15099 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39
15100 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40
15101 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41
15102 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s2
15103 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
15104 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22
15105 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42
15106 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23
15107 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43
15108 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24
15109 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44
15110 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25
15111 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45
15112 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26
15113 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27
15114 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
15115 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27
15116 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26
15117 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25
15118 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24
15119 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23
15120 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s26, v40, 22
15121 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s25, v40, 21
15122 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s24, v40, 20
15123 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s23, v40, 19
15124 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s22, v40, 18
15125 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s21, v40, 17
15126 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s20, v40, 16
15127 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15
15128 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14
15129 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13
15130 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12
15131 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11
15132 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10
15133 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9
15134 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8
15135 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
15136 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
15137 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
15138 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
15139 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
15140 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
15141 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
15142 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
15143 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 28
15144 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15145 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
15146 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15147 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15148 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
15149 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
15150 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
15151 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
15152 %ptr = load ptr addrspace(4), ptr addrspace(4) undef
15153 %val = load <32 x i32>, ptr addrspace(4) %ptr
15154 call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg %val)
; Calls @external_void_func_v32i32_i32_inreg with a <32 x i32> and an i32 that
; are both marked inreg. The unnamed i32 parameter of this function is unused.
; Per the checks below, vector elements 0-25 are passed in s4-s29 (s4-s19 are
; loaded directly, s20-s29 are copied from s36-s45), while elements 26-31
; (s46-s51) and the trailing i32 are stored to the outgoing stack area at
; s32 offsets 0-20 and 24 respectively. v40 holds s4-s31 and the previous s33
; (lanes 0-28) across the call and is itself spilled/reloaded at s33.
15158 define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
15159 ; GFX9-LABEL: test_call_external_void_func_v32i32_i32_inreg:
15161 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15162 ; GFX9-NEXT: s_mov_b32 s34, s33
15163 ; GFX9-NEXT: s_mov_b32 s33, s32
15164 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15165 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
15166 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
15167 ; GFX9-NEXT: v_writelane_b32 v40, s34, 28
15168 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
15169 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
15170 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
15171 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
15172 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
15173 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
15174 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
15175 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
15176 ; GFX9-NEXT: v_writelane_b32 v40, s12, 8
15177 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9
15178 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10
15179 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11
15180 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12
15181 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13
15182 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14
15183 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
15184 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15
15185 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16
15186 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17
15187 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18
15188 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19
15189 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
15190 ; GFX9-NEXT: s_load_dword s52, s[34:35], 0x0
15191 ; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35
15192 ; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35
15193 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
15194 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
15195 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20
15196 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21
15197 ; GFX9-NEXT: s_addk_i32 s32, 0x400
15198 ; GFX9-NEXT: v_writelane_b32 v40, s26, 22
15199 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
15200 ; GFX9-NEXT: v_mov_b32_e32 v0, s52
15201 ; GFX9-NEXT: v_writelane_b32 v40, s27, 23
15202 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
15203 ; GFX9-NEXT: v_mov_b32_e32 v0, s46
15204 ; GFX9-NEXT: v_writelane_b32 v40, s28, 24
15205 ; GFX9-NEXT: v_mov_b32_e32 v1, s47
15206 ; GFX9-NEXT: v_mov_b32_e32 v2, s48
15207 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
15208 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
15209 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
15210 ; GFX9-NEXT: v_mov_b32_e32 v0, s49
15211 ; GFX9-NEXT: v_writelane_b32 v40, s29, 25
15212 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
15213 ; GFX9-NEXT: v_mov_b32_e32 v0, s50
15214 ; GFX9-NEXT: v_writelane_b32 v40, s30, 26
15215 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
15216 ; GFX9-NEXT: v_mov_b32_e32 v0, s51
15217 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32_inreg@abs32@hi
15218 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32_inreg@abs32@lo
15219 ; GFX9-NEXT: s_mov_b32 s20, s36
15220 ; GFX9-NEXT: s_mov_b32 s21, s37
15221 ; GFX9-NEXT: s_mov_b32 s22, s38
15222 ; GFX9-NEXT: s_mov_b32 s23, s39
15223 ; GFX9-NEXT: s_mov_b32 s24, s40
15224 ; GFX9-NEXT: s_mov_b32 s25, s41
15225 ; GFX9-NEXT: s_mov_b32 s26, s42
15226 ; GFX9-NEXT: s_mov_b32 s27, s43
15227 ; GFX9-NEXT: s_mov_b32 s28, s44
15228 ; GFX9-NEXT: s_mov_b32 s29, s45
15229 ; GFX9-NEXT: v_writelane_b32 v40, s31, 27
15230 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
15231 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
15232 ; GFX9-NEXT: v_readlane_b32 s31, v40, 27
15233 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26
15234 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25
15235 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24
15236 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23
15237 ; GFX9-NEXT: v_readlane_b32 s26, v40, 22
15238 ; GFX9-NEXT: v_readlane_b32 s25, v40, 21
15239 ; GFX9-NEXT: v_readlane_b32 s24, v40, 20
15240 ; GFX9-NEXT: v_readlane_b32 s23, v40, 19
15241 ; GFX9-NEXT: v_readlane_b32 s22, v40, 18
15242 ; GFX9-NEXT: v_readlane_b32 s21, v40, 17
15243 ; GFX9-NEXT: v_readlane_b32 s20, v40, 16
15244 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15
15245 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14
15246 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13
15247 ; GFX9-NEXT: v_readlane_b32 s16, v40, 12
15248 ; GFX9-NEXT: v_readlane_b32 s15, v40, 11
15249 ; GFX9-NEXT: v_readlane_b32 s14, v40, 10
15250 ; GFX9-NEXT: v_readlane_b32 s13, v40, 9
15251 ; GFX9-NEXT: v_readlane_b32 s12, v40, 8
15252 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
15253 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
15254 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
15255 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
15256 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
15257 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
15258 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
15259 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
15260 ; GFX9-NEXT: v_readlane_b32 s34, v40, 28
15261 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15262 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
15263 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
15264 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
15265 ; GFX9-NEXT: s_mov_b32 s33, s34
15266 ; GFX9-NEXT: s_waitcnt vmcnt(0)
15267 ; GFX9-NEXT: s_setpc_b64 s[30:31]
15269 ; GFX10-LABEL: test_call_external_void_func_v32i32_i32_inreg:
15271 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15272 ; GFX10-NEXT: s_mov_b32 s34, s33
15273 ; GFX10-NEXT: s_mov_b32 s33, s32
15274 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
15275 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
15276 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
15277 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
15278 ; GFX10-NEXT: v_writelane_b32 v40, s34, 28
15279 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
15280 ; GFX10-NEXT: s_addk_i32 s32, 0x200
15281 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
15282 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
15283 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
15284 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
15285 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
15286 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
15287 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
15288 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
15289 ; GFX10-NEXT: v_writelane_b32 v40, s12, 8
15290 ; GFX10-NEXT: v_writelane_b32 v40, s13, 9
15291 ; GFX10-NEXT: v_writelane_b32 v40, s14, 10
15292 ; GFX10-NEXT: v_writelane_b32 v40, s15, 11
15293 ; GFX10-NEXT: v_writelane_b32 v40, s16, 12
15294 ; GFX10-NEXT: v_writelane_b32 v40, s17, 13
15295 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14
15296 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15
15297 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
15298 ; GFX10-NEXT: s_clause 0x2
15299 ; GFX10-NEXT: s_load_dword s52, s[34:35], 0x0
15300 ; GFX10-NEXT: ; meta instruction
15301 ; GFX10-NEXT: ; meta instruction
15302 ; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
15303 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
15304 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32_inreg@abs32@hi
15305 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32_inreg@abs32@lo
15306 ; GFX10-NEXT: v_writelane_b32 v40, s20, 16
15307 ; GFX10-NEXT: v_writelane_b32 v40, s21, 17
15308 ; GFX10-NEXT: v_writelane_b32 v40, s22, 18
15309 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
15310 ; GFX10-NEXT: v_mov_b32_e32 v0, s52
15311 ; GFX10-NEXT: v_mov_b32_e32 v1, s47
15312 ; GFX10-NEXT: v_writelane_b32 v40, s23, 19
15313 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
15314 ; GFX10-NEXT: v_mov_b32_e32 v0, s46
15315 ; GFX10-NEXT: v_mov_b32_e32 v2, s48
15316 ; GFX10-NEXT: v_mov_b32_e32 v3, s49
15317 ; GFX10-NEXT: v_writelane_b32 v40, s24, 20
15318 ; GFX10-NEXT: s_mov_b32 s20, s36
15319 ; GFX10-NEXT: s_mov_b32 s21, s37
15320 ; GFX10-NEXT: s_mov_b32 s22, s38
15321 ; GFX10-NEXT: s_mov_b32 s23, s39
15322 ; GFX10-NEXT: v_writelane_b32 v40, s25, 21
15323 ; GFX10-NEXT: s_mov_b32 s24, s40
15324 ; GFX10-NEXT: s_mov_b32 s25, s41
15325 ; GFX10-NEXT: v_mov_b32_e32 v4, s50
15326 ; GFX10-NEXT: v_mov_b32_e32 v5, s51
15327 ; GFX10-NEXT: v_writelane_b32 v40, s26, 22
15328 ; GFX10-NEXT: s_mov_b32 s26, s42
15329 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
15330 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
15331 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
15332 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
15333 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16
15334 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20
15335 ; GFX10-NEXT: v_writelane_b32 v40, s27, 23
15336 ; GFX10-NEXT: s_mov_b32 s27, s43
15337 ; GFX10-NEXT: v_writelane_b32 v40, s28, 24
15338 ; GFX10-NEXT: s_mov_b32 s28, s44
15339 ; GFX10-NEXT: v_writelane_b32 v40, s29, 25
15340 ; GFX10-NEXT: s_mov_b32 s29, s45
15341 ; GFX10-NEXT: v_writelane_b32 v40, s30, 26
15342 ; GFX10-NEXT: v_writelane_b32 v40, s31, 27
15343 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
15344 ; GFX10-NEXT: v_readlane_b32 s31, v40, 27
15345 ; GFX10-NEXT: v_readlane_b32 s30, v40, 26
15346 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25
15347 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24
15348 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23
15349 ; GFX10-NEXT: v_readlane_b32 s26, v40, 22
15350 ; GFX10-NEXT: v_readlane_b32 s25, v40, 21
15351 ; GFX10-NEXT: v_readlane_b32 s24, v40, 20
15352 ; GFX10-NEXT: v_readlane_b32 s23, v40, 19
15353 ; GFX10-NEXT: v_readlane_b32 s22, v40, 18
15354 ; GFX10-NEXT: v_readlane_b32 s21, v40, 17
15355 ; GFX10-NEXT: v_readlane_b32 s20, v40, 16
15356 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15
15357 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14
15358 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13
15359 ; GFX10-NEXT: v_readlane_b32 s16, v40, 12
15360 ; GFX10-NEXT: v_readlane_b32 s15, v40, 11
15361 ; GFX10-NEXT: v_readlane_b32 s14, v40, 10
15362 ; GFX10-NEXT: v_readlane_b32 s13, v40, 9
15363 ; GFX10-NEXT: v_readlane_b32 s12, v40, 8
15364 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
15365 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
15366 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
15367 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
15368 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
15369 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
15370 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
15371 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
15372 ; GFX10-NEXT: v_readlane_b32 s34, v40, 28
15373 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
15374 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
15375 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
15376 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
15377 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
15378 ; GFX10-NEXT: s_mov_b32 s33, s34
15379 ; GFX10-NEXT: s_waitcnt vmcnt(0)
15380 ; GFX10-NEXT: s_setpc_b64 s[30:31]
15382 ; GFX11-LABEL: test_call_external_void_func_v32i32_i32_inreg:
15384 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15385 ; GFX11-NEXT: s_mov_b32 s0, s33
15386 ; GFX11-NEXT: s_mov_b32 s33, s32
15387 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15388 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
15389 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15390 ; GFX11-NEXT: v_writelane_b32 v40, s0, 28
15391 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
15392 ; GFX11-NEXT: s_add_i32 s32, s32, 16
15393 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
15394 ; GFX11-NEXT: s_add_i32 s3, s32, 16
15395 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
15396 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
15397 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
15398 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
15399 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
15400 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
15401 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
15402 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
15403 ; GFX11-NEXT: v_writelane_b32 v40, s12, 8
15404 ; GFX11-NEXT: v_writelane_b32 v40, s13, 9
15405 ; GFX11-NEXT: v_writelane_b32 v40, s14, 10
15406 ; GFX11-NEXT: v_writelane_b32 v40, s15, 11
15407 ; GFX11-NEXT: v_writelane_b32 v40, s16, 12
15408 ; GFX11-NEXT: v_writelane_b32 v40, s17, 13
15409 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14
15410 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15
15411 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
15412 ; GFX11-NEXT: s_clause 0x2
15413 ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0
15414 ; GFX11-NEXT: s_load_b512 s[36:51], s[0:1], 0x40
15415 ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0
15416 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32_inreg@abs32@hi
15417 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32_inreg@abs32@lo
15418 ; GFX11-NEXT: v_writelane_b32 v40, s20, 16
15419 ; GFX11-NEXT: v_writelane_b32 v40, s21, 17
15420 ; GFX11-NEXT: v_writelane_b32 v40, s22, 18
15421 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
15422 ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v5, s51
15423 ; GFX11-NEXT: v_writelane_b32 v40, s23, 19
15424 ; GFX11-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v1, s47
15425 ; GFX11-NEXT: v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v3, s49
15426 ; GFX11-NEXT: v_writelane_b32 v40, s24, 20
15427 ; GFX11-NEXT: v_mov_b32_e32 v2, s48
15428 ; GFX11-NEXT: s_add_i32 s2, s32, 24
15429 ; GFX11-NEXT: s_mov_b32 s20, s36
15430 ; GFX11-NEXT: s_mov_b32 s21, s37
15431 ; GFX11-NEXT: v_writelane_b32 v40, s25, 21
15432 ; GFX11-NEXT: s_mov_b32 s22, s38
15433 ; GFX11-NEXT: s_mov_b32 s23, s39
15434 ; GFX11-NEXT: s_mov_b32 s24, s40
15435 ; GFX11-NEXT: s_mov_b32 s25, s41
15436 ; GFX11-NEXT: v_writelane_b32 v40, s26, 22
15437 ; GFX11-NEXT: s_mov_b32 s26, s42
15438 ; GFX11-NEXT: scratch_store_b32 off, v6, s2
15439 ; GFX11-NEXT: scratch_store_b64 off, v[4:5], s3
15440 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
15441 ; GFX11-NEXT: v_writelane_b32 v40, s27, 23
15442 ; GFX11-NEXT: s_mov_b32 s27, s43
15443 ; GFX11-NEXT: v_writelane_b32 v40, s28, 24
15444 ; GFX11-NEXT: s_mov_b32 s28, s44
15445 ; GFX11-NEXT: v_writelane_b32 v40, s29, 25
15446 ; GFX11-NEXT: s_mov_b32 s29, s45
15447 ; GFX11-NEXT: v_writelane_b32 v40, s30, 26
15448 ; GFX11-NEXT: v_writelane_b32 v40, s31, 27
15449 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
15450 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
15451 ; GFX11-NEXT: v_readlane_b32 s31, v40, 27
15452 ; GFX11-NEXT: v_readlane_b32 s30, v40, 26
15453 ; GFX11-NEXT: v_readlane_b32 s29, v40, 25
15454 ; GFX11-NEXT: v_readlane_b32 s28, v40, 24
15455 ; GFX11-NEXT: v_readlane_b32 s27, v40, 23
15456 ; GFX11-NEXT: v_readlane_b32 s26, v40, 22
15457 ; GFX11-NEXT: v_readlane_b32 s25, v40, 21
15458 ; GFX11-NEXT: v_readlane_b32 s24, v40, 20
15459 ; GFX11-NEXT: v_readlane_b32 s23, v40, 19
15460 ; GFX11-NEXT: v_readlane_b32 s22, v40, 18
15461 ; GFX11-NEXT: v_readlane_b32 s21, v40, 17
15462 ; GFX11-NEXT: v_readlane_b32 s20, v40, 16
15463 ; GFX11-NEXT: v_readlane_b32 s19, v40, 15
15464 ; GFX11-NEXT: v_readlane_b32 s18, v40, 14
15465 ; GFX11-NEXT: v_readlane_b32 s17, v40, 13
15466 ; GFX11-NEXT: v_readlane_b32 s16, v40, 12
15467 ; GFX11-NEXT: v_readlane_b32 s15, v40, 11
15468 ; GFX11-NEXT: v_readlane_b32 s14, v40, 10
15469 ; GFX11-NEXT: v_readlane_b32 s13, v40, 9
15470 ; GFX11-NEXT: v_readlane_b32 s12, v40, 8
15471 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
15472 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
15473 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
15474 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
15475 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
15476 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
15477 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
15478 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
15479 ; GFX11-NEXT: v_readlane_b32 s0, v40, 28
15480 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15481 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
15482 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15483 ; GFX11-NEXT: s_add_i32 s32, s32, -16
15484 ; GFX11-NEXT: s_mov_b32 s33, s0
15485 ; GFX11-NEXT: s_waitcnt vmcnt(0)
15486 ; GFX11-NEXT: s_setpc_b64 s[30:31]
15488 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32_inreg:
15489 ; GFX10-SCRATCH: ; %bb.0:
15490 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15491 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
15492 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
15493 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15494 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
15495 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15496 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15497 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28
15498 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
15499 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
15500 ; GFX10-SCRATCH-NEXT: s_add_i32 s3, s32, 16
15501 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
15502 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
15503 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
15504 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
15505 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
15506 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
15507 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
15508 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
15509 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8
15510 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9
15511 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10
15512 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11
15513 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12
15514 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13
15515 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14
15516 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15
15517 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
15518 ; GFX10-SCRATCH-NEXT: s_clause 0x2
15519 ; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0
15520 ; GFX10-SCRATCH-NEXT: ; meta instruction
15521 ; GFX10-SCRATCH-NEXT: ; meta instruction
15522 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
15523 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
15524 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32_inreg@abs32@hi
15525 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32_inreg@abs32@lo
15526 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16
15527 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17
15528 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18
15529 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
15530 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, s2
15531 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 24
15532 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50
15533 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19
15534 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51
15535 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46
15536 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47
15537 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48
15538 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20
15539 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49
15540 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36
15541 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37
15542 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38
15543 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21
15544 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39
15545 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40
15546 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41
15547 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s2
15548 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s3
15549 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
15550 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22
15551 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42
15552 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23
15553 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43
15554 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24
15555 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44
15556 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25
15557 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45
15558 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26
15559 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27
15560 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
15561 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27
15562 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26
15563 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25
15564 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24
15565 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23
15566 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s26, v40, 22
15567 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s25, v40, 21
15568 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s24, v40, 20
15569 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s23, v40, 19
15570 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s22, v40, 18
15571 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s21, v40, 17
15572 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s20, v40, 16
15573 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15
15574 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14
15575 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13
15576 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12
15577 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11
15578 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10
15579 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9
15580 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8
15581 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
15582 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
15583 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
15584 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
15585 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
15586 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
15587 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
15588 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
15589 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 28
15590 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15591 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
15592 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15593 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15594 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
15595 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
15596 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
15597 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The load addresses are undef; the loaded values are used only as the two
; inreg call operands below.
15598 %ptr0 = load ptr addrspace(4), ptr addrspace(4) undef
15599 %val0 = load <32 x i32>, ptr addrspace(4) %ptr0
15600 %val1 = load i32, ptr addrspace(4) undef
15601 call amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg %val0, i32 inreg %val1)
; Forwards its <32 x i32> %val and double %tmp arguments unchanged to
; @stack_passed_f64_arg. Per the checks below, two dwords are read from the
; incoming frame (s33, offsets 0 and 4) and re-stored at the outgoing stack
; pointer (s32, offsets 0 and 4) before the call; no copies of the vector
; argument are emitted. v40 (spilled at s33 offset 8) holds s30, s31 and the
; previous s33 across the call.
; NOTE(review): the two forwarded dwords are presumably the stack-passed
; double, and the name suggests the test guards its stack alignment - confirm.
15605 define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
15606 ; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
15607 ; GFX9: ; %bb.0: ; %entry
15608 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15609 ; GFX9-NEXT: s_mov_b32 s34, s33
15610 ; GFX9-NEXT: s_mov_b32 s33, s32
15611 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15612 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
15613 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
15614 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33
15615 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4
15616 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
15617 ; GFX9-NEXT: s_addk_i32 s32, 0x400
15618 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
15619 ; GFX9-NEXT: s_mov_b32 s35, stack_passed_f64_arg@abs32@hi
15620 ; GFX9-NEXT: s_mov_b32 s34, stack_passed_f64_arg@abs32@lo
15621 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
15622 ; GFX9-NEXT: s_waitcnt vmcnt(1)
15623 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
15624 ; GFX9-NEXT: s_waitcnt vmcnt(1)
15625 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
15626 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
15627 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
15628 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
15629 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
15630 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15631 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
15632 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
15633 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
15634 ; GFX9-NEXT: s_mov_b32 s33, s34
15635 ; GFX9-NEXT: s_waitcnt vmcnt(0)
15636 ; GFX9-NEXT: s_setpc_b64 s[30:31]
15638 ; GFX10-LABEL: stack_passed_arg_alignment_v32i32_f64:
15639 ; GFX10: ; %bb.0: ; %entry
15640 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15641 ; GFX10-NEXT: s_mov_b32 s34, s33
15642 ; GFX10-NEXT: s_mov_b32 s33, s32
15643 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
15644 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
15645 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
15646 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
15647 ; GFX10-NEXT: s_clause 0x1
15648 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33
15649 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4
15650 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
15651 ; GFX10-NEXT: s_addk_i32 s32, 0x200
15652 ; GFX10-NEXT: s_mov_b32 s35, stack_passed_f64_arg@abs32@hi
15653 ; GFX10-NEXT: s_mov_b32 s34, stack_passed_f64_arg@abs32@lo
15654 ; GFX10-NEXT: s_waitcnt vmcnt(1)
15655 ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32
15656 ; GFX10-NEXT: s_waitcnt vmcnt(0)
15657 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
15658 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
15659 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
15660 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
15661 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
15662 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
15663 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
15664 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
15665 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
15666 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
15667 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
15668 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
15669 ; GFX10-NEXT: s_mov_b32 s33, s34
15670 ; GFX10-NEXT: s_waitcnt vmcnt(0)
15671 ; GFX10-NEXT: s_setpc_b64 s[30:31]
15673 ; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64:
15674 ; GFX11: ; %bb.0: ; %entry
15675 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15676 ; GFX11-NEXT: s_mov_b32 s0, s33
15677 ; GFX11-NEXT: s_mov_b32 s33, s32
15678 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15679 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
15680 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15681 ; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33
15682 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
15683 ; GFX11-NEXT: s_add_i32 s32, s32, 16
15684 ; GFX11-NEXT: s_mov_b32 s1, stack_passed_f64_arg@abs32@hi
15685 ; GFX11-NEXT: s_mov_b32 s0, stack_passed_f64_arg@abs32@lo
15686 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
15687 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
15688 ; GFX11-NEXT: s_waitcnt vmcnt(0)
15689 ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32
15690 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
15691 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
15692 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
15693 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
15694 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15695 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
15696 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15697 ; GFX11-NEXT: s_add_i32 s32, s32, -16
15698 ; GFX11-NEXT: s_mov_b32 s33, s0
15699 ; GFX11-NEXT: s_waitcnt vmcnt(0)
15700 ; GFX11-NEXT: s_setpc_b64 s[30:31]
15702 ; GFX10-SCRATCH-LABEL: stack_passed_arg_alignment_v32i32_f64:
15703 ; GFX10-SCRATCH: ; %bb.0: ; %entry
15704 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15705 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
15706 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
15707 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15708 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
15709 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15710 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15711 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33
15712 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
15713 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
15714 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, stack_passed_f64_arg@abs32@hi
15715 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, stack_passed_f64_arg@abs32@lo
15716 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
15717 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
15718 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
15719 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32
15720 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
15721 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
15722 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
15723 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
15724 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15725 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
15726 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15727 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15728 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
15729 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
15730 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
15731 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; Pass both incoming arguments straight through to the callee.
15733 call amdgpu_gfx void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
; Calls @external_void_func_12xv3i32 with twelve constant <3 x i32> arguments
; (36 i32 values in total). Per the checks below, the first 32 values are
; materialized in v0-v31 and the remaining four (12, 13, 14, 15) are stored to
; the outgoing stack area at s32 (byte offsets 0-12) before the call. Note that
; the eleventh vector <10, 11, 12> ends up split: 10 and 11 go to v30/v31 while
; 12 is the first value written to the stack.
15737 define amdgpu_gfx void @stack_12xv3i32() #0 {
15738 ; GFX9-LABEL: stack_12xv3i32:
15739 ; GFX9: ; %bb.0: ; %entry
15740 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15741 ; GFX9-NEXT: s_mov_b32 s34, s33
15742 ; GFX9-NEXT: s_mov_b32 s33, s32
15743 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15744 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
15745 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
15746 ; GFX9-NEXT: s_addk_i32 s32, 0x400
15747 ; GFX9-NEXT: v_mov_b32_e32 v0, 12
15748 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
15749 ; GFX9-NEXT: v_mov_b32_e32 v0, 13
15750 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
15751 ; GFX9-NEXT: v_mov_b32_e32 v0, 14
15752 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
15753 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
15754 ; GFX9-NEXT: v_mov_b32_e32 v0, 15
15755 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
15756 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
15757 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_12xv3i32@abs32@hi
15758 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_12xv3i32@abs32@lo
15759 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
15760 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
15761 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
15762 ; GFX9-NEXT: v_mov_b32_e32 v3, 1
15763 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
15764 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
15765 ; GFX9-NEXT: v_mov_b32_e32 v6, 2
15766 ; GFX9-NEXT: v_mov_b32_e32 v7, 2
15767 ; GFX9-NEXT: v_mov_b32_e32 v8, 2
15768 ; GFX9-NEXT: v_mov_b32_e32 v9, 3
15769 ; GFX9-NEXT: v_mov_b32_e32 v10, 3
15770 ; GFX9-NEXT: v_mov_b32_e32 v11, 3
15771 ; GFX9-NEXT: v_mov_b32_e32 v12, 4
15772 ; GFX9-NEXT: v_mov_b32_e32 v13, 4
15773 ; GFX9-NEXT: v_mov_b32_e32 v14, 4
15774 ; GFX9-NEXT: v_mov_b32_e32 v15, 5
15775 ; GFX9-NEXT: v_mov_b32_e32 v16, 5
15776 ; GFX9-NEXT: v_mov_b32_e32 v17, 5
15777 ; GFX9-NEXT: v_mov_b32_e32 v18, 6
15778 ; GFX9-NEXT: v_mov_b32_e32 v19, 6
15779 ; GFX9-NEXT: v_mov_b32_e32 v20, 6
15780 ; GFX9-NEXT: v_mov_b32_e32 v21, 7
15781 ; GFX9-NEXT: v_mov_b32_e32 v22, 7
15782 ; GFX9-NEXT: v_mov_b32_e32 v23, 7
15783 ; GFX9-NEXT: v_mov_b32_e32 v24, 8
15784 ; GFX9-NEXT: v_mov_b32_e32 v25, 8
15785 ; GFX9-NEXT: v_mov_b32_e32 v26, 8
15786 ; GFX9-NEXT: v_mov_b32_e32 v27, 9
15787 ; GFX9-NEXT: v_mov_b32_e32 v28, 9
15788 ; GFX9-NEXT: v_mov_b32_e32 v29, 9
15789 ; GFX9-NEXT: v_mov_b32_e32 v30, 10
15790 ; GFX9-NEXT: v_mov_b32_e32 v31, 11
15791 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
15792 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
15793 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
15794 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
15795 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
15796 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15797 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
15798 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
15799 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
15800 ; GFX9-NEXT: s_mov_b32 s33, s34
15801 ; GFX9-NEXT: s_waitcnt vmcnt(0)
15802 ; GFX9-NEXT: s_setpc_b64 s[30:31]
15804 ; GFX10-LABEL: stack_12xv3i32:
15805 ; GFX10: ; %bb.0: ; %entry
15806 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15807 ; GFX10-NEXT: s_mov_b32 s34, s33
15808 ; GFX10-NEXT: s_mov_b32 s33, s32
15809 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
15810 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
15811 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
15812 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
15813 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
15814 ; GFX10-NEXT: v_mov_b32_e32 v0, 12
15815 ; GFX10-NEXT: v_mov_b32_e32 v1, 13
15816 ; GFX10-NEXT: v_mov_b32_e32 v2, 14
15817 ; GFX10-NEXT: s_addk_i32 s32, 0x200
15818 ; GFX10-NEXT: v_mov_b32_e32 v3, 15
15819 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
15820 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
15821 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
15822 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
15823 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
15824 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
15825 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
15826 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
15827 ; GFX10-NEXT: v_mov_b32_e32 v3, 1
15828 ; GFX10-NEXT: v_mov_b32_e32 v4, 1
15829 ; GFX10-NEXT: v_mov_b32_e32 v5, 1
15830 ; GFX10-NEXT: v_mov_b32_e32 v6, 2
15831 ; GFX10-NEXT: v_mov_b32_e32 v7, 2
15832 ; GFX10-NEXT: v_mov_b32_e32 v8, 2
15833 ; GFX10-NEXT: v_mov_b32_e32 v9, 3
15834 ; GFX10-NEXT: v_mov_b32_e32 v10, 3
15835 ; GFX10-NEXT: v_mov_b32_e32 v11, 3
15836 ; GFX10-NEXT: v_mov_b32_e32 v12, 4
15837 ; GFX10-NEXT: v_mov_b32_e32 v13, 4
15838 ; GFX10-NEXT: v_mov_b32_e32 v14, 4
15839 ; GFX10-NEXT: v_mov_b32_e32 v15, 5
15840 ; GFX10-NEXT: v_mov_b32_e32 v16, 5
15841 ; GFX10-NEXT: v_mov_b32_e32 v17, 5
15842 ; GFX10-NEXT: v_mov_b32_e32 v18, 6
15843 ; GFX10-NEXT: v_mov_b32_e32 v19, 6
15844 ; GFX10-NEXT: v_mov_b32_e32 v20, 6
15845 ; GFX10-NEXT: v_mov_b32_e32 v21, 7
15846 ; GFX10-NEXT: v_mov_b32_e32 v22, 7
15847 ; GFX10-NEXT: v_mov_b32_e32 v23, 7
15848 ; GFX10-NEXT: v_mov_b32_e32 v24, 8
15849 ; GFX10-NEXT: v_mov_b32_e32 v25, 8
15850 ; GFX10-NEXT: v_mov_b32_e32 v26, 8
15851 ; GFX10-NEXT: v_mov_b32_e32 v27, 9
15852 ; GFX10-NEXT: v_mov_b32_e32 v28, 9
15853 ; GFX10-NEXT: v_mov_b32_e32 v29, 9
15854 ; GFX10-NEXT: v_mov_b32_e32 v30, 10
15855 ; GFX10-NEXT: v_mov_b32_e32 v31, 11
15856 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_12xv3i32@abs32@hi
15857 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_12xv3i32@abs32@lo
15858 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
15859 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
15860 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
15861 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
15862 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
15863 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
15864 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
15865 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
15866 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
15867 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
15868 ; GFX10-NEXT: s_mov_b32 s33, s34
15869 ; GFX10-NEXT: s_waitcnt vmcnt(0)
15870 ; GFX10-NEXT: s_setpc_b64 s[30:31]
15872 ; GFX11-LABEL: stack_12xv3i32:
15873 ; GFX11: ; %bb.0: ; %entry
15874 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15875 ; GFX11-NEXT: s_mov_b32 s0, s33
15876 ; GFX11-NEXT: s_mov_b32 s33, s32
15877 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15878 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
15879 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15880 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
15881 ; GFX11-NEXT: v_dual_mov_b32 v0, 12 :: v_dual_mov_b32 v1, 13
15882 ; GFX11-NEXT: v_dual_mov_b32 v2, 14 :: v_dual_mov_b32 v3, 15
15883 ; GFX11-NEXT: s_add_i32 s32, s32, 16
15884 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
15885 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 1
15886 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
15887 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
15888 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1
15889 ; GFX11-NEXT: v_dual_mov_b32 v6, 2 :: v_dual_mov_b32 v7, 2
15890 ; GFX11-NEXT: v_dual_mov_b32 v8, 2 :: v_dual_mov_b32 v9, 3
15891 ; GFX11-NEXT: v_dual_mov_b32 v10, 3 :: v_dual_mov_b32 v11, 3
15892 ; GFX11-NEXT: v_dual_mov_b32 v12, 4 :: v_dual_mov_b32 v13, 4
15893 ; GFX11-NEXT: v_dual_mov_b32 v14, 4 :: v_dual_mov_b32 v15, 5
15894 ; GFX11-NEXT: v_dual_mov_b32 v16, 5 :: v_dual_mov_b32 v17, 5
15895 ; GFX11-NEXT: v_dual_mov_b32 v18, 6 :: v_dual_mov_b32 v19, 6
15896 ; GFX11-NEXT: v_dual_mov_b32 v20, 6 :: v_dual_mov_b32 v21, 7
15897 ; GFX11-NEXT: v_dual_mov_b32 v22, 7 :: v_dual_mov_b32 v23, 7
15898 ; GFX11-NEXT: v_dual_mov_b32 v24, 8 :: v_dual_mov_b32 v25, 8
15899 ; GFX11-NEXT: v_dual_mov_b32 v26, 8 :: v_dual_mov_b32 v27, 9
15900 ; GFX11-NEXT: v_dual_mov_b32 v28, 9 :: v_dual_mov_b32 v29, 9
15901 ; GFX11-NEXT: v_dual_mov_b32 v30, 10 :: v_dual_mov_b32 v31, 11
15902 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_12xv3i32@abs32@hi
15903 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_12xv3i32@abs32@lo
15904 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
15905 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
15906 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
15907 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
15908 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
15909 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
15910 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15911 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
15912 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15913 ; GFX11-NEXT: s_add_i32 s32, s32, -16
15914 ; GFX11-NEXT: s_mov_b32 s33, s0
15915 ; GFX11-NEXT: s_waitcnt vmcnt(0)
15916 ; GFX11-NEXT: s_setpc_b64 s[30:31]
15918 ; GFX10-SCRATCH-LABEL: stack_12xv3i32:
15919 ; GFX10-SCRATCH: ; %bb.0: ; %entry
15920 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15921 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
15922 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
15923 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15924 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
15925 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15926 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15927 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
15928 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 12
15929 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 13
15930 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 14
15931 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 15
15932 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
15933 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
15934 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1
15935 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1
15936 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
15937 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
15938 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
15939 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
15940 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 1
15941 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 2
15942 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 2
15943 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 2
15944 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 3
15945 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 3
15946 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 3
15947 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 4
15948 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 4
15949 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 4
15950 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 5
15951 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 5
15952 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 5
15953 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 6
15954 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 6
15955 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 6
15956 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 7
15957 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 7
15958 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 7
15959 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 8
15960 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 8
15961 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 8
15962 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 9
15963 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 9
15964 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 9
15965 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 10
15966 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 11
15967 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_12xv3i32@abs32@hi
15968 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_12xv3i32@abs32@lo
15969 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
15970 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
15971 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
15972 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
15973 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
15974 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15975 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
15976 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15977 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15978 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
15979 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
15980 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
15981 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
15983 call amdgpu_gfx void @external_void_func_12xv3i32(
15984 <3 x i32><i32 0, i32 0, i32 0>,
15985 <3 x i32><i32 1, i32 1, i32 1>,
15986 <3 x i32><i32 2, i32 2, i32 2>,
15987 <3 x i32><i32 3, i32 3, i32 3>,
15988 <3 x i32><i32 4, i32 4, i32 4>,
15989 <3 x i32><i32 5, i32 5, i32 5>,
15990 <3 x i32><i32 6, i32 6, i32 6>,
15991 <3 x i32><i32 7, i32 7, i32 7>,
15992 <3 x i32><i32 8, i32 8, i32 8>,
15993 <3 x i32><i32 9, i32 9, i32 9>,
15994 <3 x i32><i32 10, i32 11, i32 12>,
15995 <3 x i32><i32 13, i32 14, i32 15>)
; Calls @external_void_func_8xv5i32 with eight constant <5 x i32> arguments
; (40 i32 values in total). Per the checks below, the first 32 values are
; materialized in v0-v31 and the remaining eight (8 through 15) are stored to
; the outgoing stack area at s32 (byte offsets 0-28) before the call. Note that
; the seventh vector <6, 7, 8, 9, 10> ends up split: 6 and 7 go to v30/v31
; while 8, 9 and 10 are the first values written to the stack.
15999 define amdgpu_gfx void @stack_8xv5i32() #0 {
16000 ; GFX9-LABEL: stack_8xv5i32:
16001 ; GFX9: ; %bb.0: ; %entry
16002 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16003 ; GFX9-NEXT: s_mov_b32 s34, s33
16004 ; GFX9-NEXT: s_mov_b32 s33, s32
16005 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16006 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16007 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16008 ; GFX9-NEXT: s_addk_i32 s32, 0x400
16009 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
16010 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
16011 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
16012 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
16013 ; GFX9-NEXT: v_mov_b32_e32 v0, 10
16014 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
16015 ; GFX9-NEXT: v_mov_b32_e32 v0, 11
16016 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
16017 ; GFX9-NEXT: v_mov_b32_e32 v0, 12
16018 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
16019 ; GFX9-NEXT: v_mov_b32_e32 v0, 13
16020 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
16021 ; GFX9-NEXT: v_mov_b32_e32 v0, 14
16022 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
16023 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
16024 ; GFX9-NEXT: v_mov_b32_e32 v0, 15
16025 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
16026 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
16027 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_8xv5i32@abs32@hi
16028 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_8xv5i32@abs32@lo
16029 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
16030 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
16031 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
16032 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
16033 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
16034 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
16035 ; GFX9-NEXT: v_mov_b32_e32 v6, 1
16036 ; GFX9-NEXT: v_mov_b32_e32 v7, 1
16037 ; GFX9-NEXT: v_mov_b32_e32 v8, 1
16038 ; GFX9-NEXT: v_mov_b32_e32 v9, 1
16039 ; GFX9-NEXT: v_mov_b32_e32 v10, 2
16040 ; GFX9-NEXT: v_mov_b32_e32 v11, 2
16041 ; GFX9-NEXT: v_mov_b32_e32 v12, 2
16042 ; GFX9-NEXT: v_mov_b32_e32 v13, 2
16043 ; GFX9-NEXT: v_mov_b32_e32 v14, 2
16044 ; GFX9-NEXT: v_mov_b32_e32 v15, 3
16045 ; GFX9-NEXT: v_mov_b32_e32 v16, 3
16046 ; GFX9-NEXT: v_mov_b32_e32 v17, 3
16047 ; GFX9-NEXT: v_mov_b32_e32 v18, 3
16048 ; GFX9-NEXT: v_mov_b32_e32 v19, 3
16049 ; GFX9-NEXT: v_mov_b32_e32 v20, 4
16050 ; GFX9-NEXT: v_mov_b32_e32 v21, 4
16051 ; GFX9-NEXT: v_mov_b32_e32 v22, 4
16052 ; GFX9-NEXT: v_mov_b32_e32 v23, 4
16053 ; GFX9-NEXT: v_mov_b32_e32 v24, 4
16054 ; GFX9-NEXT: v_mov_b32_e32 v25, 5
16055 ; GFX9-NEXT: v_mov_b32_e32 v26, 5
16056 ; GFX9-NEXT: v_mov_b32_e32 v27, 5
16057 ; GFX9-NEXT: v_mov_b32_e32 v28, 5
16058 ; GFX9-NEXT: v_mov_b32_e32 v29, 5
16059 ; GFX9-NEXT: v_mov_b32_e32 v30, 6
16060 ; GFX9-NEXT: v_mov_b32_e32 v31, 7
16061 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
16062 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
16063 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
16064 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
16065 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
16066 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16067 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
16068 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16069 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
16070 ; GFX9-NEXT: s_mov_b32 s33, s34
16071 ; GFX9-NEXT: s_waitcnt vmcnt(0)
16072 ; GFX9-NEXT: s_setpc_b64 s[30:31]
16074 ; GFX10-LABEL: stack_8xv5i32:
16075 ; GFX10: ; %bb.0: ; %entry
16076 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16077 ; GFX10-NEXT: s_mov_b32 s34, s33
16078 ; GFX10-NEXT: s_mov_b32 s33, s32
16079 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16080 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16081 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16082 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16083 ; GFX10-NEXT: v_mov_b32_e32 v0, 8
16084 ; GFX10-NEXT: v_mov_b32_e32 v1, 9
16085 ; GFX10-NEXT: v_mov_b32_e32 v2, 10
16086 ; GFX10-NEXT: s_addk_i32 s32, 0x200
16087 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
16088 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
16089 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
16090 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
16091 ; GFX10-NEXT: v_mov_b32_e32 v0, 11
16092 ; GFX10-NEXT: v_mov_b32_e32 v1, 12
16093 ; GFX10-NEXT: v_mov_b32_e32 v2, 13
16094 ; GFX10-NEXT: v_mov_b32_e32 v3, 14
16095 ; GFX10-NEXT: v_mov_b32_e32 v4, 15
16096 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
16097 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
16098 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16
16099 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20
16100 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24
16101 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28
16102 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
16103 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
16104 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
16105 ; GFX10-NEXT: v_mov_b32_e32 v3, 0
16106 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
16107 ; GFX10-NEXT: v_mov_b32_e32 v5, 1
16108 ; GFX10-NEXT: v_mov_b32_e32 v6, 1
16109 ; GFX10-NEXT: v_mov_b32_e32 v7, 1
16110 ; GFX10-NEXT: v_mov_b32_e32 v8, 1
16111 ; GFX10-NEXT: v_mov_b32_e32 v9, 1
16112 ; GFX10-NEXT: v_mov_b32_e32 v10, 2
16113 ; GFX10-NEXT: v_mov_b32_e32 v11, 2
16114 ; GFX10-NEXT: v_mov_b32_e32 v12, 2
16115 ; GFX10-NEXT: v_mov_b32_e32 v13, 2
16116 ; GFX10-NEXT: v_mov_b32_e32 v14, 2
16117 ; GFX10-NEXT: v_mov_b32_e32 v15, 3
16118 ; GFX10-NEXT: v_mov_b32_e32 v16, 3
16119 ; GFX10-NEXT: v_mov_b32_e32 v17, 3
16120 ; GFX10-NEXT: v_mov_b32_e32 v18, 3
16121 ; GFX10-NEXT: v_mov_b32_e32 v19, 3
16122 ; GFX10-NEXT: v_mov_b32_e32 v20, 4
16123 ; GFX10-NEXT: v_mov_b32_e32 v21, 4
16124 ; GFX10-NEXT: v_mov_b32_e32 v22, 4
16125 ; GFX10-NEXT: v_mov_b32_e32 v23, 4
16126 ; GFX10-NEXT: v_mov_b32_e32 v24, 4
16127 ; GFX10-NEXT: v_mov_b32_e32 v25, 5
16128 ; GFX10-NEXT: v_mov_b32_e32 v26, 5
16129 ; GFX10-NEXT: v_mov_b32_e32 v27, 5
16130 ; GFX10-NEXT: v_mov_b32_e32 v28, 5
16131 ; GFX10-NEXT: v_mov_b32_e32 v29, 5
16132 ; GFX10-NEXT: v_mov_b32_e32 v30, 6
16133 ; GFX10-NEXT: v_mov_b32_e32 v31, 7
16134 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_8xv5i32@abs32@hi
16135 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_8xv5i32@abs32@lo
16136 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
16137 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
16138 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
16139 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
16140 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
16141 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16142 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
16143 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16144 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16145 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
16146 ; GFX10-NEXT: s_mov_b32 s33, s34
16147 ; GFX10-NEXT: s_waitcnt vmcnt(0)
16148 ; GFX10-NEXT: s_setpc_b64 s[30:31]
16150 ; GFX11-LABEL: stack_8xv5i32:
16151 ; GFX11: ; %bb.0: ; %entry
16152 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16153 ; GFX11-NEXT: s_mov_b32 s0, s33
16154 ; GFX11-NEXT: s_mov_b32 s33, s32
16155 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16156 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
16157 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16158 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
16159 ; GFX11-NEXT: v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 9
16160 ; GFX11-NEXT: v_dual_mov_b32 v2, 10 :: v_dual_mov_b32 v3, 11
16161 ; GFX11-NEXT: v_dual_mov_b32 v4, 12 :: v_dual_mov_b32 v5, 13
16162 ; GFX11-NEXT: v_dual_mov_b32 v6, 14 :: v_dual_mov_b32 v7, 15
16163 ; GFX11-NEXT: s_add_i32 s32, s32, 16
16164 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
16165 ; GFX11-NEXT: s_add_i32 s0, s32, 16
16166 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
16167 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0
16168 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
16169 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
16170 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1
16171 ; GFX11-NEXT: v_dual_mov_b32 v6, 1 :: v_dual_mov_b32 v7, 1
16172 ; GFX11-NEXT: v_dual_mov_b32 v8, 1 :: v_dual_mov_b32 v9, 1
16173 ; GFX11-NEXT: v_dual_mov_b32 v10, 2 :: v_dual_mov_b32 v11, 2
16174 ; GFX11-NEXT: v_dual_mov_b32 v12, 2 :: v_dual_mov_b32 v13, 2
16175 ; GFX11-NEXT: v_dual_mov_b32 v14, 2 :: v_dual_mov_b32 v15, 3
16176 ; GFX11-NEXT: v_dual_mov_b32 v16, 3 :: v_dual_mov_b32 v17, 3
16177 ; GFX11-NEXT: v_dual_mov_b32 v18, 3 :: v_dual_mov_b32 v19, 3
16178 ; GFX11-NEXT: v_dual_mov_b32 v20, 4 :: v_dual_mov_b32 v21, 4
16179 ; GFX11-NEXT: v_dual_mov_b32 v22, 4 :: v_dual_mov_b32 v23, 4
16180 ; GFX11-NEXT: v_dual_mov_b32 v24, 4 :: v_dual_mov_b32 v25, 5
16181 ; GFX11-NEXT: v_dual_mov_b32 v26, 5 :: v_dual_mov_b32 v27, 5
16182 ; GFX11-NEXT: v_dual_mov_b32 v28, 5 :: v_dual_mov_b32 v29, 5
16183 ; GFX11-NEXT: v_dual_mov_b32 v30, 6 :: v_dual_mov_b32 v31, 7
16184 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_8xv5i32@abs32@hi
16185 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_8xv5i32@abs32@lo
16186 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
16187 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
16188 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
16189 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
16190 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
16191 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
16192 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16193 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
16194 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16195 ; GFX11-NEXT: s_add_i32 s32, s32, -16
16196 ; GFX11-NEXT: s_mov_b32 s33, s0
16197 ; GFX11-NEXT: s_waitcnt vmcnt(0)
16198 ; GFX11-NEXT: s_setpc_b64 s[30:31]
16200 ; GFX10-SCRATCH-LABEL: stack_8xv5i32:
16201 ; GFX10-SCRATCH: ; %bb.0: ; %entry
16202 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16203 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
16204 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
16205 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16206 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
16207 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16208 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16209 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
16210 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 8
16211 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 9
16212 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 10
16213 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 11
16214 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 12
16215 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 13
16216 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 14
16217 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 15
16218 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
16219 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
16220 ; GFX10-SCRATCH-NEXT: s_add_i32 s0, s32, 16
16221 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
16222 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s0
16223 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
16224 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
16225 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
16226 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0
16227 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
16228 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1
16229 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 1
16230 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 1
16231 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 1
16232 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 1
16233 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 2
16234 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 2
16235 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 2
16236 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 2
16237 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 2
16238 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 3
16239 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 3
16240 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 3
16241 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 3
16242 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 3
16243 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 4
16244 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 4
16245 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 4
16246 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 4
16247 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 4
16248 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 5
16249 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 5
16250 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 5
16251 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 5
16252 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 5
16253 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 6
16254 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 7
16255 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_8xv5i32@abs32@hi
16256 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_8xv5i32@abs32@lo
16257 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
16258 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
16259 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
16260 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
16261 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
16262 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16263 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
16264 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16265 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16266 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
16267 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
16268 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
16269 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
16271 call amdgpu_gfx void @external_void_func_8xv5i32(
16272 <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
16273 <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
16274 <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
16275 <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
16276 <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
16277 <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
16278 <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
16279 <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
16283 define amdgpu_gfx void @stack_8xv5f32() #0 {
16284 ; GFX9-LABEL: stack_8xv5f32:
16285 ; GFX9: ; %bb.0: ; %entry
16286 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16287 ; GFX9-NEXT: s_mov_b32 s34, s33
16288 ; GFX9-NEXT: s_mov_b32 s33, s32
16289 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16290 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16291 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16292 ; GFX9-NEXT: s_addk_i32 s32, 0x400
16293 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000
16294 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
16295 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41100000
16296 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
16297 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41200000
16298 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
16299 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000
16300 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
16301 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000
16302 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
16303 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000
16304 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
16305 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000
16306 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
16307 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
16308 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000
16309 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
16310 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
16311 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_8xv5f32@abs32@hi
16312 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_8xv5f32@abs32@lo
16313 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
16314 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
16315 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
16316 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
16317 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
16318 ; GFX9-NEXT: v_mov_b32_e32 v5, 1.0
16319 ; GFX9-NEXT: v_mov_b32_e32 v6, 1.0
16320 ; GFX9-NEXT: v_mov_b32_e32 v7, 1.0
16321 ; GFX9-NEXT: v_mov_b32_e32 v8, 1.0
16322 ; GFX9-NEXT: v_mov_b32_e32 v9, 1.0
16323 ; GFX9-NEXT: v_mov_b32_e32 v10, 2.0
16324 ; GFX9-NEXT: v_mov_b32_e32 v11, 2.0
16325 ; GFX9-NEXT: v_mov_b32_e32 v12, 2.0
16326 ; GFX9-NEXT: v_mov_b32_e32 v13, 2.0
16327 ; GFX9-NEXT: v_mov_b32_e32 v14, 2.0
16328 ; GFX9-NEXT: v_mov_b32_e32 v15, 0x40400000
16329 ; GFX9-NEXT: v_mov_b32_e32 v16, 0x40400000
16330 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x40400000
16331 ; GFX9-NEXT: v_mov_b32_e32 v18, 0x40400000
16332 ; GFX9-NEXT: v_mov_b32_e32 v19, 0x40400000
16333 ; GFX9-NEXT: v_mov_b32_e32 v20, 4.0
16334 ; GFX9-NEXT: v_mov_b32_e32 v21, 4.0
16335 ; GFX9-NEXT: v_mov_b32_e32 v22, 4.0
16336 ; GFX9-NEXT: v_mov_b32_e32 v23, 4.0
16337 ; GFX9-NEXT: v_mov_b32_e32 v24, 4.0
16338 ; GFX9-NEXT: v_mov_b32_e32 v25, 0x40a00000
16339 ; GFX9-NEXT: v_mov_b32_e32 v26, 0x40a00000
16340 ; GFX9-NEXT: v_mov_b32_e32 v27, 0x40a00000
16341 ; GFX9-NEXT: v_mov_b32_e32 v28, 0x40a00000
16342 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000
16343 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000
16344 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000
16345 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
16346 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
16347 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
16348 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
16349 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
16350 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16351 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
16352 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16353 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
16354 ; GFX9-NEXT: s_mov_b32 s33, s34
16355 ; GFX9-NEXT: s_waitcnt vmcnt(0)
16356 ; GFX9-NEXT: s_setpc_b64 s[30:31]
16358 ; GFX10-LABEL: stack_8xv5f32:
16359 ; GFX10: ; %bb.0: ; %entry
16360 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16361 ; GFX10-NEXT: s_mov_b32 s34, s33
16362 ; GFX10-NEXT: s_mov_b32 s33, s32
16363 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16364 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16365 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16366 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16367 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000
16368 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41100000
16369 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41200000
16370 ; GFX10-NEXT: s_addk_i32 s32, 0x200
16371 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
16372 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
16373 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
16374 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
16375 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41300000
16376 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41400000
16377 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41500000
16378 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x41600000
16379 ; GFX10-NEXT: v_mov_b32_e32 v4, 0x41700000
16380 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
16381 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
16382 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16
16383 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20
16384 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24
16385 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28
16386 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
16387 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
16388 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
16389 ; GFX10-NEXT: v_mov_b32_e32 v3, 0
16390 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
16391 ; GFX10-NEXT: v_mov_b32_e32 v5, 1.0
16392 ; GFX10-NEXT: v_mov_b32_e32 v6, 1.0
16393 ; GFX10-NEXT: v_mov_b32_e32 v7, 1.0
16394 ; GFX10-NEXT: v_mov_b32_e32 v8, 1.0
16395 ; GFX10-NEXT: v_mov_b32_e32 v9, 1.0
16396 ; GFX10-NEXT: v_mov_b32_e32 v10, 2.0
16397 ; GFX10-NEXT: v_mov_b32_e32 v11, 2.0
16398 ; GFX10-NEXT: v_mov_b32_e32 v12, 2.0
16399 ; GFX10-NEXT: v_mov_b32_e32 v13, 2.0
16400 ; GFX10-NEXT: v_mov_b32_e32 v14, 2.0
16401 ; GFX10-NEXT: v_mov_b32_e32 v15, 0x40400000
16402 ; GFX10-NEXT: v_mov_b32_e32 v16, 0x40400000
16403 ; GFX10-NEXT: v_mov_b32_e32 v17, 0x40400000
16404 ; GFX10-NEXT: v_mov_b32_e32 v18, 0x40400000
16405 ; GFX10-NEXT: v_mov_b32_e32 v19, 0x40400000
16406 ; GFX10-NEXT: v_mov_b32_e32 v20, 4.0
16407 ; GFX10-NEXT: v_mov_b32_e32 v21, 4.0
16408 ; GFX10-NEXT: v_mov_b32_e32 v22, 4.0
16409 ; GFX10-NEXT: v_mov_b32_e32 v23, 4.0
16410 ; GFX10-NEXT: v_mov_b32_e32 v24, 4.0
16411 ; GFX10-NEXT: v_mov_b32_e32 v25, 0x40a00000
16412 ; GFX10-NEXT: v_mov_b32_e32 v26, 0x40a00000
16413 ; GFX10-NEXT: v_mov_b32_e32 v27, 0x40a00000
16414 ; GFX10-NEXT: v_mov_b32_e32 v28, 0x40a00000
16415 ; GFX10-NEXT: v_mov_b32_e32 v29, 0x40a00000
16416 ; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000
16417 ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000
16418 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_8xv5f32@abs32@hi
16419 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_8xv5f32@abs32@lo
16420 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
16421 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
16422 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
16423 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
16424 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
16425 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16426 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
16427 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16428 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16429 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
16430 ; GFX10-NEXT: s_mov_b32 s33, s34
16431 ; GFX10-NEXT: s_waitcnt vmcnt(0)
16432 ; GFX10-NEXT: s_setpc_b64 s[30:31]
16434 ; GFX11-LABEL: stack_8xv5f32:
16435 ; GFX11: ; %bb.0: ; %entry
16436 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16437 ; GFX11-NEXT: s_mov_b32 s0, s33
16438 ; GFX11-NEXT: s_mov_b32 s33, s32
16439 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16440 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
16441 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16442 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
16443 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x41000000
16444 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41100000
16445 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41200000
16446 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41300000
16447 ; GFX11-NEXT: v_mov_b32_e32 v4, 0x41400000
16448 ; GFX11-NEXT: v_mov_b32_e32 v5, 0x41500000
16449 ; GFX11-NEXT: v_mov_b32_e32 v6, 0x41600000
16450 ; GFX11-NEXT: v_mov_b32_e32 v7, 0x41700000
16451 ; GFX11-NEXT: s_add_i32 s32, s32, 16
16452 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
16453 ; GFX11-NEXT: s_add_i32 s0, s32, 16
16454 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
16455 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0
16456 ; GFX11-NEXT: v_mov_b32_e32 v6, 1.0
16457 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
16458 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
16459 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1.0
16460 ; GFX11-NEXT: v_dual_mov_b32 v7, 1.0 :: v_dual_mov_b32 v8, 1.0
16461 ; GFX11-NEXT: v_dual_mov_b32 v9, 1.0 :: v_dual_mov_b32 v10, 2.0
16462 ; GFX11-NEXT: v_dual_mov_b32 v11, 2.0 :: v_dual_mov_b32 v12, 2.0
16463 ; GFX11-NEXT: v_dual_mov_b32 v13, 2.0 :: v_dual_mov_b32 v14, 2.0
16464 ; GFX11-NEXT: v_dual_mov_b32 v15, 0x40400000 :: v_dual_mov_b32 v16, 0x40400000
16465 ; GFX11-NEXT: v_dual_mov_b32 v17, 0x40400000 :: v_dual_mov_b32 v18, 0x40400000
16466 ; GFX11-NEXT: v_dual_mov_b32 v19, 0x40400000 :: v_dual_mov_b32 v20, 4.0
16467 ; GFX11-NEXT: v_dual_mov_b32 v21, 4.0 :: v_dual_mov_b32 v22, 4.0
16468 ; GFX11-NEXT: v_dual_mov_b32 v23, 4.0 :: v_dual_mov_b32 v24, 4.0
16469 ; GFX11-NEXT: v_dual_mov_b32 v25, 0x40a00000 :: v_dual_mov_b32 v26, 0x40a00000
16470 ; GFX11-NEXT: v_dual_mov_b32 v27, 0x40a00000 :: v_dual_mov_b32 v28, 0x40a00000
16471 ; GFX11-NEXT: v_mov_b32_e32 v29, 0x40a00000
16472 ; GFX11-NEXT: v_mov_b32_e32 v30, 0x40c00000
16473 ; GFX11-NEXT: v_mov_b32_e32 v31, 0x40e00000
16474 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_8xv5f32@abs32@hi
16475 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_8xv5f32@abs32@lo
16476 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
16477 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
16478 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
16479 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
16480 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
16481 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
16482 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16483 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
16484 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16485 ; GFX11-NEXT: s_add_i32 s32, s32, -16
16486 ; GFX11-NEXT: s_mov_b32 s33, s0
16487 ; GFX11-NEXT: s_waitcnt vmcnt(0)
16488 ; GFX11-NEXT: s_setpc_b64 s[30:31]
16490 ; GFX10-SCRATCH-LABEL: stack_8xv5f32:
16491 ; GFX10-SCRATCH: ; %bb.0: ; %entry
16492 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16493 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
16494 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
16495 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16496 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
16497 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16498 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16499 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
16500 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x41000000
16501 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x41100000
16502 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0x41200000
16503 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x41300000
16504 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0x41400000
16505 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x41500000
16506 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 0x41600000
16507 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 0x41700000
16508 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
16509 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
16510 ; GFX10-SCRATCH-NEXT: s_add_i32 s0, s32, 16
16511 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
16512 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s0
16513 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
16514 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
16515 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
16516 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0
16517 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
16518 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1.0
16519 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 1.0
16520 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 1.0
16521 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 1.0
16522 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 1.0
16523 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 2.0
16524 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 2.0
16525 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 2.0
16526 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 2.0
16527 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 2.0
16528 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 0x40400000
16529 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0x40400000
16530 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 0x40400000
16531 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 0x40400000
16532 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 0x40400000
16533 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 4.0
16534 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 4.0
16535 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 4.0
16536 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 4.0
16537 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 4.0
16538 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 0x40a00000
16539 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 0x40a00000
16540 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 0x40a00000
16541 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 0x40a00000
16542 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 0x40a00000
16543 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 0x40c00000
16544 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 0x40e00000
16545 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_8xv5f32@abs32@hi
16546 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_8xv5f32@abs32@lo
16547 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
16548 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
16549 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
16550 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
16551 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
16552 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16553 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
16554 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16555 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16556 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
16557 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
16558 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
16559 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
16561 call amdgpu_gfx void @external_void_func_8xv5f32(
16562 <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,
16563 <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
16564 <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
16565 <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
16566 <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
16567 <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
16568 <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
16569 <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
; External amdgpu_gfx callees declared for the tests in this file. Only the
; call to @external_void_func_8xv5f32 is visible next to these declarations;
; the other callees are presumably used by earlier tests -- TODO confirm.
; Takes a <32 x i32> followed by a byval(double) pointer in addrspace(5), align 16.
16573 declare hidden amdgpu_gfx void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0
; Takes a <32 x i32> followed by a plain double.
16574 declare hidden amdgpu_gfx void @stack_passed_f64_arg(<32 x i32>, double) #0
; Twelve <3 x i32> arguments (36 i32 elements in total).
16575 declare hidden amdgpu_gfx void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
16576 <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
; Eight <5 x i32> arguments (40 i32 elements in total).
16577 declare hidden amdgpu_gfx void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
16578 <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
; Twelve <3 x float> arguments (36 float elements in total).
16579 declare hidden amdgpu_gfx void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
16580 <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
; Eight <5 x float> arguments (40 float elements in total). In the
; stack_8xv5f32 checks, v0-v31 are set up before the call and the eight
; constants 0x41000000..0x41700000 are stored through s32-based addresses.
16581 declare hidden amdgpu_gfx void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
16582 <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
; Attribute group #0 is attached to every declaration in the group above.
16583 attributes #0 = { nounwind }
; NOTE(review): no user of #1 is visible in this part of the file; confirm it
; is referenced elsewhere before considering it dead.
16584 attributes #1 = { nounwind noinline }